From 1817c2c7d0477d6dff8f4d2d35cd2353d54663f8 Mon Sep 17 00:00:00 2001 From: Thibault Jaigu Date: Tue, 23 Jun 2026 18:47:31 +0100 Subject: [PATCH 1/3] feat: add Requesty as an OpenAI-compatible LLM provider --- src/core/config/AgentOSConfig.ts | 18 + .../llm/providers/AIModelProviderManager.ts | 6 +- .../providers/errors/RequestyProviderError.ts | 56 ++ .../implementations/RequestyProvider.ts | 743 ++++++++++++++++++ .../llm/providers/structuredOutputFormat.ts | 4 +- 5 files changed, 825 insertions(+), 2 deletions(-) create mode 100644 src/core/llm/providers/errors/RequestyProviderError.ts create mode 100644 src/core/llm/providers/implementations/RequestyProvider.ts diff --git a/src/core/config/AgentOSConfig.ts b/src/core/config/AgentOSConfig.ts index 91cad3845f8..3eebc79e203 100644 --- a/src/core/config/AgentOSConfig.ts +++ b/src/core/config/AgentOSConfig.ts @@ -49,6 +49,7 @@ export interface EnvironmentConfig { OPENAI_API_KEY?: string; ANTHROPIC_API_KEY?: string; OPENROUTER_API_KEY?: string; + REQUESTY_API_KEY?: string; SERPER_API_KEY?: string; OLLAMA_BASE_URL?: string; @@ -128,6 +129,7 @@ export function getEnvironmentConfig(): EnvironmentConfig { OPENAI_API_KEY: process.env.OPENAI_API_KEY, ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY, OPENROUTER_API_KEY: process.env.OPENROUTER_API_KEY, + REQUESTY_API_KEY: process.env.REQUESTY_API_KEY, OLLAMA_BASE_URL: process.env.OLLAMA_BASE_URL || 'http://localhost:11434', LEMONSQUEEZY_API_KEY: process.env.LEMONSQUEEZY_API_KEY, LEMONSQUEEZY_WEBHOOK_SECRET: process.env.LEMONSQUEEZY_WEBHOOK_SECRET, @@ -231,6 +233,22 @@ function createModelProviderManagerConfig(env: EnvironmentConfig): AIModelProvid }); } + // Requesty Provider (OpenAI-compatible LLM gateway) + if (env.REQUESTY_API_KEY) { + providers.push({ + providerId: 'requesty', + enabled: true, + isDefault: !env.OPENAI_API_KEY && !env.OPENROUTER_API_KEY, // Default to Requesty if OpenAI/OpenRouter not available + config: { + apiKey: env.REQUESTY_API_KEY, + baseURL: 'https://router.requesty.ai/v1', + defaultModel: 'openai/gpt-4o', + maxRetries: 3, + timeout: 60000, + }, + }); + } + // Ollama Provider (local) if (env.OLLAMA_BASE_URL) { providers.push({ diff --git a/src/core/llm/providers/AIModelProviderManager.ts b/src/core/llm/providers/AIModelProviderManager.ts index c3e7aa1b1e2..aa23227744b 100644 --- a/src/core/llm/providers/AIModelProviderManager.ts +++ b/src/core/llm/providers/AIModelProviderManager.ts @@ -21,6 +21,7 @@ import { IProvider, ModelInfo } from './IProvider'; import { OpenAIProvider, OpenAIProviderConfig } from './implementations/OpenAIProvider'; import { OpenRouterProvider, OpenRouterProviderConfig } from './implementations/OpenRouterProvider'; +import { RequestyProvider, RequestyProviderConfig } from './implementations/RequestyProvider'; import { OllamaProvider, OllamaProviderConfig } from './implementations/OllamaProvider'; import { AnthropicProvider, AnthropicProviderConfig } from './implementations/AnthropicProvider'; import { GroqProvider, GroqProviderConfig } from './implementations/GroqProvider'; @@ -39,7 +40,7 @@ import { GMIError, GMIErrorCode, createGMIErrorFromError } from '../../utils/err export interface ProviderConfigEntry { providerId: string; enabled: boolean; - config: Partial>; + config: Partial>; isDefault?: boolean; } @@ -126,6 +127,9 @@ export class AIModelProviderManager { case 'openrouter': providerInstance = new OpenRouterProvider(); break; + case 'requesty': + providerInstance = new RequestyProvider(); + break; case 'ollama': providerInstance = new OllamaProvider(); break; diff --git a/src/core/llm/providers/errors/RequestyProviderError.ts b/src/core/llm/providers/errors/RequestyProviderError.ts new file mode 100644 index 00000000000..afb22b2cd31 --- /dev/null +++ b/src/core/llm/providers/errors/RequestyProviderError.ts @@ -0,0 +1,56 @@ +// File: backend/agentos/core/llm/providers/errors/RequestyProviderError.ts +/** + * @fileoverview Defines a custom error class for Requesty-specific provider errors. + * This extends the base {@link ProviderError} to include details specific to Requesty API interactions. + * @module backend/agentos/core/llm/providers/errors/RequestyProviderError + * @see {@link ProviderError} + */ + +import { ProviderError } from './ProviderError'; + +/** + * Represents an error specific to the Requesty provider. + * It can include additional context like HTTP status codes or specific Requesty error messages. + * + * @example + * try { + * // Requesty API call + * } catch (error) { + * if (error instanceof RequestyProviderError) { + * console.error(`Requesty Error (Status: ${error.httpStatus || 'N/A'}, Type: ${error.requestyErrorType || 'N/A'}): ${error.message}`); + * // Handle Requesty-specific error properties + * } else { + * // Handle other errors + * } + * } + */ +export class RequestyProviderError extends ProviderError { + /** HTTP status code from the API response (e.g., 400, 401, 429, 500). */ + public readonly httpStatus?: number; + + /** Requesty specific error type, if available from the response. */ + public readonly requestyErrorType?: string; + + /** + * Creates an instance of RequestyProviderError. + * @param {string} message - A human-readable description of the error. + * @param {string} code - A unique AgentOS internal code identifying the type of error (e.g., 'API_REQUEST_FAILED', 'INVALID_ROUTE'). + * @param {number} [httpStatus] - HTTP status code from the API response. + * @param {string} [requestyErrorType] - Requesty specific error type. + * @param {unknown} [details] - Optional underlying error object or additional context from Requesty. + */ + constructor( + message: string, + code: string, + httpStatus?: number, + requestyErrorType?: string, + details?: unknown + ) { + super(message, code, 'requesty', details); // ProviderId is 'requesty' + this.name = 'RequestyProviderError'; + this.httpStatus = httpStatus; + this.requestyErrorType = requestyErrorType; + + Object.setPrototypeOf(this, RequestyProviderError.prototype); + } +} diff --git a/src/core/llm/providers/implementations/RequestyProvider.ts b/src/core/llm/providers/implementations/RequestyProvider.ts new file mode 100644 index 00000000000..176643362bd --- /dev/null +++ b/src/core/llm/providers/implementations/RequestyProvider.ts @@ -0,0 +1,743 @@ +// File: backend/agentos/core/llm/providers/implementations/RequestyProvider.ts +/** + * @fileoverview Implements the IProvider interface for Requesty, an OpenAI-compatible + * LLM gateway that provides access to a wide variety of LLMs from different providers + * through a unified API. This provider handles routing requests to the specified models + * via Requesty. + * @module backend/agentos/core/llm/providers/implementations/RequestyProvider + * @implements {IProvider} + */ + +import axios, { AxiosInstance, AxiosError, ResponseType } from 'axios'; +import { + IProvider, + ChatMessage, + ModelCompletionOptions, + ModelCompletionResponse, + ModelInfo, + ModelUsage, + ProviderEmbeddingOptions, + ProviderEmbeddingResponse, + ModelCompletionChoice, +} from '../IProvider'; +import { RequestyProviderError } from '../errors/RequestyProviderError'; +import { ApiKeyPool } from '../../../providers/ApiKeyPool.js'; +import { createGMIErrorFromError, GMIErrorCode } from '../../../utils/errors.js'; // Corrected import path +import { clampMaxOutputTokens } from '../model-output-limits.js'; + +/** + * Configuration specific to the RequestyProvider. + */ +export interface RequestyProviderConfig { + apiKey: string; + baseURL?: string; + defaultModelId?: string; + siteUrl?: string; + appName?: string; + requestTimeout?: number; + streamRequestTimeout?: number; +} + +interface RequestyChatChoice { + index: number; + message?: { + role: ChatMessage['role']; + content: string | null; + tool_calls?: ChatMessage['tool_calls']; + }; + delta?: { + role?: ChatMessage['role']; + content?: string | null; + tool_calls?: Array<{ + index: number; + id?: string; + type?: 'function'; + function?: { name?: string; arguments?: string; }; + }>; + }; + finish_reason: string | null; + logprobs?: unknown; +} + +interface RequestyChatCompletionAPIResponse { + id: string; + object: string; + created: number; + model: string; + choices: RequestyChatChoice[]; + usage?: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + cost?: number; + }; +} + +interface RequestyEmbeddingAPIResponse { + object: 'list'; + data: Array<{ + object: 'embedding'; + embedding: number[]; + index: number; + }>; + model: string; + usage: { + prompt_tokens: number; + total_tokens: number; + }; +} + +interface RequestyModelAPIObject { + id: string; + name: string; + description: string; + pricing: { + prompt: string; + completion: string; + request?: string; + image?: string; + }; + context_length: number | null; + architecture?: { + modality: string; + tokenizer: string; + instruct_type: string | null; + }; + top_provider: { + max_retries: number | null; + is_fallback: boolean | null; + }; +} + +interface RequestyListModelsAPIResponse { + data: RequestyModelAPIObject[]; +} + +export class RequestyProvider implements IProvider { + public readonly providerId: string = 'requesty'; + public isInitialized: boolean = false; + public defaultModelId?: string; + + // Corrected: Changed type of this.config to satisfy the Readonly> assignment by providing defaults + private config!: Readonly> & RequestyProviderConfig>; + private keyPool: ApiKeyPool | null = null; + private client!: AxiosInstance; + private readonly availableModelsCache: Map = new Map(); + + constructor() {} + + public async initialize(config: RequestyProviderConfig): Promise { + if (!config.apiKey) { + throw new RequestyProviderError( + 'Requesty API key (apiKey) is required for initialization.', + 'INIT_FAILED_MISSING_API_KEY' + ); + } + // Corrected: Ensure all properties of Required are present + // by providing defaults for optional fields before freezing. + this.config = Object.freeze({ + apiKey: config.apiKey, + baseURL: config.baseURL || 'https://router.requesty.ai/v1', + defaultModelId: config.defaultModelId, // Can be undefined, but this.defaultModelId will store it + siteUrl: config.siteUrl, // Can be undefined + appName: config.appName, // Can be undefined + requestTimeout: config.requestTimeout || 60000, + streamRequestTimeout: config.streamRequestTimeout || 180000, + }); + this.keyPool = new ApiKeyPool(config.apiKey); + this.defaultModelId = this.config.defaultModelId; // Store the potentially undefined value + + const headers: Record = { + 'Authorization': `Bearer ${this.keyPool.next()}`, + 'Content-Type': 'application/json', + 'User-Agent': `AgentOS/1.0 (RequestyProvider; ${this.config.appName || 'UnknownApp'})`, + }; + if (this.config.siteUrl) { + headers['HTTP-Referer'] = this.config.siteUrl; + } + if (this.config.appName) { + headers['X-Title'] = this.config.appName; + } + + this.client = axios.create({ + baseURL: this.config.baseURL, + headers, + }); + + try { + await this.refreshAvailableModels(); + this.isInitialized = true; + console.log(`RequestyProvider initialized. Default Model: ${this.defaultModelId || 'Not set'}. Found ${this.availableModelsCache.size} models via Requesty.`); + } catch (error: unknown) { + this.isInitialized = false; + const initError = error instanceof RequestyProviderError ? error : + createGMIErrorFromError( // Corrected: use imported createGMIErrorFromError + error instanceof Error ? error : new Error(String(error)), + GMIErrorCode.LLM_PROVIDER_ERROR, // Corrected: use imported GMIErrorCode + { providerId: this.providerId }, + `RequestyProvider failed to initialize: ${error instanceof Error ? error.message : String(error)}` + ); + console.error(initError.message, initError.details || initError); + throw initError; + } + } + + private async refreshAvailableModels(): Promise { + const responseData = await this.makeApiRequest( + '/models', + 'GET', + this.config.requestTimeout + ); + + this.availableModelsCache.clear(); + if (responseData && Array.isArray(responseData.data)) { + responseData.data.forEach((apiModel: RequestyModelAPIObject) => { + const modelInfo = this.mapApiToModelInfo(apiModel); + this.availableModelsCache.set(modelInfo.modelId, modelInfo); + }); + } else { + console.warn("RequestyProvider: Received no model data or malformed response from /models endpoint."); + } + } + + private mapApiToModelInfo(apiModel: RequestyModelAPIObject): ModelInfo { + const capabilities: ModelInfo['capabilities'] = ['chat', 'completion']; + if (apiModel.architecture?.modality === 'multimodal') { + capabilities.push('vision_input'); + } + const knownAdvancedModelPatterns = ['gpt-3.5', 'gpt-4', 'claude-2', 'claude-3', 'gemini', 'mistral', 'llama']; + if (knownAdvancedModelPatterns.some(pattern => apiModel.id.toLowerCase().includes(pattern))) { + capabilities.push('tool_use', 'json_mode'); + } + if (apiModel.id.includes('embedding') || apiModel.id.includes('embed')) { + capabilities.push('embeddings'); + } + + const parsePrice = (priceStr: string | undefined, tokensFactor: number = 1000000): number | undefined => { + if (typeof priceStr !== 'string') return undefined; + const price = parseFloat(priceStr); + return isNaN(price) ? undefined : price * tokensFactor; + }; + + return { + modelId: apiModel.id, + providerId: this.providerId, + displayName: apiModel.name, + description: apiModel.description, + capabilities: Array.from(new Set(capabilities)), + contextWindowSize: apiModel.context_length || undefined, + pricePer1MTokensInput: parsePrice(apiModel.pricing.prompt), + pricePer1MTokensOutput: parsePrice(apiModel.pricing.completion), + supportsStreaming: true, + status: 'active', + }; + } + + private ensureInitialized(): void { + if (!this.isInitialized) { + throw new RequestyProviderError( + 'RequestyProvider is not initialized. Please call the initialize() method first.', + 'PROVIDER_NOT_INITIALIZED' + ); + } + } + + private mapToRequestyMessages(messages: ChatMessage[]): Array> { + return messages.map(msg => { + const mappedMsg: Partial = { role: msg.role, content: msg.content }; + if (msg.name) mappedMsg.name = msg.name; + if (msg.tool_calls) mappedMsg.tool_calls = msg.tool_calls; + if (msg.tool_call_id) mappedMsg.tool_call_id = msg.tool_call_id; + return mappedMsg; + }); + } + + public async generateCompletion( + modelId: string, + messages: ChatMessage[], + options: ModelCompletionOptions + ): Promise { + this.ensureInitialized(); + const requestyMessages = this.mapToRequestyMessages(messages); + + const payload: Record = { + model: modelId, + messages: requestyMessages, + stream: false, + ...(options.temperature !== undefined && { temperature: options.temperature }), + ...(options.topP !== undefined && { top_p: options.topP }), + // Requesty reserves credits up to max_tokens at request time. When the + // caller doesn't specify a limit, it falls back to the model's full output + // capacity (e.g. 64000 for claude-haiku-4-5), which causes 402 credit-required + // errors on accounts without enough buffer. Default to 4096 — the same value + // AnthropicProvider uses — so short prompts succeed without explicit tuning. + // Then clamp to the model's output ceiling so a request sized for a + // flagship model is not rejected when routed to a lower-ceiling OpenAI + // model (e.g. openai/gpt-4o caps at 16384, not 32000). + max_tokens: clampMaxOutputTokens(modelId, options.maxTokens) ?? 4096, + ...(options.presencePenalty !== undefined && { presence_penalty: options.presencePenalty }), + ...(options.frequencyPenalty !== undefined && { frequency_penalty: options.frequencyPenalty }), + ...(options.stopSequences !== undefined && { stop: options.stopSequences }), + ...(options.userId !== undefined && { user: options.userId }), + ...(options.tools !== undefined && { tools: options.tools }), + ...(options.toolChoice !== undefined && { tool_choice: options.toolChoice }), + ...(options.responseFormat?.type === 'json_object' && { response_format: { type: 'json_object' } }), + ...(options.customModelParams || {}), + }; + + const apiResponseData = await this.makeApiRequest( + '/chat/completions', + 'POST', + // CR8: honor a per-call requestTimeout override over the provider default. + options.requestTimeout ?? this.config.requestTimeout, + payload + ); + return this.mapApiToCompletionResponse(apiResponseData, modelId); + } + + public async *generateCompletionStream( + modelId: string, + messages: ChatMessage[], + options: ModelCompletionOptions + ): AsyncGenerator { + this.ensureInitialized(); + const requestyMessages = this.mapToRequestyMessages(messages); + + const payload: Record = { + model: modelId, + messages: requestyMessages, + stream: true, + // Requesty follows the OpenAI streaming convention: usage is omitted + // unless stream_options.include_usage is set, in which case a trailing + // usage-only chunk arrives before [DONE]. Without this flag, + // streamText({...}).usage resolves to all zeros even on success. + stream_options: { include_usage: true }, + ...(options.temperature !== undefined && { temperature: options.temperature }), + ...(options.topP !== undefined && { top_p: options.topP }), + // Requesty reserves credits up to max_tokens at request time. When the + // caller doesn't specify a limit, it falls back to the model's full output + // capacity (e.g. 64000 for claude-haiku-4-5), which causes 402 credit-required + // errors on accounts without enough buffer. Default to 4096 — the same value + // AnthropicProvider uses — so short prompts succeed without explicit tuning. + // Then clamp to the model's output ceiling so a request sized for a + // flagship model is not rejected when routed to a lower-ceiling OpenAI + // model (e.g. openai/gpt-4o caps at 16384, not 32000). + max_tokens: clampMaxOutputTokens(modelId, options.maxTokens) ?? 4096, + ...(options.presencePenalty !== undefined && { presence_penalty: options.presencePenalty }), + ...(options.frequencyPenalty !== undefined && { frequency_penalty: options.frequencyPenalty }), + ...(options.stopSequences !== undefined && { stop: options.stopSequences }), + ...(options.userId !== undefined && { user: options.userId }), + ...(options.tools !== undefined && { tools: options.tools }), + ...(options.toolChoice !== undefined && { tool_choice: options.toolChoice }), + ...(options.responseFormat?.type === 'json_object' && { response_format: { type: 'json_object' } }), + ...(options.customModelParams || {}), + }; + + const stream = await this.makeApiRequest( + '/chat/completions', + 'POST', + // CR8: honor a per-call requestTimeout override over the stream default. + options.requestTimeout ?? this.config.streamRequestTimeout, + payload, + true + ); + + const accumulatedToolCalls: Map = new Map(); + + const abortSignal = options.abortSignal; + if (abortSignal?.aborted) { + yield { id: `requesty-abort-${Date.now()}`, object: 'chat.completion.chunk', created: Math.floor(Date.now()/1000), modelId, choices: [], error: { message: 'Stream aborted prior to first chunk', type: 'abort' }, isFinal: true }; + return; + } + const abortHandler = () => { /* passive; loop logic handles emission */ }; + abortSignal?.addEventListener('abort', abortHandler, { once: true }); + + for await (const rawChunk of this.parseSseStream(stream)) { + if (abortSignal?.aborted) { + yield { id: `requesty-abort-${Date.now()}`, object: 'chat.completion.chunk', created: Math.floor(Date.now()/1000), modelId, choices: [], error: { message: 'Stream aborted by caller', type: 'abort' }, isFinal: true }; + break; + } + if (rawChunk.startsWith('data: ') && rawChunk.includes('[DONE]')) { + const doneData = rawChunk.substring('data: '.length).trim(); + if (doneData === '[DONE]') break; + } + if (rawChunk === 'data: [DONE]') { + break; + } + + if (rawChunk.startsWith('data: ')) { + const jsonData = rawChunk.substring('data: '.length); + try { + const apiChunk = JSON.parse(jsonData) as RequestyChatCompletionAPIResponse; + yield this.mapApiToStreamChunkResponse(apiChunk, modelId, accumulatedToolCalls); + // Don't break on finish_reason: with stream_options.include_usage, + // Requesty (like OpenAI) emits a trailing usage-only chunk AFTER + // the finish_reason chunk and BEFORE [DONE]. Breaking here would + // skip the usage chunk and zero out the caller's token totals. The + // [DONE] marker check above is the right termination signal. + } catch (error: unknown) { + console.warn('RequestyProvider: Failed to parse stream chunk JSON, skipping chunk. Data:', jsonData, 'Error:', error); + } + } + } + abortSignal?.removeEventListener('abort', abortHandler); + } + + public async generateEmbeddings( + modelId: string, + texts: string[], + options?: ProviderEmbeddingOptions + ): Promise { + this.ensureInitialized(); + if (!texts || texts.length === 0) { + throw new RequestyProviderError('Input texts array cannot be empty for generating embeddings.', 'EMBEDDING_NO_INPUT'); + } + + const modelInfo = await this.getModelInfo(modelId); + if (modelInfo && !modelInfo.capabilities.includes('embeddings')) { + console.warn(`RequestyProvider: Model '${modelId}' is not explicitly listed with embedding capabilities. Attempting anyway.`); + } + + const payload: Record = { + model: modelId, + input: texts, + ...(options?.encodingFormat && { encoding_format: options.encodingFormat }), + ...(options?.dimensions && { dimensions: options.dimensions }), + ...(options?.customModelParams || {}), + }; + if (options?.inputType && payload.customModelParams && typeof payload.customModelParams === 'object') { + (payload.customModelParams as Record).input_type = options.inputType; + } else if (options?.inputType) { + payload.customModelParams = { input_type: options.inputType }; + } + + const apiResponseData = await this.makeApiRequest( + '/embeddings', + 'POST', + this.config.requestTimeout, + payload + ); + + return { + object: 'list', + data: apiResponseData.data.map(d => ({ + object: 'embedding', + embedding: d.embedding, + index: d.index, + })), + model: apiResponseData.model, + usage: { + prompt_tokens: apiResponseData.usage.prompt_tokens, + total_tokens: apiResponseData.usage.total_tokens, + }, + }; + } + + public async listAvailableModels(filter?: { capability?: string }): Promise { + this.ensureInitialized(); + if (this.availableModelsCache.size === 0) { + try { + await this.refreshAvailableModels(); + } catch (refreshError) { + console.warn("RequestyProvider: Failed to refresh models during listAvailableModels call after finding empty cache:", refreshError); + } + } + const models = Array.from(this.availableModelsCache.values()); + if (filter?.capability) { + return models.filter(m => m.capabilities.includes(filter.capability!)); + } + return models; + } + + public async getModelInfo(modelId: string): Promise { + this.ensureInitialized(); + if (!this.availableModelsCache.has(modelId)) { + try { + console.log(`RequestyProvider: Model ${modelId} not in cache. Refreshing model list.`); + await this.refreshAvailableModels(); + } catch (error) { + console.warn(`RequestyProvider: Failed to refresh models list while trying to get info for ${modelId}:`, error); + } + } + return this.availableModelsCache.get(modelId); + } + + public async checkHealth(): Promise<{ isHealthy: boolean; details?: unknown }> { + if (!this.client) { + return { isHealthy: false, details: { message: "RequestyProvider not initialized (HTTP client missing)."}}; + } + try { + await this.client.get('/models', { timeout: Math.min(this.config.requestTimeout || 10000, 10000) }); + return { isHealthy: true, details: { message: "Successfully connected to Requesty /models endpoint." } }; + } catch (error: unknown) { + const err = error as AxiosError; + return { + isHealthy: false, + details: { + message: `Requesty health check failed: ${err.message}`, + status: err.response?.status, + responseData: err.response?.data, + }, + }; + } + } + + public async shutdown(): Promise { + this.isInitialized = false; + this.availableModelsCache.clear(); + console.log('RequestyProvider shutdown: Instance marked as uninitialized and cache cleared.'); + } + + private mapApiToCompletionResponse( + apiResponse: RequestyChatCompletionAPIResponse, + requestedModelId: string + ): ModelCompletionResponse { + const choice = apiResponse.choices[0]; + if (!choice) { + throw new RequestyProviderError("Received empty choices array from Requesty.", "API_RESPONSE_MALFORMED", undefined, undefined, { responseId: apiResponse.id }); + } + + const usage: ModelUsage | undefined = apiResponse.usage ? { + promptTokens: apiResponse.usage.prompt_tokens, + completionTokens: apiResponse.usage.completion_tokens, + totalTokens: apiResponse.usage.total_tokens, + costUSD: apiResponse.usage.cost, + } : undefined; + + return { + id: apiResponse.id, + object: apiResponse.object, + created: apiResponse.created, + modelId: apiResponse.model || requestedModelId, + choices: apiResponse.choices.map(c => ({ + index: c.index, + message: { + role: c.message!.role, + content: c.message!.content, + tool_calls: c.message!.tool_calls, + }, + finishReason: c.finish_reason, + logprobs: c.logprobs, + })), + usage, + }; + } + + private mapApiToStreamChunkResponse( + apiChunk: RequestyChatCompletionAPIResponse, + requestedModelId: string, + accumulatedToolCalls: Map + ): ModelCompletionResponse { + const choice = apiChunk.choices[0]; + + // With stream_options.include_usage, Requesty (like OpenAI) emits a + // trailing chunk with an empty choices array and a populated usage + // object. Recognize it as a final usage-only chunk so callers see real + // token totals after the stream resolves. Without this, the empty-choices + // path below would mark it as a malformed-response error. + if ((!apiChunk.choices || apiChunk.choices.length === 0) && apiChunk.usage) { + return { + id: apiChunk.id, + object: apiChunk.object, + created: apiChunk.created, + modelId: apiChunk.model || requestedModelId, + choices: [], + isFinal: true, + usage: { + promptTokens: apiChunk.usage.prompt_tokens, + completionTokens: apiChunk.usage.completion_tokens, + totalTokens: apiChunk.usage.total_tokens, + costUSD: apiChunk.usage.cost, + }, + }; + } + + if (!choice) { + return { + id: apiChunk.id, object: apiChunk.object, created: apiChunk.created, + modelId: apiChunk.model || requestedModelId, choices: [], isFinal: true, + error: { message: "Stream chunk contained no choices.", type: "invalid_response" } + }; + } + + let responseTextDelta: string | undefined; + let toolCallsDeltas: ModelCompletionResponse['toolCallsDeltas']; + + if (choice.delta?.content) { + responseTextDelta = choice.delta.content; + } + + if (choice.delta?.tool_calls) { + toolCallsDeltas = []; + choice.delta.tool_calls.forEach(tcDelta => { + let currentToolCallState = accumulatedToolCalls.get(tcDelta.index); + if (!currentToolCallState) { + currentToolCallState = { function: { name: '', arguments: ''} }; + } + + if (tcDelta.id) currentToolCallState.id = tcDelta.id; + if (tcDelta.type) currentToolCallState.type = tcDelta.type as 'function'; + if (tcDelta.function?.name) currentToolCallState.function!.name = (currentToolCallState.function!.name || '') + tcDelta.function.name; + if (tcDelta.function?.arguments) currentToolCallState.function!.arguments = (currentToolCallState.function!.arguments || '') + tcDelta.function.arguments; + + accumulatedToolCalls.set(tcDelta.index, currentToolCallState); + + toolCallsDeltas!.push({ + index: tcDelta.index, + id: tcDelta.id, + type: tcDelta.type as 'function', + function: tcDelta.function ? { + name: tcDelta.function.name, + arguments_delta: tcDelta.function.arguments + } : undefined, + }); + }); + } + + const isFinal = !!choice.finish_reason; + let finalUsage: ModelUsage | undefined; + const finalChoices: ModelCompletionChoice[] = []; + + if (isFinal) { + if (apiChunk.usage) { + finalUsage = { + promptTokens: apiChunk.usage.prompt_tokens, + completionTokens: apiChunk.usage.completion_tokens, + totalTokens: apiChunk.usage.total_tokens, + costUSD: apiChunk.usage.cost, + }; + } + const finalMessage: ChatMessage = { + role: choice.delta?.role || accumulatedToolCalls.size > 0 ? 'assistant' : (choice.message?.role || 'assistant'), + content: responseTextDelta || (choice.message?.content || null), + tool_calls: Array.from(accumulatedToolCalls.values()) + .filter(tc => tc.id && tc.function?.name) + .map(accTc => ({ + id: accTc.id!, + type: accTc.type!, + function: { name: accTc.function!.name!, arguments: accTc.function!.arguments! } + })), + }; + if (!finalMessage.tool_calls || finalMessage.tool_calls.length === 0) { + delete finalMessage.tool_calls; + } + if (responseTextDelta && !choice.message?.content && accumulatedToolCalls.size === 0) { + finalMessage.content = responseTextDelta; + } else if (accumulatedToolCalls.size > 0 && !responseTextDelta && !choice.message?.content) { + finalMessage.content = null; + } + + finalChoices.push({ + index: choice.index, + message: finalMessage, + finishReason: choice.finish_reason, + logprobs: choice.logprobs, + }); + } else { + finalChoices.push({ + index: choice.index, + message: { + role: choice.delta?.role || 'assistant', + content: responseTextDelta || null, + }, + finishReason: null, + }); + } + + return { + id: apiChunk.id, + object: apiChunk.object, + created: apiChunk.created, + modelId: apiChunk.model || requestedModelId, + choices: finalChoices, + responseTextDelta: isFinal ? undefined : responseTextDelta, + toolCallsDeltas: isFinal ? undefined : toolCallsDeltas, + isFinal, + usage: finalUsage, + }; + } + + private async makeApiRequest( + endpoint: string, + method: 'GET' | 'POST', + timeout?: number, + body?: Record, + expectStream: boolean = false + ): Promise { + try { + const response = await this.client.request({ + url: endpoint, + method, + data: body, + timeout: timeout, + responseType: expectStream ? 'stream' as ResponseType : 'json' as ResponseType, + }); + return response.data; + } catch (error: unknown) { + let statusCode: number | undefined; + let errorData: any; + let errorMessage = 'Unknown Requesty API error'; + let errorType = 'UNKNOWN_API_ERROR'; + + if (axios.isAxiosError(error)) { + statusCode = error.response?.status; + errorData = error.response?.data; + if (errorData?.error && typeof errorData.error === 'object') { + errorMessage = errorData.error.message || errorMessage; + errorType = errorData.error.type || errorType; + } else if (typeof errorData === 'string') { + errorMessage = errorData; + } else if ((error as Error).message) { + errorMessage = (error as Error).message; + } + } else if (error instanceof Error) { + errorMessage = error.message; + } + + // Prefix the status code into the message so downstream retry/fallback + // logic (e.g. isRetryableError, which greps for \b402\b) can route on it + // even when the Requesty API body provides a friendlier description. + const decoratedMessage = statusCode ? `[${statusCode}] ${errorMessage}` : errorMessage; + throw new RequestyProviderError( + decoratedMessage, + 'API_REQUEST_FAILED', + statusCode, + errorType, + { requestEndpoint: endpoint, requestBodyPreview: body ? JSON.stringify(body).substring(0, 200) + '...' : undefined, responseData: errorData, underlyingError: error } + ); + } + } + + private async *parseSseStream(stream: NodeJS.ReadableStream): AsyncGenerator { + let buffer = ''; + const readableStream = stream as NodeJS.ReadableStream & { destroy?: () => void }; + + try { + for await (const chunk of readableStream) { + buffer += chunk.toString(); + let eolIndex; + while ((eolIndex = buffer.indexOf('\n')) >= 0) { + const line = buffer.substring(0, eolIndex).trim(); + buffer = buffer.substring(eolIndex + 1); + if (line) { + yield line; + } + } + } + if (buffer.trim()) { + yield buffer.trim(); + } + } catch (error: unknown) { + const message = error instanceof Error ? error.message : "Requesty stream parsing/reading error"; + console.error("RequestyProvider: Error reading or parsing SSE stream:", message, error); + if (error instanceof RequestyProviderError) throw error; + throw new RequestyProviderError(message, 'STREAM_PARSING_ERROR', undefined, undefined, error); + } finally { + if (typeof readableStream.destroy === 'function') { + readableStream.destroy(); + } else if (typeof (readableStream as any).close === 'function') { + (readableStream as any).close(); + } + } + } +} diff --git a/src/core/llm/providers/structuredOutputFormat.ts b/src/core/llm/providers/structuredOutputFormat.ts index 3c38c21d663..9b211a0e78c 100644 --- a/src/core/llm/providers/structuredOutputFormat.ts +++ b/src/core/llm/providers/structuredOutputFormat.ts @@ -11,6 +11,7 @@ * - anthropic → forced tool_use (single tool with input_schema, tool_choice forced) * - gemini → generationConfig.responseMimeType + responseSchema * - openrouter → degrades to json_object (OpenRouter has no schema enforcement) + * - requesty → degrades to json_object (aggregator; no cross-upstream schema enforcement) * * @module agentos/core/llm/providers/structuredOutputFormat */ @@ -94,8 +95,9 @@ export function buildResponseFormat( _gemini: { responseSchema: jsonSchema }, }; case 'openrouter': + case 'requesty': default: - // OpenRouter and unknown providers: best-effort json_object. The + // OpenRouter, Requesty, and unknown providers: best-effort json_object. The // model is asked to return JSON but the schema is not enforced // by the provider. Caller-side Zod validation still runs and // throws on invalid output rather than retrying. From eb55cf4f2604644b0dca568a91693d3d36c5fcf5 Mon Sep 17 00:00:00 2001 From: Thibault Jaigu Date: Tue, 23 Jun 2026 18:54:22 +0100 Subject: [PATCH 2/3] fix: correct role selection operator precedence in streaming finalize --- src/core/llm/providers/implementations/OpenRouterProvider.ts | 2 +- src/core/llm/providers/implementations/RequestyProvider.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/llm/providers/implementations/OpenRouterProvider.ts b/src/core/llm/providers/implementations/OpenRouterProvider.ts index a949e87808f..c64b0775c37 100644 --- a/src/core/llm/providers/implementations/OpenRouterProvider.ts +++ b/src/core/llm/providers/implementations/OpenRouterProvider.ts @@ -607,7 +607,7 @@ export class OpenRouterProvider implements IProvider { }; } const finalMessage: ChatMessage = { - role: choice.delta?.role || accumulatedToolCalls.size > 0 ? 'assistant' : (choice.message?.role || 'assistant'), + role: choice.delta?.role || (accumulatedToolCalls.size > 0 ? 'assistant' : (choice.message?.role || 'assistant')), content: responseTextDelta || (choice.message?.content || null), tool_calls: Array.from(accumulatedToolCalls.values()) .filter(tc => tc.id && tc.function?.name) diff --git a/src/core/llm/providers/implementations/RequestyProvider.ts b/src/core/llm/providers/implementations/RequestyProvider.ts index 176643362bd..5b913b55ae9 100644 --- a/src/core/llm/providers/implementations/RequestyProvider.ts +++ b/src/core/llm/providers/implementations/RequestyProvider.ts @@ -608,7 +608,7 @@ export class RequestyProvider implements IProvider { }; } const finalMessage: ChatMessage = { - role: choice.delta?.role || accumulatedToolCalls.size > 0 ? 'assistant' : (choice.message?.role || 'assistant'), + role: choice.delta?.role || (accumulatedToolCalls.size > 0 ? 'assistant' : (choice.message?.role || 'assistant')), content: responseTextDelta || (choice.message?.content || null), tool_calls: Array.from(accumulatedToolCalls.values()) .filter(tc => tc.id && tc.function?.name) From d431e8db50316c787ccd9ea417f657abb9e15a50 Mon Sep 17 00:00:00 2001 From: Thibault Jaigu Date: Fri, 26 Jun 2026 07:31:22 +0100 Subject: [PATCH 3/3] fix: address review feedback on Requesty provider (config keys, stream error contract, abort, embeddings payload) --- src/api/model.ts | 2 + .../__tests__/provider-defaults.test.ts | 16 +++- src/api/runtime/provider-defaults.ts | 5 ++ src/core/config/AgentOSConfig.ts | 7 +- .../implementations/RequestyProvider.ts | 90 ++++++++++--------- 5 files changed, 74 insertions(+), 46 deletions(-) diff --git a/src/api/model.ts b/src/api/model.ts index dee2edc6bb7..620d8e15755 100644 --- a/src/api/model.ts +++ b/src/api/model.ts @@ -41,6 +41,7 @@ const ENV_KEY_MAP: Record = { openai: 'OPENAI_API_KEY', anthropic: 'ANTHROPIC_API_KEY', openrouter: 'OPENROUTER_API_KEY', + requesty: 'REQUESTY_API_KEY', gemini: 'GEMINI_API_KEY', groq: 'GROQ_API_KEY', together: 'TOGETHER_API_KEY', @@ -53,6 +54,7 @@ const ENV_KEY_MAP: Record = { const ENV_URL_MAP: Record = { openai: 'OPENAI_BASE_URL', openrouter: 'OPENROUTER_BASE_URL', + requesty: 'REQUESTY_BASE_URL', stability: 'STABILITY_BASE_URL', replicate: 'REPLICATE_BASE_URL', ollama: 'OLLAMA_BASE_URL', diff --git a/src/api/runtime/__tests__/provider-defaults.test.ts b/src/api/runtime/__tests__/provider-defaults.test.ts index 39449d1ccdb..fee95ac95fc 100644 --- a/src/api/runtime/__tests__/provider-defaults.test.ts +++ b/src/api/runtime/__tests__/provider-defaults.test.ts @@ -13,7 +13,7 @@ import { resolveModelOption, resolveProvider } from '../model.js'; describe('PROVIDER_DEFAULTS', () => { it('has text model for all major providers', () => { - for (const id of ['openai', 'anthropic', 'ollama', 'openrouter', 'gemini']) { + for (const id of ['openai', 'anthropic', 'ollama', 'openrouter', 'gemini', 'requesty']) { expect(PROVIDER_DEFAULTS[id]?.text).toBeDefined(); } }); @@ -47,6 +47,7 @@ describe('autoDetectProvider', () => { 'TOGETHER_API_KEY', 'MISTRAL_API_KEY', 'XAI_API_KEY', + 'REQUESTY_API_KEY', 'OLLAMA_BASE_URL', 'STABILITY_API_KEY', 'REPLICATE_API_TOKEN', @@ -72,6 +73,19 @@ describe('autoDetectProvider', () => { expect(autoDetectProvider()).toBe('openrouter'); }); + it('detects requesty from REQUESTY_API_KEY', () => { + delete process.env.OPENAI_API_KEY; + delete process.env.ANTHROPIC_API_KEY; + delete process.env.OPENROUTER_API_KEY; + delete process.env.GEMINI_API_KEY; + delete process.env.GROQ_API_KEY; + delete process.env.TOGETHER_API_KEY; + delete process.env.MISTRAL_API_KEY; + delete process.env.XAI_API_KEY; + process.env.REQUESTY_API_KEY = 'requesty-test'; + expect(autoDetectProvider()).toBe('requesty'); + }); + it('detects anthropic from ANTHROPIC_API_KEY', () => { delete process.env.OPENAI_API_KEY; delete process.env.OPENROUTER_API_KEY; diff --git a/src/api/runtime/provider-defaults.ts b/src/api/runtime/provider-defaults.ts index a97356542ad..4c2d6167b01 100644 --- a/src/api/runtime/provider-defaults.ts +++ b/src/api/runtime/provider-defaults.ts @@ -55,6 +55,10 @@ export const PROVIDER_DEFAULTS: Record = { text: 'openai/gpt-4o', cheap: 'openai/gpt-4o-mini', }, + requesty: { + text: 'openai/gpt-4o', + cheap: 'openai/gpt-4o-mini', + }, gemini: { text: 'gemini-2.5-flash', cheap: 'gemini-2.0-flash', @@ -131,6 +135,7 @@ const AUTO_DETECT_ORDER: AutoDetectProbe[] = [ { envKey: 'TOGETHER_API_KEY', provider: 'together' }, { envKey: 'MISTRAL_API_KEY', provider: 'mistral' }, { envKey: 'XAI_API_KEY', provider: 'xai' }, + { envKey: 'REQUESTY_API_KEY', provider: 'requesty' }, { binaryName: 'claude', provider: 'claude-code-cli' }, { binaryName: 'gemini', provider: 'gemini-cli' }, { envKey: 'OLLAMA_BASE_URL', provider: 'ollama' }, diff --git a/src/core/config/AgentOSConfig.ts b/src/core/config/AgentOSConfig.ts index 3eebc79e203..1d047bc213b 100644 --- a/src/core/config/AgentOSConfig.ts +++ b/src/core/config/AgentOSConfig.ts @@ -98,6 +98,7 @@ export function validateEnvironmentConfig(env: Partial): Conf !env.OPENAI_API_KEY && !env.ANTHROPIC_API_KEY && !env.OPENROUTER_API_KEY && + !env.REQUESTY_API_KEY && !env.OLLAMA_BASE_URL ) { warnings.push('No LLM provider API keys configured. AgentOS will have limited functionality.'); @@ -242,9 +243,9 @@ function createModelProviderManagerConfig(env: EnvironmentConfig): AIModelProvid config: { apiKey: env.REQUESTY_API_KEY, baseURL: 'https://router.requesty.ai/v1', - defaultModel: 'openai/gpt-4o', - maxRetries: 3, - timeout: 60000, + defaultModelId: 'openai/gpt-4o', + requestTimeout: 60000, + streamRequestTimeout: 180000, }, }); } diff --git a/src/core/llm/providers/implementations/RequestyProvider.ts b/src/core/llm/providers/implementations/RequestyProvider.ts index 5b913b55ae9..f508c700127 100644 --- a/src/core/llm/providers/implementations/RequestyProvider.ts +++ b/src/core/llm/providers/implementations/RequestyProvider.ts @@ -333,54 +333,64 @@ export class RequestyProvider implements IProvider { ...(options.customModelParams || {}), }; - const stream = await this.makeApiRequest( - '/chat/completions', - 'POST', - // CR8: honor a per-call requestTimeout override over the stream default. - options.requestTimeout ?? this.config.streamRequestTimeout, - payload, - true - ); - - const accumulatedToolCalls: Map = new Map(); - const abortSignal = options.abortSignal; if (abortSignal?.aborted) { yield { id: `requesty-abort-${Date.now()}`, object: 'chat.completion.chunk', created: Math.floor(Date.now()/1000), modelId, choices: [], error: { message: 'Stream aborted prior to first chunk', type: 'abort' }, isFinal: true }; return; } + + const accumulatedToolCalls: Map = new Map(); const abortHandler = () => { /* passive; loop logic handles emission */ }; abortSignal?.addEventListener('abort', abortHandler, { once: true }); - for await (const rawChunk of this.parseSseStream(stream)) { - if (abortSignal?.aborted) { - yield { id: `requesty-abort-${Date.now()}`, object: 'chat.completion.chunk', created: Math.floor(Date.now()/1000), modelId, choices: [], error: { message: 'Stream aborted by caller', type: 'abort' }, isFinal: true }; - break; - } - if (rawChunk.startsWith('data: ') && rawChunk.includes('[DONE]')) { - const doneData = rawChunk.substring('data: '.length).trim(); - if (doneData === '[DONE]') break; - } - if (rawChunk === 'data: [DONE]') { - break; - } + try { + const stream = await this.makeApiRequest( + '/chat/completions', + 'POST', + // CR8: honor a per-call requestTimeout override over the stream default. + options.requestTimeout ?? this.config.streamRequestTimeout, + payload, + true + ); - if (rawChunk.startsWith('data: ')) { - const jsonData = rawChunk.substring('data: '.length); - try { - const apiChunk = JSON.parse(jsonData) as RequestyChatCompletionAPIResponse; - yield this.mapApiToStreamChunkResponse(apiChunk, modelId, accumulatedToolCalls); - // Don't break on finish_reason: with stream_options.include_usage, - // Requesty (like OpenAI) emits a trailing usage-only chunk AFTER - // the finish_reason chunk and BEFORE [DONE]. Breaking here would - // skip the usage chunk and zero out the caller's token totals. The - // [DONE] marker check above is the right termination signal. - } catch (error: unknown) { - console.warn('RequestyProvider: Failed to parse stream chunk JSON, skipping chunk. Data:', jsonData, 'Error:', error); + for await (const rawChunk of this.parseSseStream(stream)) { + if (abortSignal?.aborted) { + yield { id: `requesty-abort-${Date.now()}`, object: 'chat.completion.chunk', created: Math.floor(Date.now()/1000), modelId, choices: [], error: { message: 'Stream aborted by caller', type: 'abort' }, isFinal: true }; + break; + } + if (rawChunk.startsWith('data: ') && rawChunk.includes('[DONE]')) { + const doneData = rawChunk.substring('data: '.length).trim(); + if (doneData === '[DONE]') break; + } + if (rawChunk === 'data: [DONE]') { + break; + } + + if (rawChunk.startsWith('data: ')) { + const jsonData = rawChunk.substring('data: '.length); + try { + const apiChunk = JSON.parse(jsonData) as RequestyChatCompletionAPIResponse; + yield this.mapApiToStreamChunkResponse(apiChunk, modelId, accumulatedToolCalls); + // Don't break on finish_reason: with stream_options.include_usage, + // Requesty (like OpenAI) emits a trailing usage-only chunk AFTER + // the finish_reason chunk and BEFORE [DONE]. Breaking here would + // skip the usage chunk and zero out the caller's token totals. The + // [DONE] marker check above is the right termination signal. + } catch (error: unknown) { + console.warn('RequestyProvider: Failed to parse stream chunk JSON, skipping chunk. Data:', jsonData, 'Error:', error); + } } } + } catch (error: unknown) { + // Provider contract (IProvider) requires a terminal isFinal:true chunk + // even on error. If request setup or SSE iteration throws, surface it as + // a final error chunk instead of letting the generator throw. + const message = error instanceof Error ? error.message : String(error); + yield { id: `requesty-error-${Date.now()}`, object: 'chat.completion.chunk', created: Math.floor(Date.now()/1000), modelId, choices: [], error: { message, type: 'api_error' }, isFinal: true }; + return; + } finally { + abortSignal?.removeEventListener('abort', abortHandler); } - abortSignal?.removeEventListener('abort', abortHandler); } public async generateEmbeddings( @@ -403,13 +413,9 @@ export class RequestyProvider implements IProvider { input: texts, ...(options?.encodingFormat && { encoding_format: options.encodingFormat }), ...(options?.dimensions && { dimensions: options.dimensions }), + ...(options?.inputType && { input_type: options.inputType }), ...(options?.customModelParams || {}), }; - if (options?.inputType && payload.customModelParams && typeof payload.customModelParams === 'object') { - (payload.customModelParams as Record).input_type = options.inputType; - } else if (options?.inputType) { - payload.customModelParams = { input_type: options.inputType }; - } const apiResponseData = await this.makeApiRequest( '/embeddings', @@ -703,7 +709,7 @@ export class RequestyProvider implements IProvider { 'API_REQUEST_FAILED', statusCode, errorType, - { requestEndpoint: endpoint, requestBodyPreview: body ? JSON.stringify(body).substring(0, 200) + '...' : undefined, responseData: errorData, underlyingError: error } + { requestEndpoint: endpoint, requestBodyKeys: body ? Object.keys(body) : undefined, responseData: errorData, underlyingError: error } ); } }