Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 148 additions & 0 deletions hub/src/web/routes/voice.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import { describe, test, expect, beforeEach, afterEach } from 'bun:test'
import { Hono } from 'hono'
import { DEFAULT_VOICE_BACKEND } from '@hapi/protocol/voice'
import type { WebAppEnv } from '../middleware/auth'
import { createVoiceRoutes } from './voice'

/**
 * Builds a fresh Hono application for a single test, with the voice routes
 * mounted under the `/api` prefix.
 */
function createApp() {
    const voiceApp = new Hono<WebAppEnv>()
    voiceApp.route('/api', createVoiceRoutes())
    return voiceApp
}

describe('GET /api/voice/backend', () => {
    // Captured once when the suite is defined so every test can be undone afterwards.
    const originalEnv = process.env.VOICE_BACKEND

    // Put VOICE_BACKEND back exactly as it was (including "unset") after each test.
    afterEach(() => {
        if (originalEnv === undefined) {
            delete process.env.VOICE_BACKEND
        } else {
            process.env.VOICE_BACKEND = originalEnv
        }
    })

    test('returns the default backend when VOICE_BACKEND is unset', async () => {
        delete process.env.VOICE_BACKEND
        const app = createApp()
        const res = await app.request('/api/voice/backend')
        expect(res.status).toBe(200)
        const body = await res.json() as { backend: string }
        // Compare against the shared constant instead of a literal. CI previously
        // failed here with: Expected "elevenlabs", Received "gemini-live".
        expect(body.backend).toBe(DEFAULT_VOICE_BACKEND)
    })

    test('returns gemini-live when configured', async () => {
        process.env.VOICE_BACKEND = 'gemini-live'
        const app = createApp()
        const res = await app.request('/api/voice/backend')
        expect(res.status).toBe(200)
        const body = await res.json() as { backend: string }
        expect(body.backend).toBe('gemini-live')
    })

    test('returns qwen-realtime when configured', async () => {
        process.env.VOICE_BACKEND = 'qwen-realtime'
        const app = createApp()
        const res = await app.request('/api/voice/backend')
        expect(res.status).toBe(200)
        const body = await res.json() as { backend: string }
        expect(body.backend).toBe('qwen-realtime')
    })

    test('falls back to the default backend for unknown values', async () => {
        process.env.VOICE_BACKEND = 'unknown-backend'
        const app = createApp()
        const res = await app.request('/api/voice/backend')
        expect(res.status).toBe(200)
        const body = await res.json() as { backend: string }
        // Same CI failure as the unset case: the fallback is whatever
        // DEFAULT_VOICE_BACKEND is, not necessarily "elevenlabs".
        expect(body.backend).toBe(DEFAULT_VOICE_BACKEND)
    })
})

describe('POST /api/voice/gemini-token', () => {
    // Snapshot of the two key variables, taken once when the suite is defined.
    const savedEnv: Array<[string, string | undefined]> = [
        ['GEMINI_API_KEY', process.env.GEMINI_API_KEY],
        ['GOOGLE_API_KEY', process.env.GOOGLE_API_KEY]
    ]

    // Undo whatever the test did to the key variables, restoring "unset" as unset.
    afterEach(() => {
        for (const [name, value] of savedEnv) {
            if (value === undefined) {
                delete process.env[name]
            } else {
                process.env[name] = value
            }
        }
    })

    // Builds a fresh app and POSTs to the token endpoint.
    const requestToken = () => createApp().request('/api/voice/gemini-token', { method: 'POST' })

    test('returns 400 when no API key configured', async () => {
        delete process.env.GEMINI_API_KEY
        delete process.env.GOOGLE_API_KEY
        const response = await requestToken()
        expect(response.status).toBe(400)
        const payload = await response.json() as { allowed: boolean; error: string }
        expect(payload.allowed).toBe(false)
        expect(payload.error).toContain('not configured')
    })

    test('returns GEMINI_API_KEY when set', async () => {
        process.env.GEMINI_API_KEY = 'test-gemini-key'
        delete process.env.GOOGLE_API_KEY
        const response = await requestToken()
        expect(response.status).toBe(200)
        const payload = await response.json() as { allowed: boolean; apiKey: string }
        expect(payload.allowed).toBe(true)
        expect(payload.apiKey).toBe('test-gemini-key')
    })

    test('falls back to GOOGLE_API_KEY', async () => {
        delete process.env.GEMINI_API_KEY
        process.env.GOOGLE_API_KEY = 'test-google-key'
        const response = await requestToken()
        expect(response.status).toBe(200)
        const payload = await response.json() as { allowed: boolean; apiKey: string }
        expect(payload.allowed).toBe(true)
        expect(payload.apiKey).toBe('test-google-key')
    })
})

describe('POST /api/voice/qwen-token', () => {
    // Snapshot of the two key variables, taken once when the suite is defined.
    const savedEnv: Array<[string, string | undefined]> = [
        ['DASHSCOPE_API_KEY', process.env.DASHSCOPE_API_KEY],
        ['QWEN_API_KEY', process.env.QWEN_API_KEY]
    ]

    // Undo whatever the test did to the key variables, restoring "unset" as unset.
    afterEach(() => {
        for (const [name, value] of savedEnv) {
            if (value === undefined) {
                delete process.env[name]
            } else {
                process.env[name] = value
            }
        }
    })

    // Builds a fresh app and POSTs to the token endpoint.
    const requestToken = () => createApp().request('/api/voice/qwen-token', { method: 'POST' })

    test('returns 400 when no API key configured', async () => {
        delete process.env.DASHSCOPE_API_KEY
        delete process.env.QWEN_API_KEY
        const response = await requestToken()
        expect(response.status).toBe(400)
        const payload = await response.json() as { allowed: boolean; error: string }
        expect(payload.allowed).toBe(false)
        expect(payload.error).toContain('not configured')
    })

    test('returns DASHSCOPE_API_KEY when set', async () => {
        process.env.DASHSCOPE_API_KEY = 'test-dash-key'
        delete process.env.QWEN_API_KEY
        const response = await requestToken()
        expect(response.status).toBe(200)
        const payload = await response.json() as { allowed: boolean; apiKey: string }
        expect(payload.allowed).toBe(true)
        expect(payload.apiKey).toBe('test-dash-key')
    })

    test('falls back to QWEN_API_KEY', async () => {
        delete process.env.DASHSCOPE_API_KEY
        process.env.QWEN_API_KEY = 'test-qwen-key'
        const response = await requestToken()
        expect(response.status).toBe(200)
        const payload = await response.json() as { allowed: boolean; apiKey: string }
        expect(payload.allowed).toBe(true)
        expect(payload.apiKey).toBe('test-qwen-key')
    })
})
52 changes: 51 additions & 1 deletion hub/src/web/routes/voice.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ import type { WebAppEnv } from '../middleware/auth'
import {
ELEVENLABS_API_BASE,
VOICE_AGENT_NAME,
buildVoiceAgentConfig
buildVoiceAgentConfig,
DEFAULT_VOICE_BACKEND
} from '@hapi/protocol/voice'
import type { VoiceBackendType } from '@hapi/protocol/voice'

const tokenRequestSchema = z.object({
customAgentId: z.string().optional(),
Expand Down Expand Up @@ -116,6 +118,54 @@ async function getOrCreateAgentId(apiKey: string): Promise<string | null> {
export function createVoiceRoutes(): Hono<WebAppEnv> {
const app = new Hono<WebAppEnv>()

// Return the configured voice backend type.
// VOICE_BACKEND is honoured when it names a known backend; anything else
// (unset, empty, or unrecognised) falls back to DEFAULT_VOICE_BACKEND.
// 'elevenlabs' is matched explicitly so it stays selectable even when the
// shared default is a different backend.
// NOTE(review): assumes 'elevenlabs' is a member of VoiceBackendType — confirm.
app.get('/voice/backend', (c) => {
    const raw = process.env.VOICE_BACKEND
    const backend: VoiceBackendType =
        raw === 'elevenlabs' || raw === 'gemini-live' || raw === 'qwen-realtime'
            ? raw
            : DEFAULT_VOICE_BACKEND
    return c.json({ backend })
})

// Hand out the Gemini API key used for Gemini Live voice sessions.
// The key is read from GEMINI_API_KEY, falling back to GOOGLE_API_KEY; when
// neither is set the route answers 400 with `allowed: false`.
// NOTE(review): the raw server-side key is returned to the caller. The original
// note says Gemini Live has no ephemeral tokens and that the key is not
// persisted client-side — neither is verifiable from this file; confirm.
app.post('/voice/gemini-token', async (c) => {
    const geminiKey = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY

    if (geminiKey) {
        // Optional overrides for proxy/relay setups; empty values are omitted.
        const wsOverride = process.env.GEMINI_LIVE_WS_URL || undefined
        const baseOverride = process.env.GEMINI_API_BASE || undefined
        return c.json({
            allowed: true,
            apiKey: geminiKey,
            wsUrl: wsOverride,
            baseUrl: baseOverride
        })
    }

    return c.json({
        allowed: false,
        error: 'Gemini API key not configured (set GEMINI_API_KEY or GOOGLE_API_KEY)'
    }, 400)
})

// Hand out the Qwen (DashScope) API key used for Qwen Realtime voice sessions.
// The key is read from DASHSCOPE_API_KEY, falling back to QWEN_API_KEY; when
// neither is set the route answers 400 with `allowed: false`.
// NOTE(review): the raw server-side key is returned to the caller — confirm
// this exposure is intended.
app.post('/voice/qwen-token', async (c) => {
    const dashscopeKey = process.env.DASHSCOPE_API_KEY || process.env.QWEN_API_KEY

    if (dashscopeKey) {
        // Optional websocket endpoint override; an empty value is omitted.
        const wsOverride = process.env.QWEN_REALTIME_WS_URL || undefined
        return c.json({
            allowed: true,
            apiKey: dashscopeKey,
            wsUrl: wsOverride
        })
    }

    return c.json({
        allowed: false,
        error: 'DashScope API key not configured (set DASHSCOPE_API_KEY or QWEN_API_KEY)'
    }, 400)
})

// Get ElevenLabs ConvAI conversation token
app.post('/voice/token', async (c) => {
const json = await c.req.json().catch(() => null)
Expand Down
108 changes: 103 additions & 5 deletions hub/src/web/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,61 @@ import { createPushRoutes } from './routes/push'
import { createVoiceRoutes } from './routes/voice'
import type { SSEManager } from '../sse/sseManager'
import type { VisibilityTracker } from '../visibility/visibilityTracker'
import type { Server as BunServer } from 'bun'
import type { Server as BunServer, ServerWebSocket } from 'bun'
import type { Server as SocketEngine } from '@socket.io/bun-engine'
import type { WebSocketData } from '@socket.io/bun-engine'

/**
 * Qwen Realtime WebSocket proxy — bridges a browser socket (which cannot set
 * custom headers) to DashScope (which requires an Authorization header).
 *
 * Returns the `open` / `message` / `close` handlers for the client side of the
 * proxy. `open` expects `clientWs.data` to carry `{ apiKey, model }` and dials
 * the upstream socket at QWEN_REALTIME_WS_URL (or the built-in DashScope URL)
 * with `?model=<model>` appended.
 *
 * Behaviour worth knowing:
 * - Client frames that arrive while the upstream socket is still connecting are
 *   queued (up to a fixed cap) and flushed once it opens, instead of being lost.
 * - Closing either side always closes the other. If the peer's close code or
 *   reason is rejected by `close()`, a plain close is used as a fallback so the
 *   paired socket is not leaked.
 */
function createQwenProxyWebSocketHandler() {
    const QWEN_WS_BASE = 'wss://dashscope.aliyuncs.com/api-ws/v1/realtime'
    // Cap on frames buffered before the upstream socket is open; frames beyond
    // the cap are dropped so a client cannot grow the queue without bound.
    const MAX_PENDING_MESSAGES = 256

    type ClientMessage = string | ArrayBuffer | Uint8Array
    type ProxyLink = { upstream: WebSocket; pending: ClientMessage[] }

    // Map browser WS → upstream WS plus its not-yet-forwarded frames
    const links = new WeakMap<ServerWebSocket<unknown>, ProxyLink>()

    // Close the upstream socket, retrying without arguments when the
    // code/reason pair is rejected (the WebSocket API only accepts 1000 and
    // 3000-4999 as explicit codes).
    const closeUpstream = (upstream: WebSocket, code: number, reason: string) => {
        try {
            upstream.close(code, reason)
        } catch {
            try { upstream.close() } catch { /* already closed */ }
        }
    }

    // Close the browser socket, retrying with a generic 1011 when the
    // upstream-provided code/reason is rejected.
    const closeClient = (clientWs: ServerWebSocket<unknown>, code: number, reason: string) => {
        try {
            clientWs.close(code, reason)
        } catch {
            try { clientWs.close(1011, 'Upstream closed') } catch { /* client gone */ }
        }
    }

    return {
        open(clientWs: ServerWebSocket<unknown>) {
            const data = clientWs.data as { apiKey: string; model: string }
            const upstreamUrl = `${process.env.QWEN_REALTIME_WS_URL || QWEN_WS_BASE}?model=${encodeURIComponent(data.model)}`

            // The options-object form (with headers) is a runtime extension, hence the cast.
            const upstream = new WebSocket(upstreamUrl, {
                headers: { 'Authorization': `Bearer ${data.apiKey}` }
            } as unknown as string[])

            const link: ProxyLink = { upstream, pending: [] }
            links.set(clientWs, link)

            upstream.onopen = () => {
                // Connection ready — flush anything the client sent while we were dialing.
                const queued = link.pending.splice(0)
                for (const queuedMessage of queued) {
                    try { upstream.send(queuedMessage) } catch { /* upstream gone */ }
                }
            }
            upstream.onmessage = (event) => {
                // Forward upstream → client
                try {
                    if (clientWs.readyState === 1) {
                        clientWs.send(typeof event.data === 'string' ? event.data : new Uint8Array(event.data as ArrayBuffer))
                    }
                } catch { /* client gone */ }
            }
            upstream.onerror = () => {
                closeClient(clientWs, 1011, 'Upstream error')
            }
            upstream.onclose = (event) => {
                closeClient(clientWs, event.code, event.reason)
                link.pending.length = 0
                links.delete(clientWs)
            }
        },
        message(clientWs: ServerWebSocket<unknown>, message: string | ArrayBuffer | Uint8Array) {
            const link = links.get(clientWs)
            if (!link) return

            const state = link.upstream.readyState
            if (state === WebSocket.OPEN) {
                link.upstream.send(message)
            } else if (state === WebSocket.CONNECTING && link.pending.length < MAX_PENDING_MESSAGES) {
                // Upstream handshake still in flight — hold the frame until onopen.
                link.pending.push(message)
            }
            // CLOSING / CLOSED (or queue full): nothing useful can be done with the frame.
        },
        close(clientWs: ServerWebSocket<unknown>, code: number, reason: string) {
            const link = links.get(clientWs)
            if (link) {
                link.pending.length = 0
                closeUpstream(link.upstream, code, reason)
                links.delete(clientWs)
            }
        }
    }
}
import { loadEmbeddedAssetMap, type EmbeddedWebAsset } from './embeddedAssets'
import { isBunCompiled } from '../utils/bunCompiled'
import type { Store } from '../store'
Expand Down Expand Up @@ -230,16 +282,62 @@ export async function startWebServer(options: {

const socketHandler = options.socketEngine.handler()

const server = Bun.serve({
// Wrap socket.io websocket handler to also support Qwen Realtime proxy
const originalWsHandler = socketHandler.websocket
const qwenProxyHandler = createQwenProxyWebSocketHandler()

// eslint-disable-next-line @typescript-eslint/no-explicit-any
const server = (Bun.serve as any)({
hostname: configuration.listenHost,
port: configuration.listenPort,
idleTimeout: Math.max(30, socketHandler.idleTimeout),
maxRequestBodySize: Math.max(socketHandler.maxRequestBodySize, 68 * 1024 * 1024),
websocket: socketHandler.websocket,
fetch: (req, server) => {
websocket: {
...originalWsHandler,
open(ws: unknown) {
const wsAny = ws as ServerWebSocket<{ _qwenProxy?: boolean }>
if (wsAny.data?._qwenProxy) {
qwenProxyHandler.open(wsAny)
} else {
originalWsHandler.open?.(ws as never)
}
},
message(ws: unknown, message: unknown) {
const wsAny = ws as ServerWebSocket<{ _qwenProxy?: boolean }>
if (wsAny.data?._qwenProxy) {
qwenProxyHandler.message(wsAny, message as string)
} else {
originalWsHandler.message?.(ws as never, message as never)
}
},
close(ws: unknown, code: number, reason: string) {
const wsAny = ws as ServerWebSocket<{ _qwenProxy?: boolean }>
if (wsAny.data?._qwenProxy) {
qwenProxyHandler.close(wsAny, code, reason)
} else {
originalWsHandler.close?.(ws as never, code as never, reason as never)
}
}
},
fetch: (req: Request, server: { upgrade: (req: Request, opts?: unknown) => boolean }) => {
const url = new URL(req.url)
if (url.pathname.startsWith('/socket.io/')) {
return socketHandler.fetch(req, server)
return socketHandler.fetch(req, server as never)
}
// Qwen Realtime WebSocket proxy
if (url.pathname === '/api/voice/qwen-ws') {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[BLOCKER] This branch upgrades /api/voice/qwen-ws before the request ever reaches app.fetch(req), so it bypasses createAuthMiddleware() entirely. As written, any client that can reach the hub can open a proxied DashScope session with the server-side API key.

Suggested fix:

const token = url.searchParams.get('token')
if (!token) {
    return new Response('Missing authorization token', { status: 401 })
}

await jwtVerify(token, options.jwtSecret, { algorithms: ['HS256'] })
const upgraded = server.upgrade(req, {
    data: { _qwenProxy: true, apiKey, model }
})

const apiKey = process.env.DASHSCOPE_API_KEY || process.env.QWEN_API_KEY
const model = url.searchParams.get('model') || 'qwen3.5-omni-plus-realtime'
if (!apiKey) {
return new Response('DashScope API key not configured', { status: 400 })
}
const upgraded = server.upgrade(req, {
data: { _qwenProxy: true, apiKey, model }
})
if (!upgraded) {
return new Response('WebSocket upgrade failed', { status: 500 })
}
return undefined as unknown as Response
}
return app.fetch(req)
}
Expand Down
Loading
Loading