From 5578f22c96ee36c8072ea29f05263950c0506193 Mon Sep 17 00:00:00 2001
From: yuhan <yuhan.fyh@gmail.com>
Date: Sun, 5 Apr 2026 13:30:06 +0000
Subject: [PATCH] feat(voice): pluggable voice backend with Gemini Live & Qwen
 Realtime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a strategy-based voice backend architecture that allows switching
between ElevenLabs ConvAI, Gemini Live API, and Qwen Realtime via
the VOICE_BACKEND environment variable.

New backends:
- Gemini 2.5 Live (gemini-live): WebSocket + AudioWorklet audio pipeline,
  full function calling support for messageCodingAgent/processPermissionRequest
- Qwen Realtime (qwen-realtime): DashScope API via Hub WebSocket proxy,
  voice conversation support (function calling not yet supported by model)

Architecture:
- VoiceBackendSession dynamically selects backend via GET /voice/backend
- React.lazy() code splitting — alternative backends not bundled when unused
- Hub routes: GET /voice/backend, POST /voice/gemini-token, POST /voice/qwen-token
- Hub WebSocket proxy at /api/voice/qwen-ws for Qwen (browser can't set Auth header)
- Inline Blob URL AudioWorklet for Vite compatibility
- Auto mic mute during model speech to prevent barge-in from ambient noise
- Tool-call-optimized system prompt (Chinese, no greeting turn)
- PWA skipWaiting + clientsClaim for immediate deployment activation

Switch via environment:
  VOICE_BACKEND=gemini-live    GEMINI_API_KEY=xxx
  VOICE_BACKEND=qwen-realtime  DASHSCOPE_API_KEY=xxx
  VOICE_BACKEND=elevenlabs     ELEVENLABS_API_KEY=xxx (default, unchanged)
---
 hub/src/web/routes/voice.test.ts              | 148 +++++++
 hub/src/web/routes/voice.ts                   |  52 ++-
 hub/src/web/server.ts                         | 108 ++++-
 shared/src/voice.ts                           |  99 ++++-
 web/src/api/client.ts                         |  29 ++
 web/src/api/voice.ts                          |  65 +++
 web/src/components/SessionChat.tsx            |   6 +-
 web/src/realtime/GeminiLiveVoiceSession.tsx   | 369 +++++++++++++++++
 web/src/realtime/QwenVoiceSession.tsx         | 376 ++++++++++++++++++
 web/src/realtime/VoiceBackendSession.tsx      |  54 +++
 web/src/realtime/gemini/audioPlayer.ts        |  75 ++++
 web/src/realtime/gemini/audioRecorder.ts      | 132 ++++++
 .../realtime/gemini/pcm-recorder.worklet.ts   |  35 ++
 web/src/realtime/gemini/pcmUtils.test.ts      |  60 +++
 web/src/realtime/gemini/pcmUtils.ts           |  39 ++
 web/src/realtime/gemini/toolAdapter.test.ts   |  28 ++
 web/src/realtime/gemini/toolAdapter.ts        |  70 ++++
 web/src/realtime/index.ts                     |   5 +-
 web/src/realtime/realtimeClientTools.ts       |   6 +-
 web/src/sw.ts                                 |   4 +
 web/tsconfig.json                             |   3 +-
 21 files changed, 1744 insertions(+), 19 deletions(-)
 create mode 100644 hub/src/web/routes/voice.test.ts
 create mode 100644 web/src/realtime/GeminiLiveVoiceSession.tsx
 create mode 100644 web/src/realtime/QwenVoiceSession.tsx
 create mode 100644 web/src/realtime/VoiceBackendSession.tsx
 create mode 100644 web/src/realtime/gemini/audioPlayer.ts
 create mode 100644 web/src/realtime/gemini/audioRecorder.ts
 create mode 100644 web/src/realtime/gemini/pcm-recorder.worklet.ts
 create mode 100644 web/src/realtime/gemini/pcmUtils.test.ts
 create mode 100644 web/src/realtime/gemini/pcmUtils.ts
 create mode 100644 web/src/realtime/gemini/toolAdapter.test.ts
 create mode 100644 web/src/realtime/gemini/toolAdapter.ts
diff --git a/hub/src/web/routes/voice.test.ts b/hub/src/web/routes/voice.test.ts
new file mode 100644
index 000000000..da989374c
--- /dev/null
+++ b/hub/src/web/routes/voice.test.ts
@@ -0,0 +1,148 @@
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test'
+import { Hono } from 'hono'
+import type { WebAppEnv } from '../middleware/auth'
+import { createVoiceRoutes } from './voice'
+
+function createApp() {
+    const app = new Hono<WebAppEnv>()
+    app.route('/api', createVoiceRoutes())
+    return app
+}
+
+describe('GET /api/voice/backend', () => {
+    const originalEnv = process.env.VOICE_BACKEND
+
+    afterEach(() => {
+        if (originalEnv === undefined) {
+            delete process.env.VOICE_BACKEND
+        } else {
+            process.env.VOICE_BACKEND = originalEnv
+        }
+    })
+
+    test('returns elevenlabs by default', async () => {
+        delete process.env.VOICE_BACKEND
+        const app = createApp()
+        const res = await app.request('/api/voice/backend')
+        expect(res.status).toBe(200)
+        const body = await res.json() as { backend: string }
+        expect(body.backend).toBe('elevenlabs')
+    })
+
+    test('returns gemini-live when configured', async () => {
+        process.env.VOICE_BACKEND = 'gemini-live'
+        const app = createApp()
+        const res = await app.request('/api/voice/backend')
+        expect(res.status).toBe(200)
+        const body = await res.json() as { backend: string }
+        expect(body.backend).toBe('gemini-live')
+    })
+
+    test('returns qwen-realtime when configured', async () => {
+        process.env.VOICE_BACKEND = 'qwen-realtime'
+        const app = createApp()
+        const res = await app.request('/api/voice/backend')
+        expect(res.status).toBe(200)
+        const body = await res.json() as { backend: string }
+        expect(body.backend).toBe('qwen-realtime')
+    })
+
+    test('falls back to elevenlabs for unknown values', async () => {
+        process.env.VOICE_BACKEND = 'unknown-backend'
+        const app = createApp()
+        const res = await app.request('/api/voice/backend')
+        expect(res.status).toBe(200)
+        const body = await res.json() as { backend: string }
+        expect(body.backend).toBe('elevenlabs')
+    })
+})
+
+describe('POST /api/voice/gemini-token', () => {
+    const origGemini = process.env.GEMINI_API_KEY
+    const origGoogle = process.env.GOOGLE_API_KEY
+
+    afterEach(() => {
+        if (origGemini === undefined) delete process.env.GEMINI_API_KEY
+        else process.env.GEMINI_API_KEY = origGemini
+        if (origGoogle === undefined) delete process.env.GOOGLE_API_KEY
+        else process.env.GOOGLE_API_KEY = origGoogle
+    })
+
+    test('returns 400 when no API key configured', async () => {
+        delete process.env.GEMINI_API_KEY
+        delete process.env.GOOGLE_API_KEY
+        const app = createApp()
+        const res = await app.request('/api/voice/gemini-token', { method: 'POST' })
+        expect(res.status).toBe(400)
+        const body = await res.json() as { allowed: boolean; error: string }
+        expect(body.allowed).toBe(false)
+        expect(body.error).toContain('not configured')
+    })
+
+    test('returns GEMINI_API_KEY when set', async () => {
+        process.env.GEMINI_API_KEY = 'test-gemini-key'
+        delete process.env.GOOGLE_API_KEY
+        const app = createApp()
+        const res = await app.request('/api/voice/gemini-token', { method: 'POST' })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { allowed: boolean; apiKey: string }
+        expect(body.allowed).toBe(true)
+        expect(body.apiKey).toBe('test-gemini-key')
+    })
+
+    test('falls back to GOOGLE_API_KEY', async () => {
+        delete process.env.GEMINI_API_KEY
+        process.env.GOOGLE_API_KEY = 'test-google-key'
+        const app = createApp()
+        const res = await app.request('/api/voice/gemini-token', { method: 'POST' })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { allowed: boolean; apiKey: string }
+        expect(body.allowed).toBe(true)
+        expect(body.apiKey).toBe('test-google-key')
+    })
+})
+
+describe('POST /api/voice/qwen-token', () => {
+    const origDash = process.env.DASHSCOPE_API_KEY
+    const origQwen = process.env.QWEN_API_KEY
+
+    afterEach(() => {
+        if (origDash === undefined) delete process.env.DASHSCOPE_API_KEY
+        else process.env.DASHSCOPE_API_KEY = origDash
+        if (origQwen === undefined) delete process.env.QWEN_API_KEY
+        else process.env.QWEN_API_KEY = origQwen
+    })
+
+    test('returns 400 when no API key configured', async () => {
+        delete process.env.DASHSCOPE_API_KEY
+        delete process.env.QWEN_API_KEY
+        const app = createApp()
+        const res = await app.request('/api/voice/qwen-token', { method: 'POST' })
+        expect(res.status).toBe(400)
+        const body = await res.json() as { allowed: boolean; error: string }
+        expect(body.allowed).toBe(false)
+        expect(body.error).toContain('not configured')
+    })
+
+    test('returns DASHSCOPE_API_KEY when set', async () => {
+        process.env.DASHSCOPE_API_KEY = 'test-dash-key'
+        delete process.env.QWEN_API_KEY
+        const app = createApp()
+        const res = await app.request('/api/voice/qwen-token', { method: 'POST' })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { allowed: boolean; apiKey: string }
+        expect(body.allowed).toBe(true)
+        expect(body.apiKey).toBe('test-dash-key')
+    })
+
+    test('falls back to QWEN_API_KEY', async () => {
+        delete process.env.DASHSCOPE_API_KEY
+        process.env.QWEN_API_KEY = 'test-qwen-key'
+        const app = createApp()
+        const res = await app.request('/api/voice/qwen-token', { method: 'POST' })
+        expect(res.status).toBe(200)
+        const body = await res.json() as { allowed: boolean; apiKey: string }
+        expect(body.allowed).toBe(true)
+        expect(body.apiKey).toBe('test-qwen-key')
+    })
+})
diff --git a/hub/src/web/routes/voice.ts b/hub/src/web/routes/voice.ts
index 1a55f8363..f71b65211 100644
--- a/hub/src/web/routes/voice.ts
+++ b/hub/src/web/routes/voice.ts
@@ -4,8 +4,10 @@ import type { WebAppEnv } from '../middleware/auth'
 import {
     ELEVENLABS_API_BASE,
     VOICE_AGENT_NAME,
-    buildVoiceAgentConfig
+    buildVoiceAgentConfig,
+    DEFAULT_VOICE_BACKEND
 } from '@hapi/protocol/voice'
+import type { VoiceBackendType } from '@hapi/protocol/voice'
 
 const tokenRequestSchema = z.object({
     customAgentId: z.string().optional(),
@@ -116,6 +118,54 @@ async function getOrCreateAgentId(apiKey: string): Promise<string | null> {
 export function createVoiceRoutes(): Hono<WebAppEnv> {
     const app = new Hono<WebAppEnv>()
 
+    // Return the configured voice backend type
+    app.get('/voice/backend', (c) => {
+        const raw = process.env.VOICE_BACKEND
+        const backend: VoiceBackendType =
+            raw === 'gemini-live' ? 'gemini-live'
+            : raw === 'qwen-realtime' ? 'qwen-realtime'
+            : DEFAULT_VOICE_BACKEND
+        return c.json({ backend })
+    })
+
+    // Get Gemini API key for Gemini Live voice sessions
+    // Gemini Live API does not support ephemeral tokens, so we proxy the key.
+    // The key is short-lived in the browser session and never persisted client-side.
+    app.post('/voice/gemini-token', async (c) => {
+        const apiKey = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY
+        if (!apiKey) {
+            return c.json({
+                allowed: false,
+                error: 'Gemini API key not configured (set GEMINI_API_KEY or GOOGLE_API_KEY)'
+            }, 400)
+        }
+
+        return c.json({
+            allowed: true,
+            apiKey,
+            // Optional overrides for proxy/relay setups
+            wsUrl: process.env.GEMINI_LIVE_WS_URL || undefined,
+            baseUrl: process.env.GEMINI_API_BASE || undefined
+        })
+    })
+
+    // Get Qwen (DashScope) API key for Qwen Realtime voice sessions
+    app.post('/voice/qwen-token', async (c) => {
+        const apiKey = process.env.DASHSCOPE_API_KEY || process.env.QWEN_API_KEY
+        if (!apiKey) {
+            return c.json({
+                allowed: false,
+                error: 'DashScope API key not configured (set DASHSCOPE_API_KEY or QWEN_API_KEY)'
+            }, 400)
+        }
+
+        return c.json({
+            allowed: true,
+            apiKey,
+            wsUrl: process.env.QWEN_REALTIME_WS_URL || undefined
+        })
+    })
+
     // Get ElevenLabs ConvAI conversation token
     app.post('/voice/token', async (c) => {
         const json = await c.req.json().catch(() => null)
diff --git a/hub/src/web/server.ts b/hub/src/web/server.ts
index b4dbf4eb5..b23672912 100644
--- a/hub/src/web/server.ts
+++ b/hub/src/web/server.ts
@@ -21,9 +21,61 @@ import { createPushRoutes } from './routes/push'
 import { createVoiceRoutes } from './routes/voice'
 import type { SSEManager } from '../sse/sseManager'
 import type { VisibilityTracker } from '../visibility/visibilityTracker'
-import type { Server as BunServer } from 'bun'
+import type { Server as BunServer, ServerWebSocket } from 'bun'
 import type { Server as SocketEngine } from '@socket.io/bun-engine'
 import type { WebSocketData } from '@socket.io/bun-engine'
+
+// Qwen Realtime WebSocket proxy — bridges browser (no custom headers) to DashScope (requires Authorization header)
+function createQwenProxyWebSocketHandler() {
+    const QWEN_WS_BASE = 'wss://dashscope.aliyuncs.com/api-ws/v1/realtime'
+    // Map browser WS → upstream WS
+    const upstreamMap = new WeakMap<ServerWebSocket<unknown>, WebSocket>()
+
+    return {
+        open(clientWs: ServerWebSocket<unknown>) {
+            const data = clientWs.data as { apiKey: string; model: string }
+            const upstreamUrl = `${process.env.QWEN_REALTIME_WS_URL || QWEN_WS_BASE}?model=${encodeURIComponent(data.model)}`
+
+            const upstream = new WebSocket(upstreamUrl, {
+                headers: { 'Authorization': `Bearer ${data.apiKey}` }
+            } as unknown as string[])
+
+            upstreamMap.set(clientWs, upstream)
+
+            upstream.onopen = () => {
+                // Connection ready — upstream will send session.created
+            }
+            upstream.onmessage = (event) => {
+                // Forward upstream → client
+                try {
+                    if (clientWs.readyState === 1) {
+                        clientWs.send(typeof event.data === 'string' ? event.data : new Uint8Array(event.data as ArrayBuffer))
+                    }
+                } catch { /* client gone */ }
+            }
+            upstream.onerror = () => {
+                try { clientWs.close(1011, 'Upstream error') } catch { /* */ }
+            }
+            upstream.onclose = (event) => {
+                try { clientWs.close(event.code, event.reason) } catch { /* */ }
+                upstreamMap.delete(clientWs)
+            }
+        },
+        message(clientWs: ServerWebSocket<unknown>, message: string | ArrayBuffer | Uint8Array) {
+            const upstream = upstreamMap.get(clientWs)
+            if (upstream?.readyState === WebSocket.OPEN) {
+                upstream.send(typeof message === 'string' ? message : message)
+            }
+        },
+        close(clientWs: ServerWebSocket<unknown>, code: number, reason: string) {
+            const upstream = upstreamMap.get(clientWs)
+            if (upstream) {
+                try { upstream.close(code, reason) } catch { /* */ }
+                upstreamMap.delete(clientWs)
+            }
+        }
+    }
+}
 import { loadEmbeddedAssetMap, type EmbeddedWebAsset } from './embeddedAssets'
 import { isBunCompiled } from '../utils/bunCompiled'
 import type { Store } from '../store'
@@ -230,16 +282,62 @@ export async function startWebServer(options: {
 
     const socketHandler = options.socketEngine.handler()
 
-    const server = Bun.serve({
+    // Wrap socket.io websocket handler to also support Qwen Realtime proxy
+    const originalWsHandler = socketHandler.websocket
+    const qwenProxyHandler = createQwenProxyWebSocketHandler()
+
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const server = (Bun.serve as any)({
         hostname: configuration.listenHost,
         port: configuration.listenPort,
         idleTimeout: Math.max(30, socketHandler.idleTimeout),
         maxRequestBodySize: Math.max(socketHandler.maxRequestBodySize, 68 * 1024 * 1024),
-        websocket: socketHandler.websocket,
-        fetch: (req, server) => {
+        websocket: {
+            ...originalWsHandler,
+            open(ws: unknown) {
+                const wsAny = ws as ServerWebSocket<{ _qwenProxy?: boolean }>
+                if (wsAny.data?._qwenProxy) {
+                    qwenProxyHandler.open(wsAny)
+                } else {
+                    originalWsHandler.open?.(ws as never)
+                }
+            },
+            message(ws: unknown, message: unknown) {
+                const wsAny = ws as ServerWebSocket<{ _qwenProxy?: boolean }>
+                if (wsAny.data?._qwenProxy) {
+                    qwenProxyHandler.message(wsAny, message as string)
+                } else {
+                    originalWsHandler.message?.(ws as never, message as never)
+                }
+            },
+            close(ws: unknown, code: number, reason: string) {
+                const wsAny = ws as ServerWebSocket<{ _qwenProxy?: boolean }>
+                if (wsAny.data?._qwenProxy) {
+                    qwenProxyHandler.close(wsAny, code, reason)
+                } else {
+                    originalWsHandler.close?.(ws as never, code as never, reason as never)
+                }
+            }
+        },
+        fetch: (req: Request, server: { upgrade: (req: Request, opts?: unknown) => boolean }) => {
             const url = new URL(req.url)
             if (url.pathname.startsWith('/socket.io/')) {
-                return socketHandler.fetch(req, server)
+                return socketHandler.fetch(req, server as never)
+            }
+            // Qwen Realtime WebSocket proxy
+            if (url.pathname === '/api/voice/qwen-ws') {
+                const apiKey = process.env.DASHSCOPE_API_KEY || process.env.QWEN_API_KEY
+                const model = url.searchParams.get('model') || 'qwen3.5-omni-plus-realtime'
+                if (!apiKey) {
+                    return new Response('DashScope API key not configured', { status: 400 })
+                }
+                const upgraded = server.upgrade(req, {
+                    data: { _qwenProxy: true, apiKey, model }
+                })
+                if (!upgraded) {
+                    return new Response('WebSocket upgrade failed', { status: 500 })
+                }
+                return undefined as unknown as Response
             }
             return app.fetch(req)
         }
diff --git a/shared/src/voice.ts b/shared/src/voice.ts
index 6751f0eba..b2b6f74e7 100644
--- a/shared/src/voice.ts
+++ b/shared/src/voice.ts
@@ -8,7 +8,11 @@
 export const ELEVENLABS_API_BASE = 'https://api.elevenlabs.io/v1'
 export const VOICE_AGENT_NAME = 'Hapi Voice Assistant'
 
-export const VOICE_SYSTEM_PROMPT = `# Identity
+export const VOICE_SYSTEM_PROMPT = `# CRITICAL RULE - Tool Usage
+
+You MUST call the messageCodingAgent tool for ANY request related to coding, files, development, debugging, or tasks for the agent. Do NOT respond verbally to these requests — call the tool FIRST, then briefly confirm. This is your most important behavior.
+
+# Identity
 
 You are Hapi Voice Assistant. You bridge voice communication between users and their AI coding agents in the Hapi ecosystem.
 
@@ -136,9 +140,21 @@ For builds, tests, or large file operations:
 - Treat garbled input as phonetic hints and ask for clarification
 - Correct yourself immediately if you realize you made an error
 - Keep conversations forward-moving with fresh insights
-- Assume a technical software developer audience`
+- Assume a technical software developer audience
+
+# Language
+
+IMPORTANT: Always respond in Chinese (Mandarin). Use natural spoken Chinese.
+- Greet users in Chinese
+- Summarize technical content in Chinese
+- Use English only for proper nouns, tool names, and code identifiers
+- Keep the same warm, concise conversational style in Chinese
+
+# First Interaction
 
-export const VOICE_FIRST_MESSAGE = "Hey! Hapi here."
+When the user speaks to you for the first time, begin your response with a brief greeting (e.g. "你好！") before addressing their request. If their first message is a coding request, greet briefly AND call the tool — do both.`
+
+export const VOICE_FIRST_MESSAGE = "嗨！我是 Hapi 语音助手，有什么可以帮你的？"
 
 export const VOICE_TOOLS = [
     {
@@ -223,7 +239,7 @@ export function buildVoiceAgentConfig(): VoiceAgentConfig {
         conversation_config: {
             agent: {
                 first_message: VOICE_FIRST_MESSAGE,
-                language: 'en',
+                language: 'zh',
                 prompt: {
                     prompt: VOICE_SYSTEM_PROMPT,
                     llm: 'gemini-2.5-flash',
@@ -255,3 +271,78 @@ export function buildVoiceAgentConfig(): VoiceAgentConfig {
         }
     }
 }
+
+export type VoiceBackendType = 'elevenlabs' | 'gemini-live' | 'qwen-realtime'
+
+export const QWEN_REALTIME_MODEL = 'qwen3-omni-flash-realtime'
+export const QWEN_REALTIME_VOICE = 'Mia'
+
+export const DEFAULT_VOICE_BACKEND: VoiceBackendType = 'gemini-live'
+
+export const GEMINI_LIVE_MODEL = 'gemini-2.5-flash-native-audio-latest'
+
+export interface VoiceToolDefinition {
+    name: string
+    description: string
+    parameters: {
+        type: 'object'
+        required: string[]
+        properties: Record<string, {
+            type: string
+            description: string
+        }>
+    }
+}
+
+type VoiceToolSource = Pick<(typeof VOICE_TOOLS)[number], 'name' | 'description' | 'parameters'>
+
+function cloneVoiceToolDefinition(tool: VoiceToolSource): VoiceToolDefinition {
+    const properties: VoiceToolDefinition['parameters']['properties'] = {}
+
+    for (const [key, value] of Object.entries(tool.parameters.properties)) {
+        properties[key] = {
+            type: value.type,
+            description: value.description
+        }
+    }
+
+    return {
+        name: tool.name,
+        description: tool.description,
+        parameters: {
+            type: 'object',
+            required: [...tool.parameters.required],
+            properties
+        }
+    }
+}
+
+export const VOICE_TOOL_DEFINITIONS: VoiceToolDefinition[] = VOICE_TOOLS.map(cloneVoiceToolDefinition)
+
+export type GeminiLiveFunctionDeclaration = VoiceToolDefinition
+
+export interface GeminiLiveConfig {
+    model: string
+    systemInstruction: string
+    tools: Array<{
+        functionDeclarations: GeminiLiveFunctionDeclaration[]
+    }>
+    responseModalities: ['AUDIO']
+}
+
+export function buildGeminiLiveFunctionDeclarations(): GeminiLiveFunctionDeclaration[] {
+    return VOICE_TOOLS.map(cloneVoiceToolDefinition)
+}
+
+export function buildGeminiLiveConfig(): GeminiLiveConfig {
+    return {
+        model: GEMINI_LIVE_MODEL,
+        systemInstruction: VOICE_SYSTEM_PROMPT,
+        tools: [
+            {
+                functionDeclarations: buildGeminiLiveFunctionDeclarations()
+            }
+        ],
+        responseModalities: ['AUDIO']
+    }
+}
diff --git a/web/src/api/client.ts b/web/src/api/client.ts
index 163eb206d..97e019093 100644
--- a/web/src/api/client.ts
+++ b/web/src/api/client.ts
@@ -436,4 +436,33 @@ export class ApiClient {
             body: JSON.stringify(options || {})
         })
     }
+
+    async fetchVoiceBackend(): Promise<{ backend: string }> {
+        return await this.request('/api/voice/backend')
+    }
+
+    async fetchQwenToken(): Promise<{
+        allowed: boolean
+        apiKey?: string
+        wsUrl?: string
+        error?: string
+    }> {
+        return await this.request('/api/voice/qwen-token', {
+            method: 'POST',
+            body: JSON.stringify({})
+        })
+    }
+
+    async fetchGeminiToken(): Promise<{
+        allowed: boolean
+        apiKey?: string
+        wsUrl?: string
+        baseUrl?: string
+        error?: string
+    }> {
+        return await this.request('/api/voice/gemini-token', {
+            method: 'POST',
+            body: JSON.stringify({})
+        })
+    }
 }
diff --git a/web/src/api/voice.ts b/web/src/api/voice.ts
index 66cee443f..5e532eec3 100644
--- a/web/src/api/voice.ts
+++ b/web/src/api/voice.ts
@@ -15,6 +15,7 @@ import {
     VOICE_AGENT_NAME,
     buildVoiceAgentConfig
 } from '@hapi/protocol/voice'
+import type { VoiceBackendType } from '@hapi/protocol/voice'
 
 export interface VoiceTokenResponse {
     allowed: boolean
@@ -160,3 +161,67 @@ export async function createOrUpdateHapiAgent(apiKey: string): Promise<CreateAge
         return { success: false, error: e instanceof Error ? e.message : 'Network error' }
     }
 }
+
+// --- Pluggable voice backend API ---
+
+export interface QwenTokenResponse {
+    allowed: boolean
+    apiKey?: string
+    wsUrl?: string
+    error?: string
+}
+
+/**
+ * Fetch a DashScope API key from the hub for Qwen Realtime voice sessions.
+ */
+export async function fetchQwenToken(api: ApiClient): Promise<QwenTokenResponse> {
+    try {
+        return await api.fetchQwenToken()
+    } catch (error) {
+        return {
+            allowed: false,
+            error: error instanceof Error ? error.message : 'Network error'
+        }
+    }
+}
+
+export interface VoiceBackendResponse {
+    backend: VoiceBackendType
+}
+
+export interface GeminiTokenResponse {
+    allowed: boolean
+    apiKey?: string
+    wsUrl?: string
+    baseUrl?: string
+    error?: string
+}
+
+/**
+ * Discover which voice backend the hub is configured to use.
+ */
+export async function fetchVoiceBackend(api: ApiClient): Promise<VoiceBackendResponse> {
+    try {
+        const result = await api.fetchVoiceBackend()
+        const backend = result.backend === 'gemini-live' ? 'gemini-live'
+            : result.backend === 'qwen-realtime' ? 'qwen-realtime'
+            : 'elevenlabs'
+        return { backend } as VoiceBackendResponse
+    } catch {
+        return { backend: 'elevenlabs' }
+    }
+}
+
+/**
+ * Fetch a Gemini API key from the hub for Gemini Live voice sessions.
+ */
+export async function fetchGeminiToken(api: ApiClient): Promise<GeminiTokenResponse> {
+    try {
+        return await api.fetchGeminiToken()
+    } catch (error) {
+        return {
+            allowed: false,
+            error: error instanceof Error ? error.message : 'Network error'
+        }
+    }
+}
diff --git a/web/src/components/SessionChat.tsx b/web/src/components/SessionChat.tsx
index 841286245..ccf96b65b 100644
--- a/web/src/components/SessionChat.tsx
+++ b/web/src/components/SessionChat.tsx
@@ -27,7 +27,7 @@ import { TeamPanel } from '@/components/TeamPanel'
 import { usePlatform } from '@/hooks/usePlatform'
 import { useSessionActions } from '@/hooks/mutations/useSessionActions'
 import { useVoiceOptional } from '@/lib/voice-context'
-import { RealtimeVoiceSession, registerSessionStore, registerVoiceHooksStore, voiceHooks } from '@/realtime'
+import { VoiceBackendSession, registerSessionStore, registerVoiceHooksStore, voiceHooks } from '@/realtime'
 import { isRemoteTerminalSupported } from '@/utils/terminalSupport'
 
 export function SessionChat(props: {
@@ -401,9 +401,9 @@ export function SessionChat(props: {
                 </div>
             </AssistantRuntimeProvider>
 
-            {/* Voice session component - renders nothing but initializes ElevenLabs */}
+            {/* Voice session component - renders nothing but initializes voice backend */}
             {voice && (
-                <RealtimeVoiceSession
+                <VoiceBackendSession
                     api={props.api}
                     micMuted={voice.micMuted}
                     onStatusChange={voice.setStatus}
diff --git a/web/src/realtime/GeminiLiveVoiceSession.tsx b/web/src/realtime/GeminiLiveVoiceSession.tsx
new file mode 100644
index 000000000..16974c7bd
--- /dev/null
+++ b/web/src/realtime/GeminiLiveVoiceSession.tsx
@@ -0,0 +1,369 @@
+import { useEffect, useRef, useCallback } from 'react'
+import { registerVoiceSession, resetRealtimeSessionState } from './RealtimeSession'
+import { registerSessionStore } from './realtimeClientTools'
+import { fetchGeminiToken } from '@/api/voice'
+import { GeminiAudioRecorder } from './gemini/audioRecorder'
+import { GeminiAudioPlayer } from './gemini/audioPlayer'
+import { handleGeminiFunctionCalls } from './gemini/toolAdapter'
+import { buildGeminiLiveConfig } from '@hapi/protocol/voice'
+import type { VoiceSession, VoiceSessionConfig, StatusCallback } from './types'
+import type { ApiClient } from '@/api/client'
+import type { Session } from '@/types/api'
+import type { GeminiFunctionCall } from './gemini/toolAdapter'
+
+const DEBUG = import.meta.env.DEV
+
+// Default Gemini Live WebSocket API endpoint (Google direct)
+const DEFAULT_GEMINI_LIVE_WS_BASE = 'wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent'
+
+interface GeminiLiveState {
+    ws: WebSocket | null
+    recorder: GeminiAudioRecorder | null
+    player: GeminiAudioPlayer | null
+    statusCallback: StatusCallback | null
+    apiKey: string | null
+    wsBaseUrl: string | null
+    modelSpeaking: boolean
+}
+
+const state: GeminiLiveState = {
+    ws: null,
+    recorder: null,
+    player: null,
+    statusCallback: null,
+    apiKey: null,
+    wsBaseUrl: null,
+    modelSpeaking: false
+}
+
+function cleanup() {
+    if (state.recorder) {
+        state.recorder.dispose()
+        state.recorder = null
+    }
+    if (state.player) {
+        state.player.dispose()
+        state.player = null
+    }
+    if (state.ws) {
+        if (state.ws.readyState === WebSocket.OPEN || state.ws.readyState === WebSocket.CONNECTING) {
+            state.ws.close()
+        }
+        state.ws = null
+    }
+}
+
+class GeminiLiveVoiceSessionImpl implements VoiceSession {
+    private api: ApiClient
+
+    constructor(api: ApiClient) {
+        this.api = api
+    }
+
+    async startSession(config: VoiceSessionConfig): Promise<void> {
+        cleanup()
+        state.statusCallback?.('connecting')
+
+        // Get API key from hub
+        const tokenResp = await fetchGeminiToken(this.api)
+        if (!tokenResp.allowed || !tokenResp.apiKey) {
+            const msg = tokenResp.error ?? 'Gemini API key not available'
+            state.statusCallback?.('error', msg)
+            throw new Error(msg)
+        }
+        state.apiKey = tokenResp.apiKey
+        state.wsBaseUrl = tokenResp.wsUrl || null
+
+        // Request microphone
+        let permissionStream: MediaStream | null = null
+        try {
+            permissionStream = await navigator.mediaDevices.getUserMedia({ audio: true })
+        } catch (error) {
+            state.statusCallback?.('error', 'Microphone permission denied')
+            throw error
+        } finally {
+            permissionStream?.getTracks().forEach((t) => t.stop())
+        }
+
+        // Connect WebSocket
+        const wsBase = state.wsBaseUrl || DEFAULT_GEMINI_LIVE_WS_BASE
+        const wsUrl = `${wsBase}?key=${encodeURIComponent(state.apiKey)}`
+        const ws = new WebSocket(wsUrl)
+        state.ws = ws
+
+        return new Promise<void>((resolve, reject) => {
+            let setupDone = false
+
+            ws.onopen = () => {
+                if (DEBUG) console.log('[GeminiLive] WebSocket connected, sending setup')
+
+                const liveConfig = buildGeminiLiveConfig()
+                const setupMessage = {
+                    setup: {
+                        model: `models/${liveConfig.model}`,
+                        generationConfig: {
+                            responseModalities: ['AUDIO'],
+                            speechConfig: {
+                                voiceConfig: {
+                                    prebuiltVoiceConfig: { voiceName: 'Aoede' }
+                                }
+                            }
+                        },
+                        systemInstruction: {
+                            parts: [{ text: liveConfig.systemInstruction }]
+                        },
+                        tools: liveConfig.tools.map((t) => ({
+                            functionDeclarations: t.functionDeclarations.map((fd) => ({
+                                name: fd.name,
+                                description: fd.description,
+                                parameters: fd.parameters
+                            }))
+                        }))
+                    }
+                }
+
+                ws.send(JSON.stringify(setupMessage))
+            }
+
+            ws.onmessage = async (event) => {
+                let data: Record<string, unknown>
+                try {
+                    if (event.data instanceof Blob) {
+                        const text = await event.data.text()
+                        data = JSON.parse(text) as Record<string, unknown>
+                    } else {
+                        data = JSON.parse(event.data as string) as Record<string, unknown>
+                    }
+                } catch {
+                    if (DEBUG) console.warn('[GeminiLive] Failed to parse message')
+                    return
+                }
+
+                // Log all message types for debugging
+                const msgKeys = Object.keys(data).filter(k => k !== 'serverContent' || !('modelTurn' in (data.serverContent as Record<string, unknown> || {})))
+                if (!data.serverContent) {
+                    console.log('[GeminiLive] Message:', msgKeys.join(', '), JSON.stringify(data).slice(0, 200))
+                }
+
+                // Setup complete
+                if (data.setupComplete && !setupDone) {
+                    setupDone = true
+                    if (DEBUG) console.log('[GeminiLive] Setup complete')
+                    state.statusCallback?.('connected')
+
+                    // Start audio capture
+                    startAudioCapture()
+
+                    // Send initial context if available (no clientContent greeting — it breaks tool calls)
+                    if (config.initialContext) {
+                        sendClientContent(`[Context] ${config.initialContext}`)
+                    }
+
+                    resolve()
+                    return
+                }
+
+                // Server content (audio / text / turn complete)
+                const serverContent = data.serverContent as {
+                    modelTurn?: { parts?: Array<{ inlineData?: { data: string; mimeType: string }; text?: string }> }
+                    turnComplete?: boolean
+                } | undefined
+
+                if (serverContent) {
+                    if (serverContent.modelTurn?.parts) {
+                        // Model is generating — mute mic to prevent barge-in from noise
+                        if (!state.modelSpeaking) {
+                            state.modelSpeaking = true
+                            state.recorder?.setMuted(true)
+                        }
+                        for (const part of serverContent.modelTurn.parts) {
+                            if (part.inlineData?.data) {
+                                state.player?.enqueue(part.inlineData.data)
+                            }
+                            if (part.text) {
+                                console.log('[GeminiLive] Text:', part.text)
+                            }
+                        }
+                    }
+                    if (serverContent.turnComplete) {
+                        console.log('[GeminiLive] Turn complete')
+                        // Model done — unmute mic for next user turn
+                        state.modelSpeaking = false
+                        state.recorder?.setMuted(false)
+                    }
+                }
+
+                // Tool calls
+                const toolCall = data.toolCall as {
+                    functionCalls?: Array<{ name: string; args: Record<string, unknown>; id: string }>
+                } | undefined
+
+                if (toolCall?.functionCalls && toolCall.functionCalls.length > 0) {
+                    console.log('[GeminiLive] Tool calls:', toolCall.functionCalls.map((c) => c.name))
+
+                    const responses = await handleGeminiFunctionCalls(
+                        toolCall.functionCalls as GeminiFunctionCall[]
+                    )
+
+                    // Send tool responses back
+                    if (state.ws?.readyState === WebSocket.OPEN) {
+                        state.ws.send(JSON.stringify({
+                            toolResponse: {
+                                functionResponses: responses.map((r) => ({
+                                    id: r.id,
+                                    name: r.name,
+                                    response: r.response
+                                }))
+                            }
+                        }))
+                    }
+                }
+            }
+
+            ws.onerror = (event) => {
+                console.error('[GeminiLive] WebSocket error:', event)
+                if (!setupDone) {
+                    state.statusCallback?.('error', 'WebSocket connection failed')
+                    reject(new Error('WebSocket connection failed'))
+                }
+            }
+
+            ws.onclose = (event) => {
+                if (DEBUG) console.log('[GeminiLive] WebSocket closed:', event.code, event.reason)
+                cleanup()
+                resetRealtimeSessionState()
+                state.statusCallback?.('disconnected')
+            }
+        })
+    }
+
+    async endSession(): Promise<void> {
+        cleanup()
+        resetRealtimeSessionState()
+        state.statusCallback?.('disconnected')
+    }
+
+    sendTextMessage(message: string): void {
+        sendClientContent(message)
+    }
+
+    sendContextualUpdate(update: string): void {
+        // Send as a system-like context message
+        sendClientContent(`[System Context Update] ${update}`)
+    }
+}
+
+function sendClientContent(text: string): void {
+    if (!state.ws || state.ws.readyState !== WebSocket.OPEN) return
+    state.ws.send(JSON.stringify({
+        clientContent: {
+            turns: [{ role: 'user', parts: [{ text }] }],
+            turnComplete: true
+        }
+    }))
+}
+
+function sendAudioChunk(base64Pcm: string): void {
+    if (!state.ws || state.ws.readyState !== WebSocket.OPEN) return
+    // Don't send audio while model is speaking
+    if (state.modelSpeaking) return
+    state.ws.send(JSON.stringify({
+        realtimeInput: {
+            mediaChunks: [{
+                mimeType: 'audio/pcm;rate=16000',
+                data: base64Pcm
+            }]
+        }
+    }))
+}
+
+function startAudioCapture(): void {
+    state.player = new GeminiAudioPlayer()
+    state.recorder = new GeminiAudioRecorder()
+
+    state.recorder.start(
+        (pcm16Chunk) => sendAudioChunk(pcm16Chunk),
+        (error) => {
+            console.error('[GeminiLive] Audio capture error:', error)
+            state.statusCallback?.('error', 'Microphone error')
+        }
+    )
+}
+
+// --- React component ---
+
+export interface GeminiLiveVoiceSessionProps {
+    api: ApiClient
+    micMuted?: boolean
+    onStatusChange?: StatusCallback
+    getSession?: (sessionId: string) => Session | null
+    sendMessage?: (sessionId: string, message: string) => void
+    approvePermission?: (sessionId: string, requestId: string) => Promise<void>
+    denyPermission?: (sessionId: string, requestId: string) => Promise<void>
+}
+
+export function GeminiLiveVoiceSession({
+    api,
+    micMuted = false,
+    onStatusChange,
+    getSession,
+    sendMessage,
+    approvePermission,
+    denyPermission
+}: GeminiLiveVoiceSessionProps) {
+    const hasRegistered = useRef(false)
+
+    // Store status callback
+    useEffect(() => {
+        state.statusCallback = onStatusChange || null
+        return () => { state.statusCallback = null }
+    }, [onStatusChange])
+
+    // Register session store for client tools
+    useEffect(() => {
+        if (getSession && sendMessage && approvePermission && denyPermission) {
+            registerSessionStore({
+                getSession: (sessionId: string) =>
+                    getSession(sessionId) as { agentState?: { requests?: Record<string, unknown> } } | null,
+                sendMessage,
+                approvePermission,
+                denyPermission
+            })
+        }
+    }, [getSession, sendMessage, approvePermission, denyPermission])
+
+    // Register voice session once
+    useEffect(() => {
+        if (!hasRegistered.current) {
+            try {
+                registerVoiceSession(new GeminiLiveVoiceSessionImpl(api))
+                hasRegistered.current = true
+            } catch (error) {
+                console.error('[GeminiLive] Failed to register voice session:', error)
+            }
+        }
+    }, [api])
+
+    // Sync mic mute state
+    useEffect(() => {
+        if (state.recorder) {
+            state.recorder.setMuted(micMuted)
+        }
+    }, [micMuted])
+
+    // Handle barge-in: clear audio queue when user starts speaking
+    const handleBargeIn = useCallback(() => {
+        if (state.player?.isPlaying()) {
+            state.player.clearQueue()
+        }
+    }, [])
+
+    // Cleanup on unmount
+    useEffect(() => {
+        return () => {
+            cleanup()
+        }
+    }, [])
+
+    return null
+}
diff --git a/web/src/realtime/QwenVoiceSession.tsx b/web/src/realtime/QwenVoiceSession.tsx
new file mode 100644
index 000000000..3c9808c6f
--- /dev/null
+++ b/web/src/realtime/QwenVoiceSession.tsx
@@ -0,0 +1,376 @@
+import { useEffect, useRef, useCallback } from 'react'
+import { registerVoiceSession, resetRealtimeSessionState } from './RealtimeSession'
+import { registerSessionStore } from './realtimeClientTools'
+import { fetchQwenToken } from '@/api/voice'
+import { GeminiAudioRecorder } from './gemini/audioRecorder'
+import { GeminiAudioPlayer } from './gemini/audioPlayer'
+import { realtimeClientTools } from './realtimeClientTools'
+import {
+    QWEN_REALTIME_MODEL,
+    QWEN_REALTIME_VOICE,
+    VOICE_SYSTEM_PROMPT,
+    VOICE_TOOL_DEFINITIONS
+} from '@hapi/protocol/voice'
+import type { VoiceSession, VoiceSessionConfig, StatusCallback } from './types'
+import type { ApiClient } from '@/api/client'
+import type { Session } from '@/types/api'
+
+const DEBUG = import.meta.env.DEV
+
+// Qwen WebSocket connects via Hub proxy (browser can't set Authorization header)
+
+interface QwenState {
+    ws: WebSocket | null
+    recorder: GeminiAudioRecorder | null
+    player: GeminiAudioPlayer | null
+    statusCallback: StatusCallback | null
+    apiKey: string | null
+    wsBaseUrl: string | null
+}
+
+const state: QwenState = {
+    ws: null,
+    recorder: null,
+    player: null,
+    statusCallback: null,
+    apiKey: null,
+    wsBaseUrl: null
+}
+
+let eventCounter = 0
+function nextEventId(): string {
+    return `evt_${++eventCounter}`
+}
+
+function cleanup() {
+    if (state.recorder) {
+        state.recorder.dispose()
+        state.recorder = null
+    }
+    if (state.player) {
+        state.player.dispose()
+        state.player = null
+    }
+    if (state.ws) {
+        if (state.ws.readyState === WebSocket.OPEN || state.ws.readyState === WebSocket.CONNECTING) {
+            state.ws.close()
+        }
+        state.ws = null
+    }
+}
+
+function sendEvent(type: string, payload?: Record<string, unknown>): void {
+    if (!state.ws || state.ws.readyState !== WebSocket.OPEN) return
+    state.ws.send(JSON.stringify({
+        event_id: nextEventId(),
+        type,
+        ...payload
+    }))
+}
+
+class QwenVoiceSessionImpl implements VoiceSession {
+    private api: ApiClient
+
+    constructor(api: ApiClient) {
+        this.api = api
+    }
+
+    async startSession(config: VoiceSessionConfig): Promise<void> {
+        cleanup()
+        state.statusCallback?.('connecting')
+
+        // Get API key from hub
+        const tokenResp = await fetchQwenToken(this.api)
+        if (!tokenResp.allowed || !tokenResp.apiKey) {
+            const msg = tokenResp.error ?? 'DashScope API key not available'
+            state.statusCallback?.('error', msg)
+            throw new Error(msg)
+        }
+        state.apiKey = tokenResp.apiKey
+        state.wsBaseUrl = tokenResp.wsUrl || null
+
+        // Request microphone
+        let permissionStream: MediaStream | null = null
+        try {
+            permissionStream = await navigator.mediaDevices.getUserMedia({ audio: true })
+        } catch (error) {
+            state.statusCallback?.('error', 'Microphone permission denied')
+            throw error
+        } finally {
+            permissionStream?.getTracks().forEach((t) => t.stop())
+        }
+
+        // Connect via Hub WebSocket proxy (DashScope requires Authorization header,
+        // which browser WebSocket API doesn't support)
+        const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:'
+        const proxyBase = state.wsBaseUrl || `${protocol}//${window.location.host}`
+        const model = QWEN_REALTIME_MODEL
+        const wsUrl = `${proxyBase}/api/voice/qwen-ws?model=${encodeURIComponent(model)}`
+        const ws = new WebSocket(wsUrl)
+        state.ws = ws
+
+        return new Promise<void>((resolve, reject) => {
+            let sessionCreated = false
+
+            ws.onopen = () => {
+                if (DEBUG) console.log('[Qwen] WebSocket connected')
+            }
+
+            ws.onmessage = async (event) => {
+                let data: Record<string, unknown>
+                try {
+                    data = JSON.parse(event.data as string) as Record<string, unknown>
+                } catch {
+                    if (DEBUG) console.warn('[Qwen] Failed to parse message')
+                    return
+                }
+
+                const eventType = data.type as string
+
+                // Session created - send configuration
+                if (eventType === 'session.created' && !sessionCreated) {
+                    sessionCreated = true
+                    if (DEBUG) console.log('[Qwen] Session created')
+
+                    // Build tools config
+                    const tools = VOICE_TOOL_DEFINITIONS.map((td) => ({
+                        type: 'function' as const,
+                        name: td.name,
+                        description: td.description,
+                        parameters: td.parameters
+                    }))
+
+                    // Send session.update with full configuration
+                    const instructions = config.initialContext
+                        ? `${VOICE_SYSTEM_PROMPT}\n\n[Current Context]\n${config.initialContext}`
+                        : VOICE_SYSTEM_PROMPT
+
+                    sendEvent('session.update', {
+                        session: {
+                            modalities: ['text', 'audio'],
+                            voice: QWEN_REALTIME_VOICE,
+                            input_audio_format: 'pcm',
+                            output_audio_format: 'pcm',
+                            instructions,
+                            temperature: 0.7,
+                            turn_detection: {
+                                type: 'server_vad',
+                                threshold: 0.5,
+                                silence_duration_ms: 800,
+                                prefix_padding_ms: 300
+                            },
+                            tools,
+                            tool_choice: 'auto'
+                        }
+                    })
+                    return
+                }
+
+                // Session updated - ready to go
+                if (eventType === 'session.updated') {
+                    if (DEBUG) console.log('[Qwen] Session configured')
+                    state.statusCallback?.('connected')
+                    startAudioCapture()
+                    resolve()
+                    return
+                }
+
+                // Audio output streaming
+                if (eventType === 'response.audio.delta') {
+                    const delta = data.delta as string
+                    if (delta) {
+                        state.player?.enqueue(delta)
+                    }
+                    return
+                }
+
+                // Text transcript (for debug)
+                if (eventType === 'response.audio_transcript.delta' && DEBUG) {
+                    console.log('[Qwen] Transcript:', data.delta)
+                    return
+                }
+
+                // Function call complete
+                if (eventType === 'response.function_call_arguments.done') {
+                    const callId = data.call_id as string
+                    const fnName = data.name as string
+                    const argsStr = data.arguments as string
+
+                    if (DEBUG) console.log('[Qwen] Tool call:', fnName, argsStr)
+
+                    let args: Record<string, unknown> = {}
+                    try { args = JSON.parse(argsStr) } catch { /* empty */ }
+
+                    // Execute the tool
+                    const handler = fnName === 'messageCodingAgent'
+                        ? realtimeClientTools.messageCodingAgent
+                        : fnName === 'processPermissionRequest'
+                        ? realtimeClientTools.processPermissionRequest
+                        : null
+
+                    const result = handler
+                        ? await handler(args)
+                        : `error (unknown tool: ${fnName})`
+
+                    // Send function result back
+                    sendEvent('conversation.item.create', {
+                        item: {
+                            type: 'function_call_output',
+                            call_id: callId,
+                            output: typeof result === 'string' ? result : JSON.stringify(result)
+                        }
+                    })
+                    // Trigger model to continue
+                    sendEvent('response.create')
+                    return
+                }
+
+                // VAD: user started speaking - barge-in
+                if (eventType === 'input_audio_buffer.speech_started') {
+                    if (state.player?.isPlaying()) {
+                        state.player.clearQueue()
+                    }
+                    return
+                }
+
+                // Response done
+                if (eventType === 'response.done' && DEBUG) {
+                    const resp = data.response as Record<string, unknown> | undefined
+                    const usage = resp?.usage as Record<string, unknown> | undefined
+                    if (usage) console.log('[Qwen] Usage:', usage)
+                    return
+                }
+
+                // Error
+                if (eventType === 'error') {
+                    const err = data.error as { message?: string } | undefined
+                    console.error('[Qwen] Server error:', err?.message || data)
+                    return
+                }
+            }
+
+            ws.onerror = (event) => {
+                console.error('[Qwen] WebSocket error:', event)
+                if (!sessionCreated) {
+                    state.statusCallback?.('error', 'WebSocket connection failed')
+                    reject(new Error('WebSocket connection failed'))
+                }
+            }
+
+            ws.onclose = (event) => {
+                if (DEBUG) console.log('[Qwen] WebSocket closed:', event.code, event.reason)
+                cleanup()
+                resetRealtimeSessionState()
+                state.statusCallback?.('disconnected')
+            }
+        })
+    }
+
+    async endSession(): Promise<void> {
+        cleanup()
+        resetRealtimeSessionState()
+        state.statusCallback?.('disconnected')
+    }
+
+    sendTextMessage(message: string): void {
+        // Send text as a user message via conversation.item.create
+        sendEvent('conversation.item.create', {
+            item: {
+                type: 'message',
+                role: 'user',
+                content: [{ type: 'input_text', text: message }]
+            }
+        })
+        sendEvent('response.create')
+    }
+
+    sendContextualUpdate(update: string): void {
+        // Send context as a system-like user message
+        sendEvent('conversation.item.create', {
+            item: {
+                type: 'message',
+                role: 'user',
+                content: [{ type: 'input_text', text: `[System Context Update] ${update}` }]
+            }
+        })
+    }
+}
+
+function startAudioCapture(): void {
+    state.player = new GeminiAudioPlayer()
+    state.recorder = new GeminiAudioRecorder()
+
+    state.recorder.start(
+        (base64Pcm) => {
+            sendEvent('input_audio_buffer.append', { audio: base64Pcm })
+        },
+        (error) => {
+            console.error('[Qwen] Audio capture error:', error)
+            state.statusCallback?.('error', 'Microphone error')
+        }
+    )
+}
+
+// --- React component ---
+
+export interface QwenVoiceSessionProps {
+    api: ApiClient
+    micMuted?: boolean
+    onStatusChange?: StatusCallback
+    getSession?: (sessionId: string) => Session | null
+    sendMessage?: (sessionId: string, message: string) => void
+    approvePermission?: (sessionId: string, requestId: string) => Promise<void>
+    denyPermission?: (sessionId: string, requestId: string) => Promise<void>
+}
+
+export function QwenVoiceSession({
+    api,
+    micMuted = false,
+    onStatusChange,
+    getSession,
+    sendMessage,
+    approvePermission,
+    denyPermission
+}: QwenVoiceSessionProps) {
+    const hasRegistered = useRef(false)
+
+    useEffect(() => {
+        state.statusCallback = onStatusChange || null
+        return () => { state.statusCallback = null }
+    }, [onStatusChange])
+
+    useEffect(() => {
+        if (getSession && sendMessage && approvePermission && denyPermission) {
+            registerSessionStore({
+                getSession: (sessionId: string) =>
+                    getSession(sessionId) as { agentState?: { requests?: Record<string, unknown> } } | null,
+                sendMessage,
+                approvePermission,
+                denyPermission
+            })
+        }
+    }, [getSession, sendMessage, approvePermission, denyPermission])
+
+    useEffect(() => {
+        if (!hasRegistered.current) {
+            try {
+                registerVoiceSession(new QwenVoiceSessionImpl(api))
+                hasRegistered.current = true
+            } catch (error) {
+                console.error('[Qwen] Failed to register voice session:', error)
+            }
+        }
+    }, [api])
+
+    useEffect(() => {
+        if (state.recorder) {
+            state.recorder.setMuted(micMuted)
+        }
+    }, [micMuted])
+
+    useEffect(() => {
+        return () => { cleanup() }
+    }, [])
+
+    return null
+}
diff --git a/web/src/realtime/VoiceBackendSession.tsx b/web/src/realtime/VoiceBackendSession.tsx
new file mode 100644
index 000000000..c23dfa150
--- /dev/null
+++ b/web/src/realtime/VoiceBackendSession.tsx
@@ -0,0 +1,54 @@
+import { lazy, Suspense, useEffect, useState } from 'react'
+import { RealtimeVoiceSession } from './RealtimeVoiceSession'
+import type { RealtimeVoiceSessionProps } from './RealtimeVoiceSession'
+import { fetchVoiceBackend } from '@/api/voice'
+import type { ApiClient } from '@/api/client'
+import type { VoiceBackendType } from '@hapi/protocol/voice'
+
+// Lazy-load alternative backends to avoid bundling when using ElevenLabs
+const GeminiLiveVoiceSession = lazy(() =>
+    import('./GeminiLiveVoiceSession').then((m) => ({ default: m.GeminiLiveVoiceSession }))
+)
+const QwenVoiceSession = lazy(() =>
+    import('./QwenVoiceSession').then((m) => ({ default: m.QwenVoiceSession }))
+)
+
+export type VoiceBackendSessionProps = RealtimeVoiceSessionProps & {
+    api: ApiClient
+}
+
+/**
+ * Dynamically selects the voice session component based on the hub's configured backend.
+ * Queries GET /voice/backend once on mount and renders the appropriate component.
+ */
+export function VoiceBackendSession(props: VoiceBackendSessionProps) {
+    const [backend, setBackend] = useState<VoiceBackendType | null>(null)
+
+    useEffect(() => {
+        let cancelled = false
+        fetchVoiceBackend(props.api).then((resp) => {
+            if (!cancelled) setBackend(resp.backend)
+        })
+        return () => { cancelled = true }
+    }, [props.api])
+
+    if (!backend) return null
+
+    if (backend === 'gemini-live') {
+        return (
+            <Suspense fallback={null}>
+                <GeminiLiveVoiceSession {...props} />
+            </Suspense>
+        )
+    }
+
+    if (backend === 'qwen-realtime') {
+        return (
+            <Suspense fallback={null}>
+                <QwenVoiceSession {...props} />
+            </Suspense>
+        )
+    }
+
+    return <RealtimeVoiceSession {...props} />
+}
diff --git a/web/src/realtime/gemini/audioPlayer.ts b/web/src/realtime/gemini/audioPlayer.ts
new file mode 100644
index 000000000..23d1d341e
--- /dev/null
+++ b/web/src/realtime/gemini/audioPlayer.ts
@@ -0,0 +1,75 @@
+import { base64ToArrayBuffer, pcm16ToFloat32 } from './pcmUtils';
+
+export class GeminiAudioPlayer {
+  private audioContext: AudioContext;
+  private ownsContext: boolean;
+  private lastEndTime: number = 0;
+  private activeSources: AudioBufferSourceNode[] = [];
+
+  constructor(audioContext?: AudioContext) {
+    if (audioContext) {
+      this.audioContext = audioContext;
+      this.ownsContext = false;
+    } else {
+      this.audioContext = new AudioContext({ sampleRate: 24000 });
+      this.ownsContext = true;
+    }
+    this.lastEndTime = this.audioContext.currentTime;
+  }
+
+  enqueue(base64Pcm: string): void {
+    if (this.audioContext.state === 'suspended') {
+      this.audioContext.resume();
+    }
+
+    const arrayBuffer = base64ToArrayBuffer(base64Pcm);
+    const float32Data = pcm16ToFloat32(arrayBuffer);
+    
+    if (float32Data.length === 0) return;
+
+    const audioBuffer = this.audioContext.createBuffer(1, float32Data.length, 24000);
+    audioBuffer.copyToChannel(new Float32Array(float32Data), 0);
+
+    const source = this.audioContext.createBufferSource();
+    source.buffer = audioBuffer;
+    source.connect(this.audioContext.destination);
+
+    const startTime = Math.max(this.audioContext.currentTime, this.lastEndTime);
+    
+    source.onended = () => {
+      const index = this.activeSources.indexOf(source);
+      if (index > -1) {
+        this.activeSources.splice(index, 1);
+      }
+    };
+
+    source.start(startTime);
+    this.activeSources.push(source);
+
+    this.lastEndTime = startTime + audioBuffer.duration;
+  }
+
+  clearQueue(): void {
+    this.activeSources.forEach(source => {
+      try {
+        source.stop();
+      } catch (e) {
+        // Ignore if already stopped
+      }
+      source.disconnect();
+    });
+    this.activeSources = [];
+    this.lastEndTime = this.audioContext.currentTime;
+  }
+
+  isPlaying(): boolean {
+    return this.lastEndTime > this.audioContext.currentTime;
+  }
+
+  dispose(): void {
+    this.clearQueue();
+    if (this.ownsContext && this.audioContext.state !== 'closed') {
+      this.audioContext.close();
+    }
+  }
+}
diff --git a/web/src/realtime/gemini/audioRecorder.ts b/web/src/realtime/gemini/audioRecorder.ts
new file mode 100644
index 000000000..b1c01a7c4
--- /dev/null
+++ b/web/src/realtime/gemini/audioRecorder.ts
@@ -0,0 +1,132 @@
+import { float32ToPcm16, arrayBufferToBase64 } from './pcmUtils';
+
+// Inline worklet source to avoid Vite bundling issues with ?url imports.
+// AudioWorklet.addModule() requires a URL to valid JS, so we create a Blob URL.
+const WORKLET_SOURCE = `
+class PcmRecorderProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this.buffer = new Float32Array(4096);
+    this.idx = 0;
+  }
+  process(inputs) {
+    const input = inputs[0];
+    if (input && input.length > 0) {
+      const channel = input[0];
+      for (let i = 0; i < channel.length; i++) {
+        this.buffer[this.idx++] = channel[i];
+        if (this.idx >= 4096) {
+          this.port.postMessage({ samples: this.buffer.slice() });
+          this.idx = 0;
+        }
+      }
+    }
+    return true;
+  }
+}
+registerProcessor('pcm-recorder-processor', PcmRecorderProcessor);
+`;
+
+function createWorkletUrl(): string {
+  const blob = new Blob([WORKLET_SOURCE], { type: 'application/javascript' });
+  return URL.createObjectURL(blob);
+}
+
+export class GeminiAudioRecorder {
+  private audioContext: AudioContext | null = null;
+  private mediaStream: MediaStream | null = null;
+  private sourceNode: MediaStreamAudioSourceNode | null = null;
+  private workletNode: AudioWorkletNode | null = null;
+  private scriptNode: ScriptProcessorNode | null = null;
+
+  async start(onChunk: (base64Pcm: string) => void, onError?: (error: Error) => void): Promise<void> {
+    try {
+      this.mediaStream = await navigator.mediaDevices.getUserMedia({
+        audio: { sampleRate: 16000, channelCount: 1 }
+      });
+
+      this.mediaStream.getTracks().forEach((track) => {
+        track.onended = () => {
+          if (onError) onError(new Error('Microphone disconnected'));
+        };
+      });
+
+      this.audioContext = new AudioContext({ sampleRate: 16000 });
+      if (this.audioContext.state === 'suspended') {
+        await this.audioContext.resume();
+      }
+
+      this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
+
+      try {
+        const workletUrl = createWorkletUrl();
+        await this.audioContext.audioWorklet.addModule(workletUrl);
+        URL.revokeObjectURL(workletUrl);
+
+        this.workletNode = new AudioWorkletNode(this.audioContext, 'pcm-recorder-processor');
+        this.workletNode.port.onmessage = (event) => {
+          const pcm16 = float32ToPcm16(event.data.samples);
+          const base64 = arrayBufferToBase64(pcm16);
+          onChunk(base64);
+        };
+        this.sourceNode.connect(this.workletNode);
+      } catch (e) {
+        console.warn('[GeminiLive] AudioWorklet failed, falling back to ScriptProcessorNode', e);
+        this.scriptNode = this.audioContext.createScriptProcessor(4096, 1, 1);
+        this.scriptNode.onaudioprocess = (event) => {
+          const inputData = event.inputBuffer.getChannelData(0);
+          const pcm16 = float32ToPcm16(new Float32Array(inputData));
+          const base64 = arrayBufferToBase64(pcm16);
+          onChunk(base64);
+        };
+        this.sourceNode.connect(this.scriptNode);
+        this.scriptNode.connect(this.audioContext.destination);
+      }
+    } catch (e) {
+      if (onError) onError(e instanceof Error ? e : new Error(String(e)));
+      throw e;
+    }
+  }
+
+  stop(): void {
+    if (this.mediaStream) {
+      this.mediaStream.getTracks().forEach(track => {
+        track.onended = null;
+        track.stop();
+      });
+      this.mediaStream = null;
+    }
+
+    if (this.scriptNode) {
+      this.scriptNode.disconnect();
+      this.scriptNode = null;
+    }
+
+    if (this.workletNode) {
+      this.workletNode.disconnect();
+      this.workletNode = null;
+    }
+
+    if (this.sourceNode) {
+      this.sourceNode.disconnect();
+      this.sourceNode = null;
+    }
+
+    if (this.audioContext) {
+      this.audioContext.close();
+      this.audioContext = null;
+    }
+  }
+
+  setMuted(muted: boolean): void {
+    if (this.mediaStream) {
+      this.mediaStream.getAudioTracks().forEach(track => {
+        track.enabled = !muted;
+      });
+    }
+  }
+
+  dispose(): void {
+    this.stop();
+  }
+}
diff --git a/web/src/realtime/gemini/pcm-recorder.worklet.ts b/web/src/realtime/gemini/pcm-recorder.worklet.ts
new file mode 100644
index 000000000..404f65445
--- /dev/null
+++ b/web/src/realtime/gemini/pcm-recorder.worklet.ts
@@ -0,0 +1,35 @@
+// AudioWorklet processor runs in a separate scope with its own globals.
+// These declarations satisfy TypeScript without pulling in DOM lib types.
+declare class AudioWorkletProcessor {
+  readonly port: MessagePort
+  constructor()
+}
+declare function registerProcessor(name: string, ctor: new () => AudioWorkletProcessor): void
+
+class PcmRecorderProcessor extends AudioWorkletProcessor {
+  private buffer: Float32Array;
+  private bufferSize = 4096;
+  private bufferIndex = 0;
+
+  constructor() {
+    super();
+    this.buffer = new Float32Array(this.bufferSize);
+  }
+
+  process(inputs: Float32Array[][]): boolean {
+    const input = inputs[0];
+    if (input && input.length > 0) {
+      const channel = input[0];
+      for (let i = 0; i < channel.length; i++) {
+        this.buffer[this.bufferIndex++] = channel[i];
+        if (this.bufferIndex >= this.bufferSize) {
+          this.port.postMessage({ samples: this.buffer.slice() });
+          this.bufferIndex = 0;
+        }
+      }
+    }
+    return true;
+  }
+}
+
+registerProcessor('pcm-recorder-processor', PcmRecorderProcessor);
diff --git a/web/src/realtime/gemini/pcmUtils.test.ts b/web/src/realtime/gemini/pcmUtils.test.ts
new file mode 100644
index 000000000..2e0be05c3
--- /dev/null
+++ b/web/src/realtime/gemini/pcmUtils.test.ts
@@ -0,0 +1,60 @@
+import { describe, test, expect } from 'bun:test'
+import {
+    float32ToPcm16,
+    pcm16ToFloat32,
+    arrayBufferToBase64,
+    base64ToArrayBuffer
+} from './pcmUtils'
+
+describe('pcmUtils', () => {
+    describe('float32ToPcm16 / pcm16ToFloat32 round-trip', () => {
+        test('preserves signal within quantization error', () => {
+            const input = new Float32Array([0, 0.5, -0.5, 1.0, -1.0])
+            const pcm16 = float32ToPcm16(input)
+            const output = pcm16ToFloat32(pcm16)
+
+            expect(output.length).toBe(input.length)
+            for (let i = 0; i < input.length; i++) {
+                expect(Math.abs(output[i] - input[i])).toBeLessThan(0.001)
+            }
+        })
+
+        test('clamps values outside [-1, 1]', () => {
+            const input = new Float32Array([2.0, -2.0])
+            const pcm16 = float32ToPcm16(input)
+            const output = pcm16ToFloat32(pcm16)
+
+            expect(Math.abs(output[0] - 1.0)).toBeLessThan(0.001)
+            expect(Math.abs(output[1] - (-1.0))).toBeLessThan(0.001)
+        })
+
+        test('handles empty input', () => {
+            const input = new Float32Array(0)
+            const pcm16 = float32ToPcm16(input)
+            expect(pcm16.byteLength).toBe(0)
+            const output = pcm16ToFloat32(pcm16)
+            expect(output.length).toBe(0)
+        })
+    })
+
+    describe('arrayBufferToBase64 / base64ToArrayBuffer round-trip', () => {
+        test('preserves binary data', () => {
+            const original = new Uint8Array([0, 1, 127, 128, 255])
+            const base64 = arrayBufferToBase64(original.buffer)
+            const restored = new Uint8Array(base64ToArrayBuffer(base64))
+
+            expect(restored.length).toBe(original.length)
+            for (let i = 0; i < original.length; i++) {
+                expect(restored[i]).toBe(original[i])
+            }
+        })
+
+        test('handles empty buffer', () => {
+            const empty = new ArrayBuffer(0)
+            const base64 = arrayBufferToBase64(empty)
+            expect(base64).toBe('')
+            const restored = base64ToArrayBuffer(base64)
+            expect(restored.byteLength).toBe(0)
+        })
+    })
+})
diff --git a/web/src/realtime/gemini/pcmUtils.ts b/web/src/realtime/gemini/pcmUtils.ts
new file mode 100644
index 000000000..67e2928fc
--- /dev/null
+++ b/web/src/realtime/gemini/pcmUtils.ts
@@ -0,0 +1,39 @@
+export function float32ToPcm16(samples: Float32Array): ArrayBuffer {
+  const buffer = new ArrayBuffer(samples.length * 2);
+  const view = new DataView(buffer);
+  for (let i = 0; i < samples.length; i++) {
+    let s = Math.max(-1, Math.min(1, samples[i]));
+    s = s < 0 ? s * 0x8000 : s * 0x7FFF;
+    view.setInt16(i * 2, s, true);
+  }
+  return buffer;
+}
+
+export function pcm16ToFloat32(buffer: ArrayBuffer): Float32Array {
+  const int16Array = new Int16Array(buffer);
+  const float32Array = new Float32Array(int16Array.length);
+  for (let i = 0; i < int16Array.length; i++) {
+    const s = int16Array[i];
+    float32Array[i] = s < 0 ? s / 0x8000 : s / 0x7FFF;
+  }
+  return float32Array;
+}
+
+export function arrayBufferToBase64(buffer: ArrayBuffer): string {
+  let binary = '';
+  const bytes = new Uint8Array(buffer);
+  const len = bytes.byteLength;
+  for (let i = 0; i < len; i++) {
+    binary += String.fromCharCode(bytes[i]);
+  }
+  return btoa(binary);
+}
+
+export function base64ToArrayBuffer(base64: string): ArrayBuffer {
+  const binary = atob(base64);
+  const bytes = new Uint8Array(binary.length);
+  for (let i = 0; i < binary.length; i++) {
+    bytes[i] = binary.charCodeAt(i);
+  }
+  return bytes.buffer;
+}
diff --git a/web/src/realtime/gemini/toolAdapter.test.ts b/web/src/realtime/gemini/toolAdapter.test.ts
new file mode 100644
index 000000000..5d98d6d4d
--- /dev/null
+++ b/web/src/realtime/gemini/toolAdapter.test.ts
@@ -0,0 +1,28 @@
+import { describe, test, expect } from 'bun:test'
+import { handleGeminiFunctionCall, handleGeminiFunctionCalls } from './toolAdapter'
+import type { GeminiFunctionCall } from './toolAdapter'
+
+describe('toolAdapter', () => {
+    test('returns error for unknown tool', async () => {
+        const call: GeminiFunctionCall = {
+            name: 'unknownTool',
+            args: {},
+            id: 'call-1'
+        }
+        const resp = await handleGeminiFunctionCall(call)
+        expect(resp.name).toBe('unknownTool')
+        expect(resp.id).toBe('call-1')
+        expect(resp.response.result).toContain('unknown tool')
+    })
+
+    test('handles multiple calls in parallel', async () => {
+        const calls: GeminiFunctionCall[] = [
+            { name: 'unknownA', args: {}, id: 'a' },
+            { name: 'unknownB', args: {}, id: 'b' }
+        ]
+        const responses = await handleGeminiFunctionCalls(calls)
+        expect(responses.length).toBe(2)
+        expect(responses[0].id).toBe('a')
+        expect(responses[1].id).toBe('b')
+    })
+})
diff --git a/web/src/realtime/gemini/toolAdapter.ts b/web/src/realtime/gemini/toolAdapter.ts
new file mode 100644
index 000000000..dd44e4fb1
--- /dev/null
+++ b/web/src/realtime/gemini/toolAdapter.ts
@@ -0,0 +1,70 @@
+import { realtimeClientTools } from '../realtimeClientTools'
+
+/**
+ * Gemini Live API function call from server.
+ * Matches the `toolCall` shape in a BidiGenerateContent serverMessage.
+ */
+export interface GeminiFunctionCall {
+    name: string
+    args: Record<string, unknown>
+    id: string
+}
+
+/**
+ * Response sent back to Gemini Live via `toolResponse`.
+ */
+export interface GeminiFunctionResponse {
+    name: string
+    id: string
+    response: { result: string }
+}
+
+type ClientToolHandler = (parameters: unknown) => Promise<string>
+
+const toolHandlers: Record<string, ClientToolHandler> = {
+    messageCodingAgent: realtimeClientTools.messageCodingAgent,
+    processPermissionRequest: realtimeClientTools.processPermissionRequest
+}
+
+/**
+ * Execute a Gemini Live function call using the existing client tool handlers.
+ * Returns a GeminiFunctionResponse ready to send back over the WebSocket.
+ */
+export async function handleGeminiFunctionCall(
+    call: GeminiFunctionCall
+): Promise<GeminiFunctionResponse> {
+    const handler = toolHandlers[call.name]
+
+    if (!handler) {
+        return {
+            name: call.name,
+            id: call.id,
+            response: { result: `error (unknown tool: ${call.name})` }
+        }
+    }
+
+    try {
+        const result = await handler(call.args)
+        return {
+            name: call.name,
+            id: call.id,
+            response: { result }
+        }
+    } catch (error) {
+        const message = error instanceof Error ? error.message : 'unknown error'
+        return {
+            name: call.name,
+            id: call.id,
+            response: { result: `error (${message})` }
+        }
+    }
+}
+
+/**
+ * Process multiple function calls in parallel and return all responses.
+ */
+export async function handleGeminiFunctionCalls(
+    calls: GeminiFunctionCall[]
+): Promise<GeminiFunctionResponse[]> {
+    return Promise.all(calls.map(handleGeminiFunctionCall))
+}
diff --git a/web/src/realtime/index.ts b/web/src/realtime/index.ts
index a7fa2fbe9..1e080123b 100644
--- a/web/src/realtime/index.ts
+++ b/web/src/realtime/index.ts
@@ -15,8 +15,11 @@ export {
 // Client tools
 export { realtimeClientTools, registerSessionStore } from './realtimeClientTools'
 
-// Voice session component
+// Voice session components
 export { RealtimeVoiceSession, type RealtimeVoiceSessionProps } from './RealtimeVoiceSession'
+export { GeminiLiveVoiceSession, type GeminiLiveVoiceSessionProps } from './GeminiLiveVoiceSession'
+export { QwenVoiceSession, type QwenVoiceSessionProps } from './QwenVoiceSession'
+export { VoiceBackendSession, type VoiceBackendSessionProps } from './VoiceBackendSession'
 
 // Voice hooks
 export { voiceHooks, registerVoiceHooksStore } from './hooks/voiceHooks'
diff --git a/web/src/realtime/realtimeClientTools.ts b/web/src/realtime/realtimeClientTools.ts
index a2490ac81..962898c56 100644
--- a/web/src/realtime/realtimeClientTools.ts
+++ b/web/src/realtime/realtimeClientTools.ts
@@ -45,10 +45,8 @@ export const realtimeClientTools = {
             return 'error (session store not available)'
         }
 
-        if (VOICE_CONFIG.ENABLE_DEBUG_LOGGING) {
-            console.log('[Voice] messageCodingAgent called with:', message)
-            console.log('[Voice] Sending message to session:', sessionId)
-        }
+        console.log('[Voice] messageCodingAgent called with:', message)
+        console.log('[Voice] Sending message to session:', sessionId)
 
         sessionStore.sendMessage(sessionId, message)
         return "sent [DO NOT say anything else, simply say 'sent']"
diff --git a/web/src/sw.ts b/web/src/sw.ts
index ebe55dc0a..732ebef29 100644
--- a/web/src/sw.ts
+++ b/web/src/sw.ts
@@ -21,6 +21,10 @@ type PushPayload = {
     }
 }
 
+// Activate new SW immediately without waiting for all tabs to close
+self.addEventListener('install', () => { self.skipWaiting() })
+self.addEventListener('activate', (event) => { event.waitUntil(self.clients.claim()) })
+
 precacheAndRoute(self.__WB_MANIFEST)
 
 registerRoute(
diff --git a/web/tsconfig.json b/web/tsconfig.json
index 8b0682a4b..de7bcdca5 100644
--- a/web/tsconfig.json
+++ b/web/tsconfig.json
@@ -11,5 +11,6 @@
             "@/*": ["./src/*"]
         }
     },
-    "include": ["src"]
+    "include": ["src"],
+    "exclude": ["src/**/*.test.ts", "src/**/*.test.tsx", "src/**/*.spec.ts", "src/**/*.spec.tsx"]
 }