From 620db988d14868e89d0853703bb75e003e46900d Mon Sep 17 00:00:00 2001 From: DjDeveloperr Date: Mon, 4 May 2026 16:57:26 -0400 Subject: [PATCH 1/2] fix studio expose reliability --- README.md | 3 +- client/src/app/AppShell.tsx | 205 ++++++++- .../src/features/simulators/SimulatorMenu.tsx | 65 ++- client/src/features/stream/streamTypes.ts | 11 +- .../src/features/stream/streamWorkerClient.ts | 56 ++- client/src/features/stream/useLiveStream.ts | 21 +- client/src/features/toolbar/Toolbar.tsx | 3 + client/src/styles/components.css | 18 + docs/guide/video.md | 2 +- package.json | 1 + scripts/e2e-webrtc-reliability.mjs | 168 ++++++-- scripts/e2e-webrtc-stress.mjs | 328 ++++++++++++++ scripts/studio-provider-bridge.mjs | 405 ++++++++++++++++-- scripts/studio-provider-bridge.test.mjs | 38 +- server/src/api/routes.rs | 76 +++- server/src/main.rs | 162 ++++++- server/src/simulators/session.rs | 102 ++++- server/src/transport/webrtc.rs | 81 ++-- skills/simdeck/SKILL.md | 3 +- 19 files changed, 1580 insertions(+), 168 deletions(-) create mode 100644 scripts/e2e-webrtc-stress.mjs diff --git a/README.md b/README.md index 7b84346..39e95ce 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,8 @@ profile (`1170` longest edge, dynamic up to `60` fps). Use `--stream-quality quality|balanced|fast|smooth|economy|ci-software` to override it, or pass `--video-codec hardware` when a dedicated hardware encoder is preferable. The remote viewer renders live video with the browser's native video element; -the canvas is only used for input geometry. +the canvas is only used for input geometry. Remote viewers can choose 15, 30, +or 60 fps in the browser stream menu. CLI commands automatically use the same warm daemon: diff --git a/client/src/app/AppShell.tsx b/client/src/app/AppShell.tsx index 1e113e9..05c184e 100644 --- a/client/src/app/AppShell.tsx +++ b/client/src/app/AppShell.tsx @@ -8,7 +8,12 @@ import { type FormEvent, } from "react"; -import { ApiError, accessTokenFromLocation, pairBrowser } from "../api/client"; +import { + ApiError, + accessTokenFromLocation, + apiRequest, + pairBrowser, +} from "../api/client"; import { apiUrl, configureSimDeckClient } from "../api/config"; import { bootSimulator, @@ -98,12 +103,38 @@ const LOCAL_STREAM_DEFAULTS: StreamConfig = { quality: "quality", }; const REMOTE_STREAM_DEFAULTS: StreamConfig = { - encoder: "auto", + encoder: "software", fps: 30, quality: "balanced", }; +const STREAM_CONFIG_SYNC_INTERVAL_MS = 5000; +const STREAM_CONFIG_USER_CHANGE_GRACE_MS = 1000; +const STREAM_ENCODER_VALUES = new Set([ + "auto", + "hardware", + "software", +]); +const STREAM_QUALITY_VALUES = new Set([ + "balanced", + "ci-software", + "economy", + "fast", + "quality", + "smooth", +]); clearLegacyVolatileUiState(); +interface StreamQualityResponse { + ok?: boolean; + quality?: { + fps?: number; + maxEdge?: number; + profile?: string; + videoCodec?: string; + }; + videoCodec?: string; +} + function buildChromeUrl(udid: string, stamp: number): string { return buildAuthenticatedAssetUrl( `/api/simulators/${udid}/chrome.png`, @@ -296,6 +327,8 @@ export function AppShell({ const [streamConfig, setStreamConfig] = useState(() => remoteStream ? REMOTE_STREAM_DEFAULTS : LOCAL_STREAM_DEFAULTS, ); + const [streamConfigApplyKey, setStreamConfigApplyKey] = useState(0); + const [streamConfigReady, setStreamConfigReady] = useState(false); const [touchIndicators, setTouchIndicators] = useState([]); const menuRef = useRef(null); @@ -313,6 +346,8 @@ export function AppShell({ const gestureStartZoomRef = useRef(1); const accessibilityRequestIdRef = useRef(0); const accessibilityLoadingRef = useRef(false); + const streamConfigRequestIdRef = useRef(0); + const streamConfigUserChangeAtRef = useRef(0); const controlSocketRef = useRef<{ udid: string; socket: WebSocket; @@ -395,6 +430,52 @@ export function AppShell({ [], ); + const syncStreamConfig = useCallback(async () => { + const requestId = ++streamConfigRequestIdRef.current; + try { + const response = await apiRequest( + "/api/stream-quality", + ); + if (requestId !== streamConfigRequestIdRef.current) { + return; + } + if ( + Date.now() - streamConfigUserChangeAtRef.current < + STREAM_CONFIG_USER_CHANGE_GRACE_MS + ) { + return; + } + setStreamConfig((current) => + mergeStreamQualityResponse(current, response), + ); + } catch { + // Keep the existing local/default selection; the stream path will surface + // provider reachability errors separately. + } finally { + if (requestId === streamConfigRequestIdRef.current) { + setStreamConfigReady(true); + } + } + }, []); + + useEffect(() => { + let cancelled = false; + setStreamConfigReady(false); + + const run = () => { + if (!cancelled) { + void syncStreamConfig(); + } + }; + + run(); + const intervalId = window.setInterval(run, STREAM_CONFIG_SYNC_INTERVAL_MS); + return () => { + cancelled = true; + window.clearInterval(intervalId); + }; + }, [remoteStream, syncStreamConfig]); + const { deviceNaturalSize, error: streamError, @@ -407,20 +488,31 @@ export function AppShell({ streamCanvasKey, } = useLiveStream({ canvasElement: streamCanvasElement, + paused: !streamConfigReady, remote: remoteStream, simulator: selectedSimulator, streamConfig, + streamConfigApplyKey, }); const updateStreamEncoder = useCallback((encoder: StreamEncoder) => { + streamConfigUserChangeAtRef.current = Date.now(); + setStreamConfigReady(true); + setStreamConfigApplyKey((current) => current + 1); setStreamConfig((current) => ({ ...current, encoder })); }, []); const updateStreamFps = useCallback((fps: StreamFps) => { + streamConfigUserChangeAtRef.current = Date.now(); + setStreamConfigReady(true); + setStreamConfigApplyKey((current) => current + 1); setStreamConfig((current) => ({ ...current, fps })); }, []); const updateStreamQuality = useCallback((quality: StreamQualityPreset) => { + streamConfigUserChangeAtRef.current = Date.now(); + setStreamConfigReady(true); + setStreamConfigApplyKey((current) => current + 1); setStreamConfig((current) => ({ ...current, quality })); }, []); @@ -899,19 +991,26 @@ export function AppShell({ }); const pairingRequired = + !remoteStream && pairingEnabled && listError === AUTH_REQUIRED_MESSAGE && !accessTokenFromLocation(); - const visibleListError = selectedSimulator - ? friendlyClientError(listError) - : listError; + const visibleListError = + remoteStream && listError === AUTH_REQUIRED_MESSAGE + ? "" + : selectedSimulator + ? friendlyClientError(listError) + : listError; const toolbarError = pairingRequired ? localError : localError || (selectedSimulator ? "" : visibleListError); - const streamStatusMessage = streamStatus.error + const visibleStreamError = friendlyStreamError(streamStatus.error, { + remote: remoteStream, + }); + const streamStatusMessage = visibleStreamError ? streamStatus.detail - ? `${streamStatus.error} ${streamStatus.detail}` - : streamStatus.error + ? `${visibleStreamError} ${streamStatus.detail}` + : visibleStreamError : ""; const viewportStatusOverlayLabel = simulatorStatusOverlayLabel || @@ -919,7 +1018,7 @@ export function AppShell({ (selectedSimulator ? visibleListError : ""); const viewportHasStreamError = Boolean( streamStatus.state === "error" || - streamStatus.error || + visibleStreamError || (selectedSimulator && visibleListError), ); const deviceTransform = `translate(${pan.x}px, ${pan.y + autoViewportOffsetY}px) scale(${effectiveZoom})`; @@ -1061,6 +1160,9 @@ export function AppShell({ if (sendWebRtcControlMessage(encoded)) { return true; } + if (remoteStream) { + return false; + } const state = ensureControlSocket(udid); if (state.socket.readyState === WebSocket.OPEN) { state.socket.send(encoded); @@ -1462,6 +1564,7 @@ export function AppShell({ onToggleTouchOverlay={() => setTouchOverlayVisible((current) => !current) } + remoteStream={remoteStream} search={search} selectedSimulator={selectedSimulator} selectedSimulatorIdentifier={selectedSimulatorDetail} @@ -1607,3 +1710,87 @@ function friendlyClientError(message: string): string { } return message; } + +function friendlyStreamError( + message: string | undefined, + options: { remote: boolean }, +): string { + const normalized = message?.trim() ?? ""; + if (!normalized) { + return ""; + } + if ( + options.remote && + normalized.toLowerCase().includes(AUTH_REQUIRED_MESSAGE.toLowerCase()) + ) { + return ""; + } + return friendlyClientError(normalized); +} + +function mergeStreamQualityResponse( + current: StreamConfig, + response: StreamQualityResponse, +): StreamConfig { + const quality = response.quality ?? {}; + const next: StreamConfig = { + ...current, + encoder: normalizeStreamEncoder( + quality.videoCodec ?? response.videoCodec, + current.encoder, + ), + fps: normalizeStreamFps(quality.fps, current.fps), + maxEdge: normalizeMaxEdge(quality.maxEdge, current.maxEdge), + quality: normalizeStreamQuality(quality.profile, current.quality), + }; + return streamConfigsEqual(current, next) ? current : next; +} + +function normalizeStreamEncoder( + value: string | undefined, + fallback: StreamEncoder, +): StreamEncoder { + const normalized = value?.trim().toLowerCase() as StreamEncoder | undefined; + return normalized && STREAM_ENCODER_VALUES.has(normalized) + ? normalized + : fallback; +} + +function normalizeStreamQuality( + value: string | undefined, + fallback: StreamQualityPreset, +): StreamQualityPreset { + const normalized = value?.trim().toLowerCase() as + | StreamQualityPreset + | undefined; + return normalized && STREAM_QUALITY_VALUES.has(normalized) + ? normalized + : fallback; +} + +function normalizeStreamFps( + value: number | undefined, + fallback: StreamFps, +): StreamFps { + return typeof value === "number" && Number.isFinite(value) && value > 0 + ? Math.round(value) + : fallback; +} + +function normalizeMaxEdge( + value: number | undefined, + fallback: number | undefined, +): number | undefined { + return typeof value === "number" && Number.isFinite(value) && value > 0 + ? Math.round(value) + : fallback; +} + +function streamConfigsEqual(left: StreamConfig, right: StreamConfig): boolean { + return ( + left.encoder === right.encoder && + left.fps === right.fps && + left.maxEdge === right.maxEdge && + left.quality === right.quality + ); +} diff --git a/client/src/features/simulators/SimulatorMenu.tsx b/client/src/features/simulators/SimulatorMenu.tsx index c5dc0ce..228ce32 100644 --- a/client/src/features/simulators/SimulatorMenu.tsx +++ b/client/src/features/simulators/SimulatorMenu.tsx @@ -29,6 +29,7 @@ interface SimulatorMenuProps { onToggleDebug: () => void; onToggleMenu: () => void; onToggleTouchOverlay: () => void; + remoteStream?: boolean; search: string; selectedSimulator: SimulatorMetadata | null; setSelectedUDID: (udid: string) => void; @@ -56,12 +57,27 @@ export function SimulatorMenu({ onToggleDebug, onToggleMenu, onToggleTouchOverlay, + remoteStream = false, search, selectedSimulator, setSelectedUDID, streamConfig, touchOverlayVisible, }: SimulatorMenuProps) { + const fpsOptions = remoteStream + ? REMOTE_STREAM_FPS_OPTIONS + : LOCAL_STREAM_FPS_OPTIONS; + const activeFpsOption = fpsOptions.some( + (option) => option.value === streamConfig.fps, + ) + ? [] + : [{ label: String(streamConfig.fps), value: streamConfig.fps }]; + const activeQualityOption = STREAM_QUALITY_OPTIONS.some( + (option) => option.value === streamConfig.quality, + ) + ? [] + : [{ label: streamConfig.quality, value: streamConfig.quality }]; + return (
- ))} + {[...activeQualityOption, ...STREAM_QUALITY_OPTIONS].map( + (option) => ( + + ), + )}
@@ -208,19 +231,27 @@ const STREAM_ENCODERS: Array<{ label: string; value: StreamEncoder }> = [ { label: "Software", value: "software" }, ]; -const STREAM_FPS_OPTIONS: Array<{ label: string; value: StreamFps }> = [ +const LOCAL_STREAM_FPS_OPTIONS: Array<{ label: string; value: StreamFps }> = [ { label: "30", value: 30 }, { label: "60", value: 60 }, { label: "120", value: 120 }, ]; +const REMOTE_STREAM_FPS_OPTIONS: Array<{ label: string; value: StreamFps }> = [ + { label: "15", value: 15 }, + { label: "30", value: 30 }, + { label: "60", value: 60 }, +]; + const STREAM_QUALITY_OPTIONS: Array<{ label: string; value: StreamQualityPreset; }> = [ { label: "Quality", value: "quality" }, + { label: "Smooth", value: "smooth" }, { label: "Balanced", value: "balanced" }, { label: "Fast", value: "fast" }, + { label: "Economy", value: "economy" }, ]; function MenuIcon() { @@ -230,3 +261,11 @@ function MenuIcon() { ); } + +function formatStreamConfigSummary(streamConfig: StreamConfig): string { + const resolution = + typeof streamConfig.maxEdge === "number" && streamConfig.maxEdge > 0 + ? `${streamConfig.maxEdge}px` + : "Full res"; + return `${resolution} / ${streamConfig.fps} fps`; +} diff --git a/client/src/features/stream/streamTypes.ts b/client/src/features/stream/streamTypes.ts index 38682c7..42044f9 100644 --- a/client/src/features/stream/streamTypes.ts +++ b/client/src/features/stream/streamTypes.ts @@ -8,12 +8,19 @@ export interface StreamConnectTarget { } export type StreamEncoder = "auto" | "hardware" | "software"; -export type StreamFps = 30 | 60 | 120; -export type StreamQualityPreset = "quality" | "balanced" | "fast"; +export type StreamFps = number; +export type StreamQualityPreset = + | "balanced" + | "ci-software" + | "economy" + | "fast" + | "quality" + | "smooth"; export interface StreamConfig { encoder: StreamEncoder; fps: StreamFps; + maxEdge?: number; quality: StreamQualityPreset; } diff --git a/client/src/features/stream/streamWorkerClient.ts b/client/src/features/stream/streamWorkerClient.ts index 045fe22..d8ca043 100644 --- a/client/src/features/stream/streamWorkerClient.ts +++ b/client/src/features/stream/streamWorkerClient.ts @@ -16,7 +16,8 @@ const WEBRTC_FIRST_FRAME_TIMEOUT_MS = 10000; const WEBRTC_STALLED_FRAME_TIMEOUT_MS = 3000; const WEBRTC_LOCAL_RECEIVER_BUFFER_SECONDS = 0.001; const WEBRTC_REMOTE_RECEIVER_BUFFER_SECONDS = 0.06; -const WEBRTC_DISCONNECTED_GRACE_MS = 1000; +const WEBRTC_LOCAL_DISCONNECTED_GRACE_MS = 1000; +const WEBRTC_REMOTE_DISCONNECTED_GRACE_MS = 10000; const WEBRTC_RECONNECT_BASE_DELAY_MS = 250; const WEBRTC_RECONNECT_MAX_DELAY_MS = 1000; @@ -170,6 +171,7 @@ interface StreamClientBackend { ): Promise; destroy(): void; disconnect(): void; + applyStreamConfig?(config?: StreamConfig): void | Promise; sendControl?(payload: unknown): boolean; } @@ -199,6 +201,7 @@ class WebRtcStreamClient implements StreamClientBackend { private receiverStatsInterval = 0; private receiverStatsSeen = false; private shouldReconnect = false; + private streamConfigGeneration = 0; private telemetryChannel: RTCDataChannel | null = null; private stats: StreamStats = createEmptyStreamStats(); private video: HTMLVideoElement | null = null; @@ -278,7 +281,7 @@ class WebRtcStreamClient implements StreamClientBackend { }); try { - await postStreamConfigWithAuthRetry(target); + await postStreamConfigWithAuthRetry(target.streamConfig); if (generation !== this.connectGeneration) { return; } @@ -472,6 +475,18 @@ class WebRtcStreamClient implements StreamClientBackend { return sendDataChannelMessage(this.controlChannel, JSON.stringify(payload)); } + async applyStreamConfig(config?: StreamConfig) { + if (!config) { + return; + } + const generation = ++this.streamConfigGeneration; + await postStreamConfigWithAuthRetry(config); + if (generation !== this.streamConfigGeneration) { + return; + } + this.sendControl({ forceKeyframe: true, type: "streamControl" }); + } + destroy() { this.disconnect(); } @@ -551,7 +566,7 @@ class WebRtcStreamClient implements StreamClientBackend { generation, new Error("WebRTC connection disconnected."), ); - }, WEBRTC_DISCONNECTED_GRACE_MS); + }, disconnectedGraceMs(target)); } private scheduleReconnect(target: StreamConnectTarget, generation: number) { @@ -611,11 +626,8 @@ class WebRtcStreamClient implements StreamClientBackend { return; } if (frameAgeMs > WEBRTC_STALLED_FRAME_TIMEOUT_MS) { - this.handleConnectionError( - target, - generation, - new Error("WebRTC video stalled after rendering frames."), - ); + this.sendControl({ snapshot: true, type: "streamControl" }); + this.scheduleFrameWatchdog(target, generation); return; } this.scheduleFrameWatchdog(target, generation); @@ -1031,12 +1043,12 @@ async function postWebRtcOfferWithAuthRetry( } async function postStreamConfigWithAuthRetry( - target: StreamConnectTarget, + config: StreamConfig | undefined, ): Promise { - if (!target.streamConfig) { + if (!config) { return; } - const response = await postStreamConfig(target.streamConfig); + const response = await postStreamConfig(config); if (response.status !== 401) { if (!response.ok) { throw new Error(await response.text()); @@ -1044,7 +1056,7 @@ async function postStreamConfigWithAuthRetry( return; } await fetchHealth(); - const retry = await postStreamConfig(target.streamConfig); + const retry = await postStreamConfig(config); if (!retry.ok) { throw new Error(await retry.text()); } @@ -1054,6 +1066,7 @@ function postStreamConfig(config: StreamConfig): Promise { return fetch(apiUrl("/api/stream-quality"), { body: JSON.stringify({ fps: config.fps, + maxEdge: config.maxEdge, profile: config.quality, videoCodec: config.encoder, }), @@ -1105,6 +1118,12 @@ function receiverBufferSeconds(target: StreamConnectTarget): number | null { : WEBRTC_LOCAL_RECEIVER_BUFFER_SECONDS; } +function disconnectedGraceMs(target: StreamConnectTarget): number { + return target.remote + ? WEBRTC_REMOTE_DISCONNECTED_GRACE_MS + : WEBRTC_LOCAL_DISCONNECTED_GRACE_MS; +} + function configureReceiverCodecPreferences(transceiver: RTCRtpTransceiver) { if (!transceiver.setCodecPreferences) { return; @@ -1344,6 +1363,19 @@ export class StreamWorkerClient { ); } + applyStreamConfig(config?: StreamConfig) { + try { + const result = this.backend?.applyStreamConfig?.(config); + if (result && typeof result.catch === "function") { + result.catch((error: unknown) => { + console.warn("Failed to apply stream configuration.", error); + }); + } + } catch (error) { + console.warn("Failed to apply stream configuration.", error); + } + } + destroy() { if (this.disposed) { return; diff --git a/client/src/features/stream/useLiveStream.ts b/client/src/features/stream/useLiveStream.ts index 03669a2..54c90ad 100644 --- a/client/src/features/stream/useLiveStream.ts +++ b/client/src/features/stream/useLiveStream.ts @@ -33,6 +33,7 @@ interface UseLiveStreamOptions { remote?: boolean; simulator: SimulatorMetadata | null; streamConfig?: StreamConfig; + streamConfigApplyKey?: number; } interface UseLiveStreamResult { @@ -93,6 +94,7 @@ export function useLiveStream({ remote = false, simulator, streamConfig, + streamConfigApplyKey = 0, }: UseLiveStreamOptions): UseLiveStreamResult { const clientTelemetryIdRef = useRef(""); const workerClientRef = useRef(null); @@ -295,6 +297,22 @@ export function useLiveStream({ paused, remote, streamConfig?.encoder, + ]); + + useEffect(() => { + if ( + streamConfigApplyKey <= 0 || + paused || + !simulator?.isBooted || + !streamConfig + ) { + return; + } + workerClientRef.current?.applyStreamConfig(streamConfig); + }, [ + paused, + simulator?.isBooted, + streamConfigApplyKey, streamConfig?.fps, streamConfig?.quality, ]); @@ -309,8 +327,9 @@ export function useLiveStream({ const latestStatus = latestStatusRef.current; const now = Date.now(); if ( + !remote && now - lastVisualArtifactSampleAtRef.current >= - VISUAL_ARTIFACT_TELEMETRY_INTERVAL_MS + VISUAL_ARTIFACT_TELEMETRY_INTERVAL_MS ) { lastVisualArtifactSampleAtRef.current = now; const visualSample = await workerClientRef.current diff --git a/client/src/features/toolbar/Toolbar.tsx b/client/src/features/toolbar/Toolbar.tsx index 9da5f69..00baccd 100644 --- a/client/src/features/toolbar/Toolbar.tsx +++ b/client/src/features/toolbar/Toolbar.tsx @@ -34,6 +34,7 @@ interface ToolbarProps { onToggleHierarchy: () => void; onToggleMenu: () => void; onToggleTouchOverlay: () => void; + remoteStream?: boolean; search: string; selectedSimulator: SimulatorMetadata | null; selectedSimulatorIdentifier: string; @@ -75,6 +76,7 @@ export function Toolbar({ onToggleHierarchy, onToggleMenu, onToggleTouchOverlay, + remoteStream = false, search, selectedSimulator, selectedSimulatorIdentifier, @@ -135,6 +137,7 @@ export function Toolbar({ onToggleDebug={onToggleDebug} onToggleMenu={onToggleMenu} onToggleTouchOverlay={onToggleTouchOverlay} + remoteStream={remoteStream} search={search} selectedSimulator={selectedSimulator} setSelectedUDID={setSelectedUDID} diff --git a/client/src/styles/components.css b/client/src/styles/components.css index 8e81f11..145fd0c 100644 --- a/client/src/styles/components.css +++ b/client/src/styles/components.css @@ -321,6 +321,13 @@ padding-top: 0; } +.menu-section-heading { + display: flex; + align-items: baseline; + justify-content: space-between; + gap: 12px; +} + .menu-section-title { color: var(--text-muted); font-size: 11px; @@ -329,6 +336,17 @@ text-transform: uppercase; } +.menu-section-meta { + min-width: 0; + overflow: hidden; + color: var(--text-secondary); + font-size: 11px; + line-height: 1.2; + text-align: right; + text-overflow: ellipsis; + white-space: nowrap; +} + .menu-segment { display: grid; grid-template-columns: repeat(3, minmax(0, 1fr)); diff --git a/docs/guide/video.md b/docs/guide/video.md index 050c399..01e8305 100644 --- a/docs/guide/video.md +++ b/docs/guide/video.md @@ -32,7 +32,7 @@ It is CLI-only because it is meant for less capable machines where freshness matters more than maximum smoothness. The requested encoder mode is reported to clients in the JSON `videoCodec` field on `GET /api/health`. -The browser UI exposes stream controls for encoder, FPS, and quality. Local browser sessions default to hardware H.264, 120 fps, and `quality`/full resolution; remote browser sessions default to software H.264, 30 fps, and `balanced`. +The browser UI exposes stream controls for encoder, FPS, and quality. Local browser sessions default to hardware H.264, 120 fps, and `quality`/full resolution with FPS choices of 30, 60, and 120. Remote browser sessions default to software H.264, 30 fps, and `balanced` with FPS choices of 15, 30, and 60. ## Remote WebRTC ICE diff --git a/package.json b/package.json index 0c83344..cc40ee5 100644 --- a/package.json +++ b/package.json @@ -68,6 +68,7 @@ "test:stream:reliability": "node scripts/check-stream-reliability.mjs", "test:e2e:webrtc": "node scripts/e2e-webrtc-reliability.mjs", "test:e2e:webrtc:headed": "SIMDECK_E2E_HEADFUL=1 node scripts/e2e-webrtc-reliability.mjs", + "test:e2e:webrtc:stress": "node scripts/e2e-webrtc-stress.mjs", "test:studio-provider": "node --test scripts/studio-provider-bridge.test.mjs", "test:stress": "node scripts/stress/simdeck.mjs", "bench:encoder:build": "scripts/bench/build-encoder-benchmark.sh", diff --git a/scripts/e2e-webrtc-reliability.mjs b/scripts/e2e-webrtc-reliability.mjs index e9fbd8a..f659868 100644 --- a/scripts/e2e-webrtc-reliability.mjs +++ b/scripts/e2e-webrtc-reliability.mjs @@ -1,6 +1,6 @@ #!/usr/bin/env node -import { mkdtemp, rm } from "node:fs/promises"; +import { mkdtemp, rm, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { spawn } from "node:child_process"; @@ -8,6 +8,8 @@ import { spawn } from "node:child_process"; const serverUrl = new URL( process.env.SIMDECK_SERVER_URL ?? process.argv[2] ?? "http://127.0.0.1:4310", ); +const apiRootPath = + process.env.SIMDECK_E2E_API_ROOT ?? apiRootPathForViewerUrl(serverUrl); const durationMs = Number( process.env.SIMDECK_E2E_WEBRTC_MS ?? process.argv[3] ?? 60_000, ); @@ -20,12 +22,15 @@ const maxFrameGapMs = Number(process.env.SIMDECK_E2E_MAX_FRAME_GAP_MS ?? 250); const maxInteractionLatencyMs = Number( process.env.SIMDECK_E2E_MAX_INTERACTION_LATENCY_MS ?? 750, ); +const interactionsEnabled = process.env.SIMDECK_E2E_INTERACTIONS !== "0"; const maxPeerDisconnectedMs = Number( process.env.SIMDECK_E2E_MAX_PEER_DISCONNECTED_MS ?? 1000, ); +const maxDecoderDrops = Number(process.env.SIMDECK_E2E_MAX_DECODER_DROPS ?? 0); const visualSampleIntervalMs = Number( - process.env.SIMDECK_E2E_VISUAL_SAMPLE_INTERVAL_MS ?? 1000, + process.env.SIMDECK_E2E_VISUAL_SAMPLE_INTERVAL_MS ?? 5000, ); +const visualSamplesEnabled = visualSampleIntervalMs > 0; const maxVisualMeanDiff = Number( process.env.SIMDECK_E2E_MAX_VISUAL_MEAN_DIFF ?? 18, ); @@ -35,9 +40,29 @@ const maxVisualBadPixelRatio = Number( const maxVisualTileDiff = Number( process.env.SIMDECK_E2E_MAX_VISUAL_TILE_DIFF ?? 42, ); +const maxVisualFailureRatio = Number( + process.env.SIMDECK_E2E_MAX_VISUAL_FAILURE_RATIO ?? 0.2, +); +const maxConsecutiveVisualFailures = Number( + process.env.SIMDECK_E2E_MAX_CONSECUTIVE_VISUAL_FAILURES ?? 1, +); +const screenshotApiRoot = process.env.SIMDECK_E2E_SCREENSHOT_API_ROOT + ? new URL(process.env.SIMDECK_E2E_SCREENSHOT_API_ROOT) + : null; +const screenshotApiToken = process.env.SIMDECK_E2E_SCREENSHOT_API_TOKEN ?? ""; +const outputJsonPath = process.env.SIMDECK_E2E_OUTPUT_JSON ?? ""; +const requireVisualSamples = process.env.SIMDECK_E2E_REQUIRE_VISUAL !== "0"; function endpoint(path) { - return new URL(path, serverUrl).toString(); + return new URL(`${apiRootPath}${path}`, serverUrl).toString(); +} + +function apiRootPathForViewerUrl(url) { + const match = url.pathname.match(/^\/simulator\/([^/]+)/); + if (!match) { + return ""; + } + return `/api/provider-sessions/${match[1]}/simdeck`; } async function fetchJson(url, init) { @@ -50,6 +75,32 @@ async function fetchJson(url, init) { return response.json(); } +async function fetchReferenceScreenshotDataUrl(udid) { + if (!screenshotApiRoot) { + return null; + } + const url = new URL( + `/api/simulators/${encodeURIComponent(udid)}/screenshot.png`, + screenshotApiRoot, + ); + url.searchParams.set("artifactCheck", Date.now().toString()); + const headers = screenshotApiToken + ? { "x-simdeck-token": screenshotApiToken } + : undefined; + const response = await fetch(url, { + cache: "no-store", + headers, + }); + if (!response.ok) { + throw new Error( + `reference screenshot failed with ${response.status}: ${await response.text()}`, + ); + } + const contentType = response.headers.get("content-type") ?? "image/png"; + const base64 = Buffer.from(await response.arrayBuffer()).toString("base64"); + return `data:${contentType};base64,${base64}`; +} + async function waitForDevTools() { const versionUrl = `http://127.0.0.1:${debugPort}/json/version`; const startedAt = Date.now(); @@ -135,6 +186,28 @@ function numeric(value) { return typeof value === "number" && Number.isFinite(value) ? value : 0; } +function isVisualFailure(sample) { + return ( + sample.meanDiff > maxVisualMeanDiff || + sample.badPixelRatio > maxVisualBadPixelRatio || + sample.maxTileMeanDiff > maxVisualTileDiff + ); +} + +function maxConsecutiveMatches(values, predicate) { + let current = 0; + let max = 0; + for (const value of values) { + if (predicate(value)) { + current += 1; + max = Math.max(max, current); + } else { + current = 0; + } + } + return max; +} + function findClientStreams(metrics, clientId) { return (metrics.client_streams ?? []).filter( (stream) => stream.clientId === clientId, @@ -236,6 +309,7 @@ try { const interactionLatencies = []; const presentedInteractionLatencies = []; const visualSamples = []; + const visualSampleErrors = []; while (Date.now() - startedAt < durationMs) { await sleep(pollMs); const elapsed = Date.now() - startedAt; @@ -268,17 +342,24 @@ try { const visualUdid = latestByKind(streams, "webrtc")?.udid ?? latestByKind(streams, "page")?.udid; - if (visualUdid && elapsed - lastVisualSampleAt >= visualSampleIntervalMs) { + if ( + visualSamplesEnabled && + visualUdid && + elapsed - lastVisualSampleAt >= visualSampleIntervalMs + ) { lastVisualSampleAt = elapsed; const visualSample = await collectVisualArtifactSample( cdp, visualUdid, - ).catch(() => null); + ).catch((error) => { + visualSampleErrors.push(String(error?.message ?? error)); + return null; + }); if (visualSample) { visualSamples.push(visualSample); } } - if (elapsed - lastInteractionAt >= 5000) { + if (interactionsEnabled && elapsed - lastInteractionAt >= 5000) { lastInteractionAt = elapsed; const beforeInteraction = await collectDirectWebRtcStats(cdp); await interactWithSimulatorViewport(cdp, elapsed); @@ -340,9 +421,9 @@ try { `browser stats did not advance decoded/presented/RTP frames: decoded=${directDecodedDelta} presented=${directPresentedDelta} received=${directPacketsDelta}`, ); } - if (droppedDelta > 0 || directDroppedDelta > 0) { + if (droppedDelta > maxDecoderDrops || directDroppedDelta > maxDecoderDrops) { failures.push( - `decoder dropped frames: metrics=${droppedDelta} getStats=${directDroppedDelta}`, + `decoder dropped frames: metrics=${droppedDelta} getStats=${directDroppedDelta} max=${maxDecoderDrops}`, ); } if (reconnectDelta > 0) { @@ -369,15 +450,19 @@ try { `presented video did not advance within ${maxInteractionLatencyMs}ms after ${slowPresentedInteractions.length} interactions`, ); } - const visualFailures = visualSamples.filter( - (sample) => - sample.meanDiff > maxVisualMeanDiff || - sample.badPixelRatio > maxVisualBadPixelRatio || - sample.maxTileMeanDiff > maxVisualTileDiff, + const visualFailures = visualSamples.filter(isVisualFailure); + const visualFailureRatio = + visualSamples.length > 0 ? visualFailures.length / visualSamples.length : 0; + const consecutiveVisualFailures = maxConsecutiveMatches( + visualSamples, + isVisualFailure, ); - if (visualSamples.length === 0) { + if (requireVisualSamples && visualSamples.length === 0) { failures.push("no visual artifact samples were collected"); - } else if (visualFailures.length > 0) { + } else if ( + visualFailureRatio > maxVisualFailureRatio || + consecutiveVisualFailures > maxConsecutiveVisualFailures + ) { const worst = visualFailures .slice() .sort((a, b) => b.maxTileMeanDiff - a.maxTileMeanDiff)[0]; @@ -388,6 +473,7 @@ try { } const summary = { + apiRootPath, clientId, directStatsEnd, directStatsStart, @@ -401,14 +487,23 @@ try { maxPeerDisconnectedMs, maxPeerDisconnectedObservedMs, maxInteractionLatencyMs, + maxDecoderDrops, + interactionsEnabled, + visualSamplesEnabled, interactionLatencies, presentedInteractionLatencies, visualThresholds: { maxVisualBadPixelRatio, maxVisualMeanDiff, maxVisualTileDiff, + maxVisualFailureRatio, + maxConsecutiveVisualFailures, }, + visualFailureRatio, + consecutiveVisualFailures, + screenshotApiRoot: screenshotApiRoot?.origin ?? null, visualSamples, + visualSampleErrors: visualSampleErrors.slice(0, 10), renderedDelta, decodedDelta, receivedDelta, @@ -427,10 +522,14 @@ try { })), }; if (failures.length > 0) { - console.error(JSON.stringify({ ...summary, failures }, null, 2)); + const result = { ...summary, failures }; + await writeSummary(result); + console.error(JSON.stringify(result, null, 2)); process.exitCode = 1; } else { - console.log(JSON.stringify({ ...summary, ok: true }, null, 2)); + const result = { ...summary, ok: true }; + await writeSummary(result); + console.log(JSON.stringify(result, null, 2)); } } finally { cdp?.close(); @@ -438,6 +537,13 @@ try { await rm(profileDir, { force: true, recursive: true }); } +async function writeSummary(summary) { + if (!outputJsonPath) { + return; + } + await writeFile(outputJsonPath, JSON.stringify(summary, null, 2)); +} + async function stopChrome(chromeProcess) { if (chromeProcess.exitCode !== null || chromeProcess.signalCode !== null) { return; @@ -556,6 +662,10 @@ async function waitForPresentedFrameAfterInteraction( } async function collectVisualArtifactSample(cdp, udid) { + const referenceDataUrl = await fetchReferenceScreenshotDataUrl(udid); + const screenshotPath = `${apiRootPath}/api/simulators/${encodeURIComponent( + udid, + )}/screenshot.png`; return evaluate( cdp, ` @@ -565,13 +675,7 @@ async function collectVisualArtifactSample(cdp, udid) { throw new Error("live video is not ready for visual comparison"); } - const response = await fetch("/api/simulators/${udid}/screenshot.png?artifactCheck=" + Date.now(), { - cache: "no-store", - }); - if (!response.ok) { - throw new Error("native screenshot failed with " + response.status); - } - const source = await createImageBitmap(await response.blob()); + const source = await loadReferenceScreenshot(); const width = Math.min(240, video.videoWidth, source.width); const height = Math.max(1, Math.round(width * (video.videoHeight / video.videoWidth))); const canvas = document.createElement("canvas"); @@ -633,6 +737,22 @@ async function collectVisualArtifactSample(cdp, udid) { videoWidth: video.videoWidth, width, }; + + async function loadReferenceScreenshot() { + const dataUrl = ${JSON.stringify(referenceDataUrl)}; + if (dataUrl) { + return createImageBitmap(await (await fetch(dataUrl)).blob()); + } + const response = await fetch(new URL(${JSON.stringify( + screenshotPath, + )} + "?artifactCheck=" + Date.now(), window.location.href).toString(), { + cache: "no-store", + }); + if (!response.ok) { + throw new Error("native screenshot failed with " + response.status); + } + return createImageBitmap(await response.blob()); + } })() `, ); diff --git a/scripts/e2e-webrtc-stress.mjs b/scripts/e2e-webrtc-stress.mjs new file mode 100644 index 0000000..8e3047a --- /dev/null +++ b/scripts/e2e-webrtc-stress.mjs @@ -0,0 +1,328 @@ +#!/usr/bin/env node + +import { spawn } from "node:child_process"; +import { mkdtemp, readFile, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { fileURLToPath } from "node:url"; + +const viewerUrl = new URL( + process.env.SIMDECK_STRESS_VIEWER_URL ?? + process.argv[2] ?? + "http://127.0.0.1:4310", +); +const durationMs = positiveInt( + process.env.SIMDECK_STRESS_WEBRTC_MS ?? process.argv[3], + 60_000, +); +const totalClients = positiveInt(process.env.SIMDECK_STRESS_CLIENTS, 10); +const steadyClients = Math.min( + totalClients, + positiveInt(process.env.SIMDECK_STRESS_STEADY_CLIENTS, 5), +); +const churnClients = Math.max(0, totalClients - steadyClients); +const churnSessionMs = positiveInt( + process.env.SIMDECK_STRESS_CHURN_SESSION_MS, + 12_000, +); +const chromePortBase = positiveInt( + process.env.SIMDECK_STRESS_CHROME_PORT_BASE, + 9400, +); +const maxPeerDisconnectedMs = positiveInt( + process.env.SIMDECK_STRESS_MAX_PEER_DISCONNECTED_MS, + 3000, +); +const maxDecoderDrops = positiveInt( + process.env.SIMDECK_STRESS_MAX_DECODER_DROPS, + 0, +); +const settleMs = positiveInt(process.env.SIMDECK_STRESS_SETTLE_MS, 15_000); +const childScript = new URL("./e2e-webrtc-reliability.mjs", import.meta.url); +const childScriptPath = fileURLToPath(childScript); +const outputDir = await mkdtemp(join(tmpdir(), "simdeck-webrtc-stress-")); + +const startedAt = Date.now(); +const initialMetrics = await fetchMetrics().catch((error) => ({ + error: String(error?.message ?? error), +})); +const workers = []; + +for (let index = 0; index < steadyClients; index += 1) { + workers.push( + runViewer({ + label: `steady-${index + 1}`, + durationMs, + port: chromePortBase + index, + }), + ); +} + +for (let index = 0; index < churnClients; index += 1) { + workers.push( + runChurnViewer({ + index, + label: `churn-${index + 1}`, + port: chromePortBase + steadyClients + index, + }), + ); +} + +const results = await Promise.all(workers); +await sleep(settleMs); +const finalMetrics = await fetchMetrics().catch((error) => ({ + error: String(error?.message ?? error), +})); +const failures = results.flatMap((result) => result.failures); +const activeStreamLeak = Math.max( + 0, + numeric(finalMetrics.active_streams) - numeric(initialMetrics.active_streams), +); +if (activeStreamLeak > 0) { + failures.push( + `active stream count did not return to baseline after ${settleMs}ms: initial=${numeric( + initialMetrics.active_streams, + )} final=${numeric(finalMetrics.active_streams)}`, + ); +} +const completedRuns = results.reduce( + (sum, result) => sum + result.runs.length, + 0, +); +const successfulRuns = results.reduce( + (sum, result) => sum + result.runs.filter((run) => run.ok).length, + 0, +); +const reconnects = results.reduce( + (sum, result) => + sum + + result.runs.reduce( + (innerSum, run) => innerSum + numeric(run.summary?.reconnectDelta), + 0, + ), + 0, +); +const decoderDrops = results.reduce( + (sum, result) => + sum + + result.runs.reduce((innerSum, run) => { + const directDroppedDelta = Math.max( + 0, + numeric(run.summary?.directStatsEnd?.framesDropped) - + numeric(run.summary?.directStatsStart?.framesDropped), + ); + return ( + innerSum + + Math.max(numeric(run.summary?.droppedDelta), directDroppedDelta) + ); + }, 0), + 0, +); +const elapsedMs = Date.now() - startedAt; +const summary = { + ok: failures.length === 0, + viewerUrl: viewerUrl.toString(), + durationMs, + totalClients, + steadyClients, + churnClients, + churnSessionMs, + settleMs, + completedRuns, + successfulRuns, + reconnects, + decoderDrops, + maxDecoderDrops, + activeStreamLeak, + elapsedMs, + initialMetrics, + finalMetrics, + results: results.map((result) => ({ + label: result.label, + runs: result.runs.map((run) => ({ + ok: run.ok, + exitCode: run.exitCode, + durationMs: run.durationMs, + reconnectDelta: run.summary?.reconnectDelta, + decodedDelta: run.summary?.decodedDelta, + receivedDelta: run.summary?.receivedDelta, + droppedDelta: run.summary?.droppedDelta, + maxObservedFrameGapMs: run.summary?.maxObservedFrameGapMs, + maxPeerDisconnectedObservedMs: run.summary?.maxPeerDisconnectedObservedMs, + })), + failures: result.failures, + })), + failures, +}; + +console.log(JSON.stringify(summary, null, 2)); +await rm(outputDir, { force: true, recursive: true }); +if (!summary.ok) { + process.exit(1); +} + +async function runChurnViewer({ index, label, port }) { + const runs = []; + const failures = []; + let iteration = 0; + while (Date.now() - startedAt < durationMs) { + iteration += 1; + const remaining = durationMs - (Date.now() - startedAt); + if (remaining < 3000) { + break; + } + const runDurationMs = Math.min(churnSessionMs, remaining); + const run = await runViewerOnce({ + label: `${label}-${iteration}`, + durationMs: runDurationMs, + port: port + iteration * totalClients, + }); + runs.push(run); + if (!run.ok) { + failures.push(`${label}-${iteration}: ${run.failure}`); + } + await sleep(250 + index * 50); + } + return { label, runs, failures }; +} + +async function runViewer({ label, durationMs, port }) { + const run = await runViewerOnce({ label, durationMs, port }); + return { + label, + runs: [run], + failures: run.ok ? [] : [`${label}: ${run.failure}`], + }; +} + +function runViewerOnce({ label, durationMs, port }) { + const started = Date.now(); + return new Promise((resolve) => { + let stdout = ""; + let stderr = ""; + const outputJsonPath = join(outputDir, `${label}.json`); + const child = spawn( + process.execPath, + [childScriptPath, viewerUrl.toString(), String(durationMs)], + { + env: { + ...process.env, + SIMDECK_E2E_CHROME_PORT: String(port), + SIMDECK_E2E_INTERACTIONS: "0", + SIMDECK_E2E_MAX_PEER_DISCONNECTED_MS: String(maxPeerDisconnectedMs), + SIMDECK_E2E_MAX_DECODER_DROPS: String(maxDecoderDrops), + SIMDECK_E2E_OUTPUT_JSON: outputJsonPath, + SIMDECK_E2E_REQUIRE_VISUAL: "0", + SIMDECK_E2E_VISUAL_SAMPLE_INTERVAL_MS: + process.env.SIMDECK_E2E_VISUAL_SAMPLE_INTERVAL_MS ?? "0", + SIMDECK_E2E_WEBRTC_MS: String(durationMs), + }, + stdio: ["ignore", "pipe", "pipe"], + }, + ); + child.stdout.on("data", (chunk) => { + stdout += chunk.toString(); + }); + child.stderr.on("data", (chunk) => { + stderr += chunk.toString(); + }); + const timeout = setTimeout(() => { + child.kill("SIGTERM"); + setTimeout(() => { + child.kill("SIGKILL"); + }, 5000).unref(); + }, durationMs + 90_000).unref(); + child.on("close", async (exitCode, signal) => { + clearTimeout(timeout); + const summary = + (await readSummaryFile(outputJsonPath)) ?? parseLastJsonObject(stdout); + const ok = exitCode === 0 && summary?.ok === true; + resolve({ + ok, + label, + exitCode, + signal, + durationMs: Date.now() - started, + summary, + failure: ok + ? "" + : summary?.failures?.join("; ") || + stderr.trim().slice(-1000) || + stdout.trim().slice(-1000) || + `viewer exited with ${exitCode ?? signal}`, + }); + }); + }); +} + +async function readSummaryFile(path) { + try { + return JSON.parse(await readFile(path, "utf8")); + } catch { + return null; + } +} + +function parseLastJsonObject(output) { + const first = output.indexOf("{"); + const last = output.lastIndexOf("}"); + if (first >= 0 && last > first) { + try { + return JSON.parse(output.slice(first, last + 1)); + } catch { + // Fall back to scanning below when stdout contains extra braces. + } + } + for ( + let index = output.lastIndexOf("{"); + index >= 0; + index = output.lastIndexOf("{", index - 1) + ) { + try { + return JSON.parse(output.slice(index)); + } catch { + continue; + } + } + return null; +} + +async function fetchMetrics() { + const response = await fetch(endpoint("/api/metrics"), { + signal: AbortSignal.timeout(10_000), + }); + if (!response.ok) { + throw new Error( + `metrics returned ${response.status}: ${await response.text()}`, + ); + } + return response.json(); +} + +function endpoint(path) { + return new URL( + `${apiRootPathForViewerUrl(viewerUrl)}${path}`, + viewerUrl, + ).toString(); +} + +function apiRootPathForViewerUrl(url) { + const match = url.pathname.match(/^\/simulator\/([^/]+)/); + if (!match) { + return ""; + } + return `/api/provider-sessions/${match[1]}/simdeck`; +} + +function positiveInt(value, fallback) { + const parsed = Number(value); + return Number.isFinite(parsed) && parsed > 0 ? Math.floor(parsed) : fallback; +} + +function numeric(value) { + return typeof value === "number" && Number.isFinite(value) ? value : 0; +} + +function sleep(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} diff --git a/scripts/studio-provider-bridge.mjs b/scripts/studio-provider-bridge.mjs index 169bc01..7ae27f8 100644 --- a/scripts/studio-provider-bridge.mjs +++ b/scripts/studio-provider-bridge.mjs @@ -1,8 +1,13 @@ #!/usr/bin/env node import crypto from "node:crypto"; +import { execFile } from "node:child_process"; +import fs from "node:fs"; import os from "node:os"; import { pathToFileURL } from "node:url"; +import { promisify } from "node:util"; + +const execFileAsync = promisify(execFile); const cloudUrl = ( process.env.SIMDECK_CLOUD_URL || "https://simdeck.djdev.me" @@ -10,7 +15,7 @@ const cloudUrl = ( let previewId = process.env.PREVIEW_ID || ""; let providerToken = process.env.PROVIDER_TOKEN || ""; let publicUrl = process.env.SIMDECK_STUDIO_URL || ""; -const localUrl = ( +let localUrl = ( process.env.SIMDECK_LOCAL_URL || "http://127.0.0.1:4310" ).replace(/\/$/, ""); let localToken = process.env.SIMDECK_LOCAL_TOKEN || providerToken; @@ -25,16 +30,45 @@ const proxyTimeoutMs = Math.max( 1000, Number(process.env.SIMDECK_PROVIDER_PROXY_TIMEOUT_MS || 25000), ); +const cloudRequestTimeoutMs = Math.max( + 5000, + Number(process.env.SIMDECK_PROVIDER_CLOUD_TIMEOUT_MS || 30000), +); const simulatorListCacheTtlMs = Math.max( 0, Number(process.env.SIMDECK_PROVIDER_SIMULATORS_CACHE_MS || 5000), ); +const localUnavailableLogIntervalMs = Math.max( + 5000, + Number( + process.env.SIMDECK_PROVIDER_LOCAL_UNAVAILABLE_LOG_INTERVAL_MS || 30000, + ), +); +const localUnavailableRestartMs = Math.max( + 15000, + Number(process.env.SIMDECK_PROVIDER_LOCAL_UNAVAILABLE_RESTART_MS || 45000), +); const providerId = process.env.SIMDECK_STUDIO_PROVIDER_ID || stableLocalProviderId(); +const parentPid = Number(process.env.SIMDECK_PROVIDER_PARENT_PID || 0); +let localDaemonPid = Number(process.env.SIMDECK_LOCAL_DAEMON_PID || 0); +let localDaemonLog = process.env.SIMDECK_LOCAL_DAEMON_LOG || ""; +const localDaemonCommand = process.env.SIMDECK_LOCAL_DAEMON_COMMAND || ""; +const localDaemonRestartArgs = parseJsonArrayEnv( + "SIMDECK_LOCAL_DAEMON_RESTART_ARGS_JSON", +); +const localDaemonStatusArgs = parseJsonArrayEnv( + "SIMDECK_LOCAL_DAEMON_STATUS_ARGS_JSON", +) ?? ["daemon", "status"]; let stopped = false; let lastRegisterAt = 0; +let localUnavailableSince = 0; +let lastLocalUnavailableLogAt = 0; +let lastLocalRestartAt = 0; +let localRestartInFlight = null; let registered = false; +let providerMarkedTerminal = false; const activeRequests = new Set(); const responseCache = new Map(); const inFlightCache = new Map(); @@ -45,6 +79,15 @@ if (isMainModule()) { stopped = true; }); } + if (Number.isInteger(parentPid) && parentPid > 0) { + setInterval(() => { + try { + process.kill(parentPid, 0); + } catch { + stopped = true; + } + }, 1000).unref(); + } try { if (!previewId || !providerToken) { @@ -97,7 +140,7 @@ if (isMainModule()) { if (activeRequests.size > 0) { await Promise.allSettled(activeRequests); } - if (registered) { + if (registered && !providerMarkedTerminal) { await markProviderExpired(); } } @@ -105,7 +148,13 @@ if (isMainModule()) { async function registerProvider() { try { - const metadata = await localProviderMetadata(); + let metadata = await localProviderMetadata(); + updateLocalAvailability(metadata); + await maybeRestartLocalDaemon(metadata); + if (!metadata.ok && !localUnavailableSince) { + metadata = await localProviderMetadata(); + updateLocalAvailability(metadata); + } await fetchJson(`${cloudUrl}/api/actions/providers/register`, { previewId, providerToken, @@ -120,6 +169,14 @@ async function registerProvider() { }); registered = true; lastRegisterAt = Date.now(); + if (shouldStopForLocalMetadata(metadata, localDaemonProcessExited())) { + providerMarkedTerminal = true; + stopped = true; + await markProviderFailed( + metadata.failureReason || + "Local SimDeck daemon supervisor process exited.", + ); + } } catch (error) { console.error( `[simdeck-provider-bridge] provider registration failed: ${error instanceof Error ? error.message : String(error)}`, @@ -127,6 +184,146 @@ async function registerProvider() { } } +export function shouldStopForLocalMetadata(metadata, daemonProcessExited) { + return !metadata.ok && daemonProcessExited; +} + +function localDaemonProcessExited() { + if ( + Number.isInteger(localDaemonPid) && + localDaemonPid > 0 && + !processIsRunning(localDaemonPid) + ) { + console.error( + `[simdeck-provider-bridge] local SimDeck daemon process ${localDaemonPid} is no longer running.`, + ); + printRecentDaemonLog(); + return true; + } + return false; +} + +function updateLocalAvailability(metadata) { + if (metadata.ok) { + localUnavailableSince = 0; + return; + } + localUnavailableSince ||= Date.now(); + const elapsed = Date.now() - localUnavailableSince; + if (Date.now() - lastLocalUnavailableLogAt >= localUnavailableLogIntervalMs) { + lastLocalUnavailableLogAt = Date.now(); + console.error( + `[simdeck-provider-bridge] local SimDeck HTTP unavailable for ${elapsed}ms while daemon supervisor is still running; keeping Studio bridge alive.`, + ); + if (metadata.failureReason) { + console.error(`[simdeck-provider-bridge] ${metadata.failureReason}`); + } + printRecentDaemonLog(); + } +} + +function processIsRunning(pid) { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + +function printRecentDaemonLog() { + const lines = recentDaemonLogLines(); + if (!lines) { + return; + } + console.error("[simdeck-provider-bridge] recent daemon log:"); + console.error(lines); +} + +function recentDaemonLogLines() { + if (!localDaemonLog) { + return ""; + } + try { + const data = fs.readFileSync(localDaemonLog, "utf8"); + return data.split(/\r?\n/).filter(Boolean).slice(-20).join("\n"); + } catch { + return ""; + } +} + +async function maybeRestartLocalDaemon(metadata) { + if (metadata.ok || stopped || !localUnavailableSince) { + return; + } + if (!localDaemonCommand || !localDaemonRestartArgs) { + return; + } + const elapsed = Date.now() - localUnavailableSince; + if (elapsed < localUnavailableRestartMs) { + return; + } + if (localRestartInFlight) { + await localRestartInFlight; + return; + } + if (Date.now() - lastLocalRestartAt < localUnavailableRestartMs) { + return; + } + + lastLocalRestartAt = Date.now(); + localRestartInFlight = restartLocalDaemon() + .catch((error) => { + console.error( + `[simdeck-provider-bridge] local SimDeck daemon restart failed: ${describeError(error)}`, + ); + }) + .finally(() => { + localRestartInFlight = null; + }); + await localRestartInFlight; +} + +async function restartLocalDaemon() { + console.error( + `[simdeck-provider-bridge] local SimDeck HTTP has been unavailable for ${Date.now() - localUnavailableSince}ms; restarting local daemon.`, + ); + printRecentDaemonLog(); + await execFileAsync(localDaemonCommand, localDaemonRestartArgs, { + timeout: 90_000, + windowsHide: true, + }); + const { stdout } = await execFileAsync( + localDaemonCommand, + localDaemonStatusArgs, + { + timeout: 15_000, + windowsHide: true, + }, + ); + const status = JSON.parse(stdout); + const daemon = status.daemon ?? status; + if (daemon.httpUrl) { + localUrl = String(daemon.httpUrl).replace(/\/$/, ""); + } + if (daemon.accessToken) { + localToken = String(daemon.accessToken); + } + if (daemon.pid) { + localDaemonPid = Number(daemon.pid); + } + if (daemon.logPath) { + localDaemonLog = String(daemon.logPath); + } + responseCache.clear(); + inFlightCache.clear(); + localUnavailableSince = 0; + lastLocalUnavailableLogAt = 0; + console.error( + `[simdeck-provider-bridge] local SimDeck daemon restarted at ${localUrl}.`, + ); +} + async function createLocalProviderSession() { const response = await fetchJson(`${cloudUrl}/api/local-provider-sessions`, { providerId, @@ -140,25 +337,38 @@ async function createLocalProviderSession() { } async function markProviderExpired() { + await markProviderStatus("expired"); +} + +async function markProviderFailed(reason) { + if (reason) { + console.error(`[simdeck-provider-bridge] ${reason}`); + } + await markProviderStatus("failed"); +} + +async function markProviderStatus(status) { try { await fetchJson(`${cloudUrl}/api/actions/providers/register`, { previewId, providerToken, baseUrl: publicUrl, - status: "expired", + status, }); } catch (error) { console.error( - `[simdeck-provider-bridge] provider expiration failed: ${error instanceof Error ? error.message : String(error)}`, + `[simdeck-provider-bridge] provider ${status} update failed: ${error instanceof Error ? error.message : String(error)}`, ); } } async function localProviderMetadata() { let health = null; + let healthError = null; try { health = await localJson("/api/health"); - } catch { + } catch (error) { + healthError = error; health = null; } @@ -169,26 +379,90 @@ async function localProviderMetadata() { simulators.simulators?.[0] ?? null; return { health, ok: true, simulator: selected }; - } catch { + } catch (error) { if (health) { return { health, ok: true, simulator: null }; } - return { health: null, ok: false, simulator: null }; + return { + failureReason: localProviderFailureReason(healthError, error), + health: null, + ok: false, + simulator: null, + }; } } +function localProviderFailureReason(healthError, simulatorError) { + const healthMessage = describeError(healthError); + const simulatorMessage = describeError(simulatorError); + return [healthMessage, simulatorMessage].filter(Boolean).join("; "); +} + +function describeError(error) { + if (!error) { + return ""; + } + if (!(error instanceof Error)) { + return String(error); + } + const cause = error.cause; + if (cause instanceof Error && cause.message) { + return `${error.message}: ${cause.message}`; + } + return error.message; +} + async function handleRequest(request) { - try { - const responsePayload = await cachedProxyResponse(request); + let responsePayload; + if (isWebSocketUpgradeRequest(request)) { await complete({ requestId: request.id, - ...responsePayload, + responseBodyBase64: Buffer.from( + "Studio provider RPC does not tunnel WebSocket upgrade requests.", + ).toString("base64"), + responseHeaders: { "content-type": "text/plain; charset=utf-8" }, + responseStatus: 426, }); + return; + } + try { + responsePayload = await cachedProxyResponse(request); } catch (error) { + console.error( + `[simdeck-provider-bridge] request ${request.id} ${request.method} ${request.path} failed: ${describeError(error)}`, + ); + await handleLocalProxyFailure(error); + if (request.method !== "GET") { + await complete({ + requestId: request.id, + error: describeError(error), + }); + return; + } + try { + responsePayload = await proxyLocalRequest(request); + } catch (retryError) { + console.error( + `[simdeck-provider-bridge] request ${request.id} ${request.method} ${request.path} retry failed: ${describeError(retryError)}`, + ); + await complete({ + requestId: request.id, + error: describeError(retryError), + }); + return; + } + } + + try { await complete({ requestId: request.id, - error: error instanceof Error ? error.message : String(error), + ...responsePayload, }); + } catch (error) { + console.error( + `[simdeck-provider-bridge] request ${request.id} ${request.method} ${request.path} completion failed: ${describeError(error)}`, + ); + throw error; } } @@ -246,6 +520,18 @@ function cacheKeyForRequest(request) { return `${target.pathname}?${target.searchParams.toString()}`; } +export function isWebSocketUpgradeRequest(request) { + const headers = new Headers(request.headers || {}); + return ( + headers.get("upgrade")?.toLowerCase() === "websocket" || + headers + .get("connection") + ?.toLowerCase() + .split(",") + .some((value) => value.trim() === "upgrade") === true + ); +} + async function proxyLocalRequest(request) { const target = new URL(request.path, `${localUrl}/`); if (!target.searchParams.has("simdeckToken")) { @@ -255,14 +541,24 @@ async function proxyLocalRequest(request) { headers.set("x-simdeck-token", localToken); headers.delete("host"); headers.delete("content-length"); - const response = await fetch(target, { - body: request.bodyBase64 - ? Buffer.from(request.bodyBase64, "base64") - : undefined, - headers, - method: request.method, - signal: AbortSignal.timeout(proxyTimeoutMs), - }); + let response; + try { + response = await fetch(target, { + body: request.bodyBase64 + ? Buffer.from(request.bodyBase64, "base64") + : undefined, + headers, + method: request.method, + signal: AbortSignal.timeout(proxyTimeoutMs), + }); + } catch (error) { + throw new Error( + `Local SimDeck request ${target.origin}${target.pathname} failed`, + { + cause: error, + }, + ); + } const responseHeaders = {}; for (const [name, value] of response.headers.entries()) { const lower = name.toLowerCase(); @@ -286,13 +582,39 @@ async function proxyLocalRequest(request) { }; } +async function handleLocalProxyFailure(error) { + const message = describeError(error); + if (!message.includes("Local SimDeck request")) { + return; + } + updateLocalAvailability({ + failureReason: message, + health: null, + ok: false, + simulator: null, + }); + await maybeRestartLocalDaemon({ + failureReason: message, + health: null, + ok: false, + simulator: null, + }); +} + async function localJson(path) { const target = new URL(path, `${localUrl}/`); target.searchParams.set("simdeckToken", localToken); - const response = await fetch(target, { - headers: { "x-simdeck-token": localToken }, - signal: AbortSignal.timeout(Math.min(proxyTimeoutMs, 5000)), - }); + let response; + try { + response = await fetch(target, { + headers: { "x-simdeck-token": localToken }, + signal: AbortSignal.timeout(Math.min(proxyTimeoutMs, 5000)), + }); + } catch (error) { + throw new Error(`Local SimDeck request ${target.origin}${path} failed`, { + cause: error, + }); + } if (!response.ok) { throw new Error( `${target.href} failed with ${response.status}: ${await response.text()}`, @@ -310,11 +632,17 @@ async function complete(payload) { } async function fetchJson(url, body) { - const response = await fetch(url, { - body: JSON.stringify(body), - headers: { "content-type": "application/json" }, - method: "POST", - }); + let response; + try { + response = await fetch(url, { + body: JSON.stringify(body), + headers: { "content-type": "application/json" }, + method: "POST", + signal: AbortSignal.timeout(cloudRequestTimeoutMs), + }); + } catch (error) { + throw new Error(`Studio request ${url} failed`, { cause: error }); + } if (response.status === 204) { return null; } @@ -356,6 +684,25 @@ function escapeRegExp(value) { return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } +function parseJsonArrayEnv(name) { + const raw = process.env[name]; + if (!raw) { + return null; + } + try { + const value = JSON.parse(raw); + if ( + Array.isArray(value) && + value.every((item) => typeof item === "string") + ) { + return value; + } + } catch { + return null; + } + return null; +} + function isMainModule() { return import.meta.url === pathToFileURL(process.argv[1] || "").href; } diff --git a/scripts/studio-provider-bridge.test.mjs b/scripts/studio-provider-bridge.test.mjs index 927127f..55a2ba9 100644 --- a/scripts/studio-provider-bridge.test.mjs +++ b/scripts/studio-provider-bridge.test.mjs @@ -1,7 +1,11 @@ import assert from "node:assert/strict"; import test from "node:test"; -import { normalizeStudioPublicUrlWithCloud } from "./studio-provider-bridge.mjs"; +import { + isWebSocketUpgradeRequest, + normalizeStudioPublicUrlWithCloud, + shouldStopForLocalMetadata, +} from "./studio-provider-bridge.mjs"; const cloudUrl = "https://simdeck.djdev.me"; @@ -41,3 +45,35 @@ test("preserves valid external tunnel URLs", () => { "https://preview.example.test/simulator/preview-123", ); }); + +test("keeps provider bridge alive when only local HTTP is unavailable", () => { + assert.equal(shouldStopForLocalMetadata({ ok: false }, false), false); +}); + +test("stops provider bridge when local daemon supervisor exits", () => { + assert.equal(shouldStopForLocalMetadata({ ok: false }, true), true); +}); + +test("does not stop provider bridge while local daemon metadata is healthy", () => { + assert.equal(shouldStopForLocalMetadata({ ok: true }, true), false); +}); + +test("detects websocket upgrade requests before local fetch proxying", () => { + assert.equal( + isWebSocketUpgradeRequest({ + headers: { + connection: "keep-alive, Upgrade", + upgrade: "websocket", + }, + }), + true, + ); + assert.equal( + isWebSocketUpgradeRequest({ + headers: { + accept: "application/json", + }, + }), + false, + ); +}); diff --git a/server/src/api/routes.rs b/server/src/api/routes.rs index 15e7cb0..99ea193 100644 --- a/server/src/api/routes.rs +++ b/server/src/api/routes.rs @@ -118,6 +118,16 @@ struct StreamQualityProfile { bits_per_pixel: u32, } +#[derive(Clone, Debug, Eq, PartialEq)] +struct ActiveStreamQualityState { + profile: String, + max_edge: u32, + fps: u32, + min_bitrate: u32, + bits_per_pixel: u32, + video_codec: String, +} + const STREAM_QUALITY_PROFILES: &[StreamQualityProfile] = &[ StreamQualityProfile { id: "ci-software", @@ -570,15 +580,18 @@ fn is_inspector_agent_transport_path(path: &str) -> bool { } async fn health(State(state): State) -> Json { + let video_codec = active_video_codec(&state.config); + let stream_quality = + stream_quality_state_value(¤t_stream_quality_state(video_codec.clone())); json(json_value!({ "ok": true, "httpPort": state.config.http_port, "timestamp": SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO).as_secs_f64(), - "videoCodec": active_video_codec(&state.config), + "videoCodec": video_codec, "lowLatency": state.config.low_latency, "realtimeStream": crate::transport::webrtc::realtime_stream_enabled(), "localStreamFps": env_u32("SIMDECK_LOCAL_STREAM_FPS", 60, 15, 240), - "streamQuality": stream_quality_state(), + "streamQuality": stream_quality, "webRtc": { "iceServers": crate::transport::webrtc::client_ice_servers(), "iceTransportPolicy": crate::transport::webrtc::ice_transport_policy_label() @@ -606,8 +619,8 @@ async fn metrics(State(state): State) -> Json { json(json_value!(state.metrics.snapshot())) } -async fn stream_quality() -> Json { - json(json_value!(stream_quality_response())) +async fn stream_quality(State(state): State) -> Json { + json(json_value!(stream_quality_response(&state.config))) } async fn set_stream_quality( @@ -656,6 +669,19 @@ async fn set_stream_quality( .get_or_init(|| StdMutex::new(())) .lock() .unwrap(); + let current = current_stream_quality_state(active_video_codec(&state.config)); + let next_video_codec = video_codec.unwrap_or(current.video_codec.as_str()); + let next_profile = profile.map(|profile| profile.id).unwrap_or("custom"); + if current.max_edge == max_edge + && current.fps == fps + && current.min_bitrate == min_bitrate + && current.bits_per_pixel == bits_per_pixel + && current.profile == next_profile + && current.video_codec == next_video_codec + { + return Ok(json(json_value!(stream_quality_response(&state.config)))); + } + env::set_var("SIMDECK_REALTIME_MAX_EDGE", max_edge.to_string()); env::set_var("SIMDECK_REALTIME_FPS", fps.to_string()); env::set_var("SIMDECK_LOCAL_STREAM_FPS", fps.to_string()); @@ -674,19 +700,21 @@ async fn set_stream_quality( } state.registry.reconfigure_video_encoders(); - Ok(json(json_value!(stream_quality_response()))) + Ok(json(json_value!(stream_quality_response(&state.config)))) } -fn stream_quality_response() -> Value { +fn stream_quality_response(config: &Config) -> Value { + let video_codec = active_video_codec(config); + let quality = current_stream_quality_state(video_codec.clone()); json_value!({ "ok": true, - "quality": stream_quality_state(), - "videoCodec": active_video_codec_for_env(), + "quality": stream_quality_state_value(&quality), + "videoCodec": video_codec, "profiles": STREAM_QUALITY_PROFILES.iter().map(stream_quality_profile_value).collect::>() }) } -fn stream_quality_state() -> Value { +fn current_stream_quality_state(video_codec: String) -> ActiveStreamQualityState { let configured_profile = env::var("SIMDECK_STREAM_QUALITY_PROFILE") .ok() .and_then(|value| stream_quality_profile(value.trim()).ok()); @@ -727,21 +755,25 @@ fn stream_quality_state() -> Value { .map(|candidate| candidate.id.to_owned()) .unwrap_or_else(|| "custom".to_owned()) }); - json_value!({ - "profile": profile, - "maxEdge": max_edge, - "fps": fps, - "minBitrate": min_bitrate, - "bitsPerPixel": bits_per_pixel, - "videoCodec": active_video_codec_for_env(), - }) + ActiveStreamQualityState { + profile, + max_edge, + fps, + min_bitrate, + bits_per_pixel, + video_codec, + } } -fn active_video_codec_for_env() -> String { - std::env::var("SIMDECK_VIDEO_CODEC") - .ok() - .and_then(|value| normalize_video_codec(&value).map(ToOwned::to_owned)) - .unwrap_or_else(|| "auto".to_owned()) +fn stream_quality_state_value(state: &ActiveStreamQualityState) -> Value { + json_value!({ + "profile": state.profile, + "maxEdge": state.max_edge, + "fps": state.fps, + "minBitrate": state.min_bitrate, + "bitsPerPixel": state.bits_per_pixel, + "videoCodec": state.video_codec, + }) } fn stream_quality_profile(id: &str) -> Result { diff --git a/server/src/main.rs b/server/src/main.rs index 58c28ce..21013c5 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -49,6 +49,7 @@ const SERVER_HEALTH_WATCHDOG_INTERVAL: Duration = Duration::from_secs(5); const SERVER_HEALTH_WATCHDOG_PROBE_TIMEOUT: Duration = Duration::from_secs(3); const SERVER_HEALTH_WATCHDOG_STALE_HEARTBEAT: Duration = Duration::from_secs(60); const SERVER_HEALTH_WATCHDOG_FAILURE_THRESHOLD: usize = 12; +const SERVER_HEALTH_WATCHDOG_HTTP_FAILURE_THRESHOLD: usize = 3; #[derive(Parser)] #[command(name = "simdeck")] @@ -1384,6 +1385,9 @@ fn expose_to_studio(options: StudioExposeOptions) -> anyhow::Result<()> { .unwrap_or(true); let bridge_script = studio_provider_bridge_script()?; + let executable = env::current_exe().context("resolve simdeck executable")?; + let restart_args = studio_daemon_restart_args(&options); + let status_args = vec!["daemon".to_owned(), "status".to_owned()]; println!( "Exposing {} through SimDeck Studio...", selected @@ -1408,6 +1412,28 @@ fn expose_to_studio(options: StudioExposeOptions) -> anyhow::Result<()> { ) .env("SIMDECK_LOCAL_URL", &metadata.http_url) .env("SIMDECK_LOCAL_TOKEN", &metadata.access_token) + .env("SIMDECK_LOCAL_DAEMON_PID", metadata.pid.to_string()) + .env("SIMDECK_LOCAL_DAEMON_COMMAND", &executable) + .env( + "SIMDECK_LOCAL_DAEMON_RESTART_ARGS_JSON", + serde_json::to_string(&restart_args)?, + ) + .env( + "SIMDECK_LOCAL_DAEMON_STATUS_ARGS_JSON", + serde_json::to_string(&status_args)?, + ) + .env( + "SIMDECK_PROVIDER_PARENT_PID", + std::process::id().to_string(), + ) + .env( + "SIMDECK_LOCAL_DAEMON_LOG", + metadata + .log_path + .as_ref() + .map(|path| path.to_string_lossy().into_owned()) + .unwrap_or_default(), + ) .env( "SIMDECK_STUDIO_SIMULATOR_NAME", selected @@ -1438,6 +1464,37 @@ fn expose_to_studio(options: StudioExposeOptions) -> anyhow::Result<()> { Ok(()) } +fn studio_daemon_restart_args(options: &StudioExposeOptions) -> Vec { + let mut args = vec![ + "daemon".to_owned(), + "restart".to_owned(), + "--port".to_owned(), + options.port.to_string(), + "--bind".to_owned(), + options.bind.to_string(), + "--video-codec".to_owned(), + options.video_codec.as_env_value().to_owned(), + ]; + if options.low_latency { + args.push("--low-latency".to_owned()); + } else if let Some(profile) = options.stream_quality { + args.push("--stream-quality".to_owned()); + args.push(profile.as_profile_id().to_owned()); + } else if let Some(profile) = studio_stream_quality_profile( + options.video_codec, + options.low_latency, + options.stream_quality, + ) { + args.push("--stream-quality".to_owned()); + args.push(profile); + } + if let Some(local_stream_fps) = options.local_stream_fps { + args.push("--local-stream-fps".to_owned()); + args.push(local_stream_fps.to_string()); + } + args +} + #[derive(Clone, Debug)] struct StudioSimulatorSelection { udid: String, @@ -2577,6 +2634,7 @@ fn start_server_health_watchdog(http_addr: SocketAddr, heartbeat: Arc std::thread::spawn(move || { std::thread::sleep(SERVER_HEALTH_WATCHDOG_INITIAL_DELAY); let mut consecutive_failures = 0usize; + let mut consecutive_http_probe_failures = 0usize; loop { std::thread::sleep(SERVER_HEALTH_WATCHDOG_INTERVAL); @@ -2585,16 +2643,25 @@ fn start_server_health_watchdog(http_addr: SocketAddr, heartbeat: Arc let heartbeat_stale = heartbeat_age > SERVER_HEALTH_WATCHDOG_STALE_HEARTBEAT.as_secs(); let health_ok = http_health_probe(http_addr, SERVER_HEALTH_WATCHDOG_PROBE_TIMEOUT); - if heartbeat_stale || !health_ok { + if heartbeat_stale { consecutive_failures += 1; } else { consecutive_failures = 0; } + if health_ok { + consecutive_http_probe_failures = 0; + } else { + consecutive_http_probe_failures += 1; + } - if consecutive_failures >= SERVER_HEALTH_WATCHDOG_FAILURE_THRESHOLD { + if server_health_watchdog_should_restart( + consecutive_failures, + consecutive_http_probe_failures, + ) { eprintln!( - "SimDeck server health watchdog failed {consecutive_failures} consecutive checks \ -(heartbeat_age={heartbeat_age}s, http_health_ok={health_ok}); restarting server process." + "SimDeck server health watchdog failed \ +(heartbeat_failures={consecutive_failures}, http_probe_failures={consecutive_http_probe_failures}, \ +heartbeat_age={heartbeat_age}s, http_health_ok={health_ok}); restarting server process." ); std::process::exit(RECOVERABLE_RESTART_EXIT_CODE); } @@ -2602,6 +2669,14 @@ fn start_server_health_watchdog(http_addr: SocketAddr, heartbeat: Arc }); } +fn server_health_watchdog_should_restart( + consecutive_heartbeat_failures: usize, + consecutive_http_probe_failures: usize, +) -> bool { + consecutive_heartbeat_failures >= SERVER_HEALTH_WATCHDOG_FAILURE_THRESHOLD + || consecutive_http_probe_failures >= SERVER_HEALTH_WATCHDOG_HTTP_FAILURE_THRESHOLD +} + fn http_health_probe(address: SocketAddr, timeout: Duration) -> bool { let Ok(mut stream) = std::net::TcpStream::connect_timeout(&address, timeout) else { return false; @@ -4895,8 +4970,10 @@ fn default_client_root() -> anyhow::Result { #[cfg(test)] mod tests { use super::{ - normalize_accessibility_point_for_display, service_post_error_is_retryable, Cli, Command, - DaemonCommand, VideoCodecMode, + normalize_accessibility_point_for_display, server_health_watchdog_should_restart, + service_post_error_is_retryable, studio_daemon_restart_args, Cli, Command, DaemonCommand, + StreamQualityProfileArg, StudioExposeOptions, VideoCodecMode, + SERVER_HEALTH_WATCHDOG_FAILURE_THRESHOLD, SERVER_HEALTH_WATCHDOG_HTTP_FAILURE_THRESHOLD, }; use clap::Parser; @@ -4973,6 +5050,79 @@ mod tests { )); } + #[test] + fn server_health_watchdog_restarts_when_http_listener_is_unhealthy() { + assert!(server_health_watchdog_should_restart( + 0, + SERVER_HEALTH_WATCHDOG_HTTP_FAILURE_THRESHOLD + )); + } + + #[test] + fn server_health_watchdog_waits_for_transient_http_probe_failures() { + assert!(!server_health_watchdog_should_restart( + 0, + SERVER_HEALTH_WATCHDOG_HTTP_FAILURE_THRESHOLD - 1 + )); + } + + #[test] + fn server_health_watchdog_restarts_when_runtime_heartbeat_is_stale() { + assert!(server_health_watchdog_should_restart( + SERVER_HEALTH_WATCHDOG_FAILURE_THRESHOLD, + 0 + )); + } + + #[test] + fn studio_daemon_restart_args_preserve_remote_stream_defaults() { + let args = studio_daemon_restart_args(&StudioExposeOptions { + simulator: None, + studio_url: "https://simdeck.djdev.me".to_owned(), + port: 4310, + bind: "127.0.0.1".parse().unwrap(), + video_codec: VideoCodecMode::Software, + low_latency: false, + stream_quality: None, + local_stream_fps: None, + }); + assert_eq!( + args, + [ + "daemon", + "restart", + "--port", + "4310", + "--bind", + "127.0.0.1", + "--video-codec", + "software", + "--stream-quality", + "smooth", + ] + ); + } + + #[test] + fn studio_daemon_restart_args_preserve_explicit_quality() { + let args = studio_daemon_restart_args(&StudioExposeOptions { + simulator: None, + studio_url: "https://simdeck.djdev.me".to_owned(), + port: 4310, + bind: "127.0.0.1".parse().unwrap(), + video_codec: VideoCodecMode::Hardware, + low_latency: false, + stream_quality: Some(StreamQualityProfileArg::Balanced), + local_stream_fps: None, + }); + assert!(args.ends_with(&[ + "--video-codec".to_owned(), + "hardware".to_owned(), + "--stream-quality".to_owned(), + "balanced".to_owned(), + ])); + } + #[test] fn selector_tap_keeps_matching_orientation_coordinates() { assert_eq!( diff --git a/server/src/simulators/session.rs b/server/src/simulators/session.rs index 60e9d5e..d0dde6f 100644 --- a/server/src/simulators/session.rs +++ b/server/src/simulators/session.rs @@ -11,7 +11,7 @@ use std::sync::{Arc, Condvar, Mutex, RwLock, Weak}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use tokio::sync::broadcast; use tokio::task; -use tokio::time::{timeout, Instant}; +use tokio::time::{sleep, timeout, Instant}; use tracing::debug; // This channel carries encoded H.264 access units. Subscribers must not miss @@ -20,6 +20,9 @@ use tracing::debug; const FRAME_BROADCAST_CAPACITY: usize = 128; const MIN_REFRESH_INTERVAL_MS: u64 = 16; const MIN_KEYFRAME_INTERVAL_MS: u64 = 250; +const DEFAULT_SHARED_REFRESH_FPS: u64 = 60; +const MIN_SHARED_REFRESH_FPS: u64 = 15; +const MAX_SHARED_REFRESH_FPS: u64 = 240; pub struct SimulatorSession { inner: Arc, @@ -40,6 +43,30 @@ struct SimulatorSessionInner { frame_sequence: AtomicU64, last_refresh_ms: AtomicU64, last_keyframe_ms: AtomicU64, + active_frame_subscribers: AtomicU64, + refresh_pump_running: AtomicBool, +} + +pub struct FrameSubscription { + inner: Arc, + receiver: broadcast::Receiver, +} + +impl FrameSubscription { + pub async fn recv(&mut self) -> Result { + self.receiver.recv().await + } +} + +impl Drop for FrameSubscription { + fn drop(&mut self) { + self.inner + .active_frame_subscribers + .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |value| { + Some(value.saturating_sub(1)) + }) + .ok(); + } } impl SimulatorSession { @@ -64,6 +91,8 @@ impl SimulatorSession { frame_sequence: AtomicU64::new(0), last_refresh_ms: AtomicU64::new(0), last_keyframe_ms: AtomicU64::new(0), + active_frame_subscribers: AtomicU64::new(0), + refresh_pump_running: AtomicBool::new(false), }); let user_data = Weak::into_raw(Arc::downgrade(&inner)) as *mut c_void; @@ -111,9 +140,16 @@ impl SimulatorSession { .map_err(|error| AppError::internal(format!("Failed to join start task: {error}")))? } - pub fn subscribe(&self) -> broadcast::Receiver { + pub fn subscribe(&self) -> FrameSubscription { *self.inner.state.lock().unwrap() = SessionState::Streaming; - self.inner.sender.subscribe() + self.inner + .active_frame_subscribers + .fetch_add(1, Ordering::Relaxed); + self.inner.start_refresh_pump(); + FrameSubscription { + inner: self.inner.clone(), + receiver: self.inner.sender.subscribe(), + } } pub fn latest_keyframe(&self) -> Option { @@ -155,13 +191,7 @@ impl SimulatorSession { } pub fn request_refresh(&self) { - let now = now_ms(); - let previous = self.inner.last_refresh_ms.load(Ordering::Relaxed); - if now.saturating_sub(previous) < MIN_REFRESH_INTERVAL_MS { - return; - } - self.inner.last_refresh_ms.store(now, Ordering::Relaxed); - self.inner.native.request_refresh(); + self.inner.request_refresh(); } pub fn request_keyframe(&self) { @@ -272,6 +302,48 @@ unsafe extern "C" fn native_frame_callback( } impl SimulatorSessionInner { + fn start_refresh_pump(self: &Arc) { + if self + .refresh_pump_running + .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) + .is_err() + { + return; + } + + let inner = self.clone(); + tokio::spawn(async move { + loop { + if inner.active_frame_subscribers.load(Ordering::Relaxed) == 0 { + inner.refresh_pump_running.store(false, Ordering::Release); + if inner.active_frame_subscribers.load(Ordering::Relaxed) == 0 { + break; + } + if inner + .refresh_pump_running + .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire) + .is_err() + { + break; + } + } + + inner.request_refresh(); + sleep(shared_refresh_interval()).await; + } + }); + } + + fn request_refresh(&self) { + let now = now_ms(); + let previous = self.last_refresh_ms.load(Ordering::Relaxed); + if now.saturating_sub(previous) < MIN_REFRESH_INTERVAL_MS { + return; + } + self.last_refresh_ms.store(now, Ordering::Relaxed); + self.native.request_refresh(); + } + fn handle_frame(&self, frame: &ffi::xcw_native_frame) { let description = unsafe { copy_ffi_bytes(frame.description) }; let Some(data) = (unsafe { copy_ffi_bytes(frame.data) }) else { @@ -356,3 +428,13 @@ fn now_ms() -> u64 { .unwrap_or(Duration::ZERO) .as_millis() as u64 } + +fn shared_refresh_interval() -> Duration { + let fps = std::env::var("SIMDECK_REALTIME_FPS") + .or_else(|_| std::env::var("SIMDECK_LOCAL_STREAM_FPS")) + .ok() + .and_then(|value| value.parse::().ok()) + .unwrap_or(DEFAULT_SHARED_REFRESH_FPS) + .clamp(MIN_SHARED_REFRESH_FPS, MAX_SHARED_REFRESH_FPS); + Duration::from_micros(1_000_000 / fps) +} diff --git a/server/src/transport/webrtc.rs b/server/src/transport/webrtc.rs index b95e694..3da6616 100644 --- a/server/src/transport/webrtc.rs +++ b/server/src/transport/webrtc.rs @@ -47,10 +47,10 @@ const WEBRTC_REALTIME_WRITE_TIMEOUT: Duration = Duration::from_millis(45); const WEBRTC_REALTIME_KEYFRAME_WRITE_TIMEOUT: Duration = Duration::from_millis(90); const WEBRTC_INITIAL_KEYFRAME_TIMEOUT: Duration = Duration::from_secs(5); const WEBRTC_RTP_OUTBOUND_MTU: usize = 1200; -const WEBRTC_PEER_DISCONNECTED_TIMEOUT: Duration = Duration::from_secs(2); +const WEBRTC_PEER_DISCONNECTED_TIMEOUT: Duration = Duration::from_secs(12); static WEBRTC_MEDIA_STREAMS: OnceLock>>> = OnceLock::new(); -const MAX_WEBRTC_MEDIA_STREAMS_PER_UDID: usize = 4; +const MAX_WEBRTC_MEDIA_STREAMS_PER_UDID: usize = 16; #[derive(Clone)] struct WebRtcMediaStreamToken { @@ -606,7 +606,7 @@ fn offer_h264_profile_level_ids(sdp: &str) -> Vec { } fn h264_rtcp_feedback() -> Vec { - let mut feedback = vec![ + vec![ RTCPFeedback { typ: "goog-remb".to_owned(), parameter: String::new(), @@ -619,21 +619,15 @@ fn h264_rtcp_feedback() -> Vec { typ: "ccm".to_owned(), parameter: "fir".to_owned(), }, + RTCPFeedback { + typ: "nack".to_owned(), + parameter: String::new(), + }, RTCPFeedback { typ: "nack".to_owned(), parameter: "pli".to_owned(), }, - ]; - if !realtime_stream_enabled() { - feedback.insert( - 3, - RTCPFeedback { - typ: "nack".to_owned(), - parameter: String::new(), - }, - ); - } - feedback + ] } fn rtcp_packet_requests_keyframe(packet: &(dyn RtcpPacket + Send + Sync)) -> bool { @@ -856,8 +850,6 @@ impl WebRtcMediaStream { ); let mut waiting_for_keyframe = false; let mut peer_disconnected_since: Option = None; - let live_refresh_interval = realtime_sample_duration(); - let mut live_refresh_sleep = Box::pin(time::sleep(live_refresh_interval)); let _guard = WebRtcMetricsGuard::new(state.metrics.clone()); let first_frame_duration = send_timing.duration_for(&first_frame, realtime_stream); @@ -917,12 +909,6 @@ impl WebRtcMediaStream { peer_disconnected_since = None; } } - _ = &mut live_refresh_sleep => { - session.request_refresh(); - live_refresh_sleep - .as_mut() - .reset(time::Instant::now() + live_refresh_interval); - } command = stream_control_rx.recv() => { let Some(command) = command else { continue; @@ -1480,6 +1466,28 @@ mod tests { assert!(!rtcp_packet_requests_keyframe(&SenderReport::default())); } + #[test] + fn realtime_h264_advertises_retransmission_feedback() { + let feedback = super::h264_rtcp_feedback(); + assert!(feedback + .iter() + .any(|item| item.typ == "nack" && item.parameter.is_empty())); + assert!(feedback + .iter() + .any(|item| item.typ == "nack" && item.parameter == "pli")); + assert!(feedback + .iter() + .any(|item| item.typ == "ccm" && item.parameter == "fir")); + assert!(feedback + .iter() + .any(|item| item.typ == "transport-cc" && item.parameter.is_empty())); + } + + #[test] + fn peer_disconnected_grace_covers_remote_ice_wobbles() { + assert!(super::WEBRTC_PEER_DISCONNECTED_TIMEOUT >= Duration::from_secs(10)); + } + #[test] fn registering_second_webrtc_stream_does_not_cancel_first() { let udid = format!("test-{}", std::process::id()); @@ -1532,26 +1540,27 @@ mod tests { } #[test] - fn registering_fifth_webrtc_stream_cancels_oldest() { + fn registering_more_than_max_webrtc_streams_cancels_oldest() { let udid = format!("test-cap-{}", std::process::id()); super::reset_webrtc_media_streams_for_test(&udid); let (_first_token, mut first_rx) = super::register_webrtc_media_stream_for_test(&udid); - let (second_token, mut second_rx) = super::register_webrtc_media_stream_for_test(&udid); - let (third_token, mut third_rx) = super::register_webrtc_media_stream_for_test(&udid); - let (fourth_token, mut fourth_rx) = super::register_webrtc_media_stream_for_test(&udid); - let (fifth_token, mut fifth_rx) = super::register_webrtc_media_stream_for_test(&udid); + let mut retained = Vec::new(); + for _ in 0..super::MAX_WEBRTC_MEDIA_STREAMS_PER_UDID { + retained.push(super::register_webrtc_media_stream_for_test(&udid)); + } assert!(first_rx.try_recv().is_ok()); - assert!(second_rx.try_recv().is_err()); - assert!(third_rx.try_recv().is_err()); - assert!(fourth_rx.try_recv().is_err()); - assert!(fifth_rx.try_recv().is_err()); - assert_eq!(super::active_webrtc_media_stream_count(&udid), 4); + for (_token, rx) in retained.iter_mut() { + assert!(rx.try_recv().is_err()); + } + assert_eq!( + super::active_webrtc_media_stream_count(&udid), + super::MAX_WEBRTC_MEDIA_STREAMS_PER_UDID + ); - super::clear_webrtc_media_stream_for_test(&udid, &second_token); - super::clear_webrtc_media_stream_for_test(&udid, &third_token); - super::clear_webrtc_media_stream_for_test(&udid, &fourth_token); - super::clear_webrtc_media_stream_for_test(&udid, &fifth_token); + for (token, _rx) in retained { + super::clear_webrtc_media_stream_for_test(&udid, &token); + } assert!(!super::has_media_stream(&udid)); } diff --git a/skills/simdeck/SKILL.md b/skills/simdeck/SKILL.md index 2ed6fee..60f0934 100644 --- a/skills/simdeck/SKILL.md +++ b/skills/simdeck/SKILL.md @@ -56,7 +56,8 @@ frames instead of building latency. Studio providers default to the `smooth` stream quality profile (1170 px, dynamic up to 60 fps, higher bitrate to reduce artifacts); override with `--stream-quality quality|balanced|fast|smooth|economy|ci-software`, or pass -`--video-codec hardware` when a dedicated hardware encoder is preferable. +`--video-codec hardware` when a dedicated hardware encoder is preferable. The +remote Studio viewer exposes 15, 30, and 60 fps choices in the stream menu. The local viewer gets the API token automatically. LAN browsers pair with the printed code before receiving the API cookie. Direct HTTP calls need `X-SimDeck-Token` or `Authorization: Bearer `. From e2cb7158b7076092c809b4c59c0f6d3ee8ca2a57 Mon Sep 17 00:00:00 2001 From: DjDeveloperr Date: Mon, 4 May 2026 22:08:21 -0400 Subject: [PATCH 2/2] Harden remote WebRTC reconnects --- client/src/features/stream/stats.ts | 3 + client/src/features/stream/streamTypes.ts | 3 + .../src/features/stream/streamWorkerClient.ts | 234 +++++++++++++++--- client/src/features/toolbar/DebugPanel.tsx | 3 + server/src/metrics/counters.rs | 6 + 5 files changed, 210 insertions(+), 39 deletions(-) diff --git a/client/src/features/stream/stats.ts b/client/src/features/stream/stats.ts index ddcacd9..92fd86a 100644 --- a/client/src/features/stream/stats.ts +++ b/client/src/features/stream/stats.ts @@ -10,12 +10,15 @@ export function createEmptyStreamStats(): StreamStats { droppedFrames: 0, frameSequence: 0, height: 0, + iceRestartReason: "", + iceRestarts: 0, latestFrameGapMs: 0, latestRenderMs: 0, maxRenderMs: 0, packetsLost: 0, presentationDroppedFrames: 0, receivedPackets: 0, + reconnectReason: "", reconnects: 0, renderedFrames: 0, waitingForKeyFrame: false, diff --git a/client/src/features/stream/streamTypes.ts b/client/src/features/stream/streamTypes.ts index 42044f9..c1b3b61 100644 --- a/client/src/features/stream/streamTypes.ts +++ b/client/src/features/stream/streamTypes.ts @@ -45,12 +45,15 @@ export interface StreamStats extends Size { decoderDroppedFrames: number; droppedFrames: number; frameSequence: number; + iceRestartReason: string; + iceRestarts: number; latestFrameGapMs: number; latestRenderMs: number; maxRenderMs: number; packetsLost: number; presentationDroppedFrames: number; receivedPackets: number; + reconnectReason: string; reconnects: number; renderedFrames: number; waitingForKeyFrame: boolean; diff --git a/client/src/features/stream/streamWorkerClient.ts b/client/src/features/stream/streamWorkerClient.ts index d8ca043..ff06dff 100644 --- a/client/src/features/stream/streamWorkerClient.ts +++ b/client/src/features/stream/streamWorkerClient.ts @@ -18,6 +18,7 @@ const WEBRTC_LOCAL_RECEIVER_BUFFER_SECONDS = 0.001; const WEBRTC_REMOTE_RECEIVER_BUFFER_SECONDS = 0.06; const WEBRTC_LOCAL_DISCONNECTED_GRACE_MS = 1000; const WEBRTC_REMOTE_DISCONNECTED_GRACE_MS = 10000; +const WEBRTC_REMOTE_ICE_RESTART_GRACE_MS = 1500; const WEBRTC_RECONNECT_BASE_DELAY_MS = 250; const WEBRTC_RECONNECT_MAX_DELAY_MS = 1000; @@ -191,6 +192,8 @@ class WebRtcStreamClient implements StreamClientBackend { private disconnectGraceTimeout = 0; private frameWatchdogTimeout = 0; private hasRenderedFrame = false; + private iceRestartInFlight = false; + private iceRestartTimeout = 0; private lastVideoFrameAt = 0; private peerConnection: RTCPeerConnection | null = null; private reconnectTimeout = 0; @@ -256,6 +259,7 @@ class WebRtcStreamClient implements StreamClientBackend { if (wasReconnecting) { this.clearReconnectTimeout(); this.clearDisconnectGraceTimeout(); + this.clearIceRestartTimeout(); this.clearFrameWatchdog(); this.closeActiveConnection(); } else { @@ -281,9 +285,11 @@ class WebRtcStreamClient implements StreamClientBackend { }); try { - await postStreamConfigWithAuthRetry(target.streamConfig); - if (generation !== this.connectGeneration) { - return; + if (!target.remote) { + await postStreamConfigWithAuthRetry(target.streamConfig); + if (generation !== this.connectGeneration) { + return; + } } const health = await fetchHealth().catch(() => null); if (generation !== this.connectGeneration) { @@ -392,6 +398,8 @@ class WebRtcStreamClient implements StreamClientBackend { } if (peerConnection.connectionState === "connected") { this.clearDisconnectGraceTimeout(); + this.clearIceRestartTimeout(); + this.iceRestartInFlight = false; this.reconnectDelayMs = WEBRTC_RECONNECT_BASE_DELAY_MS; if (this.reportedVideoConfig) { this.onMessage({ @@ -403,6 +411,11 @@ class WebRtcStreamClient implements StreamClientBackend { } if (peerConnection.connectionState === "disconnected") { if (this.hasRenderedFrame) { + this.scheduleIceRestart( + target, + generation, + "connection-disconnected", + ); this.scheduleDisconnectedGrace(target, generation); return; } @@ -410,50 +423,23 @@ class WebRtcStreamClient implements StreamClientBackend { target, generation, new Error("WebRTC connection disconnected."), + "connection-disconnected-before-first-frame", ); return; } if (peerConnection.connectionState === "failed") { void this.updateSelectedCandidatePair(peerConnection, target); - this.handleConnectionError( + void this.restartIceOrReconnect( target, generation, - new Error("WebRTC connection failed."), + "connection-failed", ); } }; - const offer = safariBaselineH264Offer(await peerConnection.createOffer()); - if (generation !== this.connectGeneration) { - return; - } - await peerConnection.setLocalDescription(offer); - await waitForIceGathering(peerConnection); - if (generation !== this.connectGeneration) { - return; - } - const localDescription = peerConnection.localDescription; - if (!localDescription) { - throw new Error("WebRTC local offer was not created."); - } - this.diagnostics.localCandidateSummary = summarizeSdpCandidates( - localDescription.sdp, - ); - this.postDiagnostics(target, "local-offer"); - - const response = await postWebRtcOfferWithAuthRetry( - target, - localDescription, - ); - const answer = (await response.json()) as RTCSessionDescriptionInit; - if (generation !== this.connectGeneration) { - return; - } - this.diagnostics.remoteCandidateSummary = summarizeSdpCandidates( - answer.sdp ?? "", - ); - this.postDiagnostics(target, "remote-answer"); - await peerConnection.setRemoteDescription(answer); + await this.negotiatePeerConnection(peerConnection, target, generation, { + detailPrefix: "local", + }); this.scheduleFrameWatchdog(target, generation); } catch (error) { this.handleConnectionError(target, generation, error); @@ -466,6 +452,7 @@ class WebRtcStreamClient implements StreamClientBackend { this.connectGeneration += 1; this.clearReconnectTimeout(); this.clearDisconnectGraceTimeout(); + this.clearIceRestartTimeout(); this.clearFrameWatchdog(); this.closeActiveConnection(); this.onMessage({ type: "status", status: { state: "idle" } }); @@ -480,13 +467,54 @@ class WebRtcStreamClient implements StreamClientBackend { return; } const generation = ++this.streamConfigGeneration; - await postStreamConfigWithAuthRetry(config); + await postStreamConfigWithAuthRetry(config, { remote: this.remoteMode }); if (generation !== this.streamConfigGeneration) { return; } this.sendControl({ forceKeyframe: true, type: "streamControl" }); } + private async negotiatePeerConnection( + peerConnection: RTCPeerConnection, + target: StreamConnectTarget, + generation: number, + options: { detailPrefix: string; iceRestart?: boolean }, + ) { + const offer = safariBaselineH264Offer( + await peerConnection.createOffer({ iceRestart: options.iceRestart }), + ); + if (generation !== this.connectGeneration) { + return; + } + await peerConnection.setLocalDescription(offer); + await waitForIceGathering(peerConnection); + if (generation !== this.connectGeneration) { + return; + } + const localDescription = peerConnection.localDescription; + if (!localDescription) { + throw new Error("WebRTC local offer was not created."); + } + this.diagnostics.localCandidateSummary = summarizeSdpCandidates( + localDescription.sdp, + ); + this.postDiagnostics(target, `${options.detailPrefix}-offer`); + + const response = await postWebRtcOfferWithAuthRetry( + target, + localDescription, + ); + const answer = (await response.json()) as RTCSessionDescriptionInit; + if (generation !== this.connectGeneration) { + return; + } + this.diagnostics.remoteCandidateSummary = summarizeSdpCandidates( + answer.sdp ?? "", + ); + this.postDiagnostics(target, `${options.detailPrefix}-answer`); + await peerConnection.setRemoteDescription(answer); + } + destroy() { this.disconnect(); } @@ -496,6 +524,8 @@ class WebRtcStreamClient implements StreamClientBackend { this.animationFrame = 0; this.clearFrameWatchdog(); this.clearDisconnectGraceTimeout(); + this.clearIceRestartTimeout(); + this.iceRestartInFlight = false; this.clearReceiverStatsPolling(); this.cancelVideoFrameCallback(); this.captureCurrentVideoFrame(); @@ -524,12 +554,14 @@ class WebRtcStreamClient implements StreamClientBackend { target: StreamConnectTarget, generation: number, error: unknown, + reason = "connection-error", ) { if (generation !== this.connectGeneration || !this.shouldReconnect) { return; } const message = error instanceof Error ? error.message : String(error); const friendlyMessage = friendlyStreamError(message); + this.stats.reconnectReason = reason; this.closeActiveConnection(); this.onMessage({ type: "status", @@ -546,7 +578,7 @@ class WebRtcStreamClient implements StreamClientBackend { } : { error: friendlyMessage, state: "error" }, }); - this.scheduleReconnect(target, generation); + this.scheduleReconnect(target, generation, reason); } private scheduleDisconnectedGrace( @@ -565,11 +597,16 @@ class WebRtcStreamClient implements StreamClientBackend { target, generation, new Error("WebRTC connection disconnected."), + "connection-disconnected-grace-expired", ); }, disconnectedGraceMs(target)); } - private scheduleReconnect(target: StreamConnectTarget, generation: number) { + private scheduleReconnect( + target: StreamConnectTarget, + generation: number, + reason: string, + ) { if ( this.reconnectTimeout || generation !== this.connectGeneration || @@ -578,6 +615,7 @@ class WebRtcStreamClient implements StreamClientBackend { return; } this.stats.reconnects += 1; + this.stats.reconnectReason = reason; this.onMessage({ type: "stats", stats: { ...this.stats } }); const delayMs = this.reconnectDelayMs; this.reconnectDelayMs = Math.min( @@ -594,10 +632,16 @@ class WebRtcStreamClient implements StreamClientBackend { } private resetFrameStateForNewConnection() { + const iceRestartReason = this.stats.iceRestartReason; + const iceRestarts = this.stats.iceRestarts; + const reconnectReason = this.stats.reconnectReason; const reconnects = this.stats.reconnects; this.hasRenderedFrame = false; this.lastVideoFrameAt = 0; this.stats = createEmptyStreamStats(); + this.stats.iceRestartReason = iceRestartReason; + this.stats.iceRestarts = iceRestarts; + this.stats.reconnectReason = reconnectReason; this.stats.reconnects = reconnects; this.onMessage({ type: "stats", stats: { ...this.stats } }); } @@ -622,6 +666,7 @@ class WebRtcStreamClient implements StreamClientBackend { target, generation, new Error("WebRTC video stalled before rendering fresh frames."), + "first-frame-timeout", ); return; } @@ -662,6 +707,110 @@ class WebRtcStreamClient implements StreamClientBackend { this.disconnectGraceTimeout = 0; } + private clearIceRestartTimeout() { + if (!this.iceRestartTimeout) { + return; + } + window.clearTimeout(this.iceRestartTimeout); + this.iceRestartTimeout = 0; + } + + private scheduleIceRestart( + target: StreamConnectTarget, + generation: number, + reason: string, + ) { + if ( + !target.remote || + this.iceRestartTimeout || + this.iceRestartInFlight || + generation !== this.connectGeneration || + !this.shouldReconnect + ) { + return; + } + this.iceRestartTimeout = window.setTimeout(() => { + this.iceRestartTimeout = 0; + void this.restartIceOrReconnect(target, generation, reason); + }, WEBRTC_REMOTE_ICE_RESTART_GRACE_MS); + } + + private async restartIceOrReconnect( + target: StreamConnectTarget, + generation: number, + reason: string, + ) { + if (generation !== this.connectGeneration || !this.shouldReconnect) { + return; + } + if (!target.remote) { + this.handleConnectionError( + target, + generation, + new Error("WebRTC connection failed."), + reason, + ); + return; + } + if (this.iceRestartInFlight) { + return; + } + const restarted = await this.tryIceRestart(target, generation, reason); + if ( + !restarted && + generation === this.connectGeneration && + this.shouldReconnect + ) { + this.handleConnectionError( + target, + generation, + new Error("WebRTC ICE restart failed."), + `${reason}-ice-restart-failed`, + ); + } + } + + private async tryIceRestart( + target: StreamConnectTarget, + generation: number, + reason: string, + ): Promise { + const peerConnection = this.peerConnection; + if ( + !peerConnection || + peerConnection.connectionState === "closed" || + peerConnection.signalingState !== "stable" || + this.iceRestartInFlight || + generation !== this.connectGeneration || + !this.shouldReconnect + ) { + return false; + } + this.iceRestartInFlight = true; + this.stats.iceRestartReason = reason; + this.stats.iceRestarts += 1; + this.onMessage({ type: "stats", stats: { ...this.stats } }); + this.postDiagnostics(target, "ice-restart-start"); + try { + await this.negotiatePeerConnection(peerConnection, target, generation, { + detailPrefix: "ice-restart", + iceRestart: true, + }); + if (generation !== this.connectGeneration || !this.shouldReconnect) { + return true; + } + this.iceRestartInFlight = false; + this.scheduleFrameWatchdog(target, generation); + this.postDiagnostics(target, "ice-restart-complete"); + return true; + } catch (error) { + this.iceRestartInFlight = false; + this.diagnostics.selectedCandidatePair = `ice-restart-error:${error instanceof Error ? error.message : String(error)}`; + this.postDiagnostics(target, "ice-restart-error"); + return false; + } + } + private attachDiagnostics( peerConnection: RTCPeerConnection, target: StreamConnectTarget, @@ -1034,6 +1183,9 @@ async function postWebRtcOfferWithAuthRetry( } return response; } + if (target.remote) { + throw new Error(await response.text()); + } await fetchHealth(); const retry = await postWebRtcOffer(target, localDescription); if (!retry.ok) { @@ -1044,6 +1196,7 @@ async function postWebRtcOfferWithAuthRetry( async function postStreamConfigWithAuthRetry( config: StreamConfig | undefined, + options: { remote?: boolean } = {}, ): Promise { if (!config) { return; @@ -1055,6 +1208,9 @@ async function postStreamConfigWithAuthRetry( } return; } + if (options.remote) { + throw new Error(await response.text()); + } await fetchHealth(); const retry = await postStreamConfig(config); if (!retry.ok) { diff --git a/client/src/features/toolbar/DebugPanel.tsx b/client/src/features/toolbar/DebugPanel.tsx index 64c0f0e..faeefdd 100644 --- a/client/src/features/toolbar/DebugPanel.tsx +++ b/client/src/features/toolbar/DebugPanel.tsx @@ -51,6 +51,9 @@ export function DebugPanel({ { label: "Decode Drops", value: String(stats.decoderDroppedFrames) }, { label: "Present Drops", value: String(stats.presentationDroppedFrames) }, { label: "Reconnects", value: String(stats.reconnects) }, + { label: "Reconnect Reason", value: stats.reconnectReason || "—" }, + { label: "ICE Restarts", value: String(stats.iceRestarts) }, + { label: "ICE Restart Reason", value: stats.iceRestartReason || "—" }, { label: "Decoded", value: String(stats.decodedFrames) }, { label: "Rendered", value: String(stats.renderedFrames) }, { label: "Render", value: formatMs(stats.latestRenderMs) }, diff --git a/server/src/metrics/counters.rs b/server/src/metrics/counters.rs index 9369dcc..54174b6 100644 --- a/server/src/metrics/counters.rs +++ b/server/src/metrics/counters.rs @@ -69,7 +69,10 @@ pub struct ClientStreamStats { pub dropped_frames: Option, pub packets_lost: Option, pub presentation_dropped_frames: Option, + pub ice_restarts: Option, + pub ice_restart_reason: Option, pub reconnects: Option, + pub reconnect_reason: Option, pub frame_sequence: Option, pub decode_queue_size: Option, pub waiting_for_key_frame: Option, @@ -196,7 +199,10 @@ mod tests { dropped_frames: None, packets_lost: None, presentation_dropped_frames: None, + ice_restarts: None, + ice_restart_reason: None, reconnects: None, + reconnect_reason: None, frame_sequence: None, decode_queue_size: None, waiting_for_key_frame: None,