diff --git a/packages/client/index.ts b/packages/client/index.ts index aad592a887..55f9b6d1d1 100644 --- a/packages/client/index.ts +++ b/packages/client/index.ts @@ -23,7 +23,6 @@ export * from './src/helpers/DynascaleManager'; export * from './src/helpers/ViewportTracker'; export * from './src/helpers/sound-detector'; export * from './src/helpers/participantUtils'; -export * from './src/helpers/RNSpeechDetector'; export * as Browsers from './src/helpers/browsers'; export * from './src/logger'; diff --git a/packages/client/src/devices/MicrophoneManager.ts b/packages/client/src/devices/MicrophoneManager.ts index 318cf91244..70e7a68b44 100644 --- a/packages/client/src/devices/MicrophoneManager.ts +++ b/packages/client/src/devices/MicrophoneManager.ts @@ -24,7 +24,6 @@ import { createSafeAsyncSubscription, createSubscription, } from '../store/rxUtils'; -import { RNSpeechDetector } from '../helpers/RNSpeechDetector'; import { withoutConcurrency } from '../helpers/concurrency'; import { disposeOfMediaStream } from './utils'; import { promiseWithResolvers } from '../helpers/promise'; @@ -36,7 +35,6 @@ export class MicrophoneManager extends AudioDeviceManager Promise; private soundDetectorDeviceId?: string; private noAudioDetectorCleanup?: () => Promise; - private rnSpeechDetector: RNSpeechDetector | undefined; private noiseCancellation: INoiseCancellation | undefined; private noiseCancellationChangeUnsubscribe: (() => void) | undefined; private noiseCancellationRegistration?: Promise; @@ -422,13 +420,19 @@ export class MicrophoneManager extends AudioDeviceManager { + const speechActivity = + globalThis.streamRNVideoSDK?.nativeEvents?.speechActivity; + if (!speechActivity) { + this.logger.warn( + 'Native speech activity not available, make sure the "@stream-io/react-native-webrtc" peer dependency version is satisfied', + ); + return; + } + const unsubscribe = speechActivity.subscribe((event) => { this.state.setSpeakingWhileMuted(event.isSoundDetected); }); this.soundDetectorCleanup = async () => { unsubscribe(); - this.rnSpeechDetector = undefined; }; } else { // Need to start a new stream that's not connected to publisher diff --git a/packages/client/src/devices/__tests__/MicrophoneManagerRN.test.ts b/packages/client/src/devices/__tests__/MicrophoneManagerRN.test.ts index b947829de1..5805f4fcac 100644 --- a/packages/client/src/devices/__tests__/MicrophoneManagerRN.test.ts +++ b/packages/client/src/devices/__tests__/MicrophoneManagerRN.test.ts @@ -12,11 +12,12 @@ import { import { of } from 'rxjs'; import '../../rtc/__tests__/mocks/webrtc.mocks'; import { OwnCapability } from '../../gen/coordinator'; -import { SoundStateChangeHandler } from '../../helpers/sound-detector'; import { settled, withoutConcurrency } from '../../helpers/concurrency'; -let handler: SoundStateChangeHandler = () => {}; -let unsubscribeHandlers: ReturnType[] = []; +let speechActivityCallback: + | ((state: { isSoundDetected: boolean }) => void) + | null = null; +let unsubscribeMocks: ReturnType[] = []; vi.mock('../../helpers/platforms.ts', () => { return { @@ -46,28 +47,21 @@ vi.mock('../../Call.ts', () => { }; }); -vi.mock('../../helpers/RNSpeechDetector.ts', () => { - console.log('MOCKING RNSpeechDetector'); - return { - RNSpeechDetector: vi.fn().mockImplementation(() => ({ - start: vi.fn((callback) => { - handler = callback; - const unsubscribe = vi.fn(); - unsubscribeHandlers.push(unsubscribe); - return unsubscribe; - }), - stop: vi.fn(), - onSpeakingDetectedStateChange: vi.fn(), - })), - }; -}); - 
describe('MicrophoneManager React Native', () => { let manager: MicrophoneManager; let checkPermissionMock: ReturnType; + let subscribeMock: ReturnType; + beforeEach(() => { - unsubscribeHandlers = []; + speechActivityCallback = null; + unsubscribeMocks = []; checkPermissionMock = vi.fn(async () => true); + subscribeMock = vi.fn((cb) => { + speechActivityCallback = cb; + const unsub = vi.fn(); + unsubscribeMocks.push(unsub); + return unsub; + }); globalThis.streamRNVideoSDK = { callManager: { @@ -78,6 +72,11 @@ describe('MicrophoneManager React Native', () => { permissions: { check: checkPermissionMock, }, + nativeEvents: { + speechActivity: { + subscribe: subscribeMock, + }, + }, }; const devicePersistence = { enabled: false, storageKey: '' }; @@ -100,7 +99,7 @@ describe('MicrophoneManager React Native', () => { await vi.waitUntil(() => fn.mock.calls.length > 0, { timeout: 100 }); expect(fn).toHaveBeenCalled(); - expect(manager['rnSpeechDetector']?.start).toHaveBeenCalled(); + expect(subscribeMock).toHaveBeenCalled(); }); it('should check native microphone permission before starting detection', async () => { @@ -146,15 +145,15 @@ describe('MicrophoneManager React Native', () => { it('should update speaking while muted state', async () => { await manager['startSpeakingWhileMutedDetection'](); - expect(manager['rnSpeechDetector']?.start).toHaveBeenCalled(); + expect(subscribeMock).toHaveBeenCalled(); expect(manager.state.speakingWhileMuted).toBe(false); - handler!({ isSoundDetected: true, audioLevel: 2 }); + speechActivityCallback!({ isSoundDetected: true }); expect(manager.state.speakingWhileMuted).toBe(true); - handler!({ isSoundDetected: false, audioLevel: 0 }); + speechActivityCallback!({ isSoundDetected: false }); expect(manager.state.speakingWhileMuted).toBe(false); }); @@ -163,21 +162,21 @@ describe('MicrophoneManager React Native', () => { await manager['startSpeakingWhileMutedDetection']('device-1'); await manager['startSpeakingWhileMutedDetection']('device-1'); - expect(unsubscribeHandlers).toHaveLength(1); + expect(unsubscribeMocks).toHaveLength(1); await manager['stopSpeakingWhileMutedDetection'](); - expect(unsubscribeHandlers[0]).toHaveBeenCalledTimes(1); + expect(unsubscribeMocks[0]).toHaveBeenCalledTimes(1); }); it('should cleanup previous speech detector before starting a new one', async () => { await manager['startSpeakingWhileMutedDetection']('device-1'); await manager['startSpeakingWhileMutedDetection']('device-2'); - expect(unsubscribeHandlers).toHaveLength(2); - expect(unsubscribeHandlers[0]).toHaveBeenCalledTimes(1); + expect(unsubscribeMocks).toHaveLength(2); + expect(unsubscribeMocks[0]).toHaveBeenCalledTimes(1); await manager['stopSpeakingWhileMutedDetection'](); - expect(unsubscribeHandlers[1]).toHaveBeenCalledTimes(1); + expect(unsubscribeMocks[1]).toHaveBeenCalledTimes(1); }); it('should stop speaking while muted notifications if user loses permission to send audio', async () => { diff --git a/packages/client/src/helpers/RNSpeechDetector.ts b/packages/client/src/helpers/RNSpeechDetector.ts deleted file mode 100644 index ba0ef2f354..0000000000 --- a/packages/client/src/helpers/RNSpeechDetector.ts +++ /dev/null @@ -1,224 +0,0 @@ -import { BaseStats, flatten } from '../stats'; -import { SoundStateChangeHandler } from './sound-detector'; -import { videoLoggerSystem } from '../logger'; - -export class RNSpeechDetector { - private readonly pc1 = new RTCPeerConnection({}); - private readonly pc2 = new RTCPeerConnection({}); - private audioStream: MediaStream | 
undefined; - private externalAudioStream: MediaStream | undefined; - private isStopped = false; - - constructor(externalAudioStream?: MediaStream) { - this.externalAudioStream = externalAudioStream; - } - - /** - * Starts the speech detection. - */ - public async start(onSoundDetectedStateChanged: SoundStateChangeHandler) { - let detachListeners: (() => void) | undefined; - let unsubscribe: (() => void) | undefined; - - try { - this.isStopped = false; - const audioStream = - this.externalAudioStream != null - ? this.externalAudioStream - : await navigator.mediaDevices.getUserMedia({ audio: true }); - this.audioStream = audioStream; - - const onPc1IceCandidate = (e: RTCPeerConnectionIceEvent) => { - this.forwardIceCandidate(this.pc2, e.candidate); - }; - const onPc2IceCandidate = (e: RTCPeerConnectionIceEvent) => { - this.forwardIceCandidate(this.pc1, e.candidate); - }; - const onTrackPc2 = (e: RTCTrackEvent) => { - e.streams[0].getTracks().forEach((track) => { - // In RN, the remote track is automatically added to the audio output device - // so we need to mute it to avoid hearing the audio back - // @ts-expect-error _setVolume is a private method in react-native-webrtc - track._setVolume(0); - }); - }; - - this.pc1.addEventListener('icecandidate', onPc1IceCandidate); - this.pc2.addEventListener('icecandidate', onPc2IceCandidate); - this.pc2.addEventListener('track', onTrackPc2); - detachListeners = () => { - this.pc1.removeEventListener('icecandidate', onPc1IceCandidate); - this.pc2.removeEventListener('icecandidate', onPc2IceCandidate); - this.pc2.removeEventListener('track', onTrackPc2); - }; - - audioStream - .getTracks() - .forEach((track) => this.pc1.addTrack(track, audioStream)); - const offer = await this.pc1.createOffer({}); - await this.pc2.setRemoteDescription(offer); - await this.pc1.setLocalDescription(offer); - const answer = await this.pc2.createAnswer(); - await this.pc1.setRemoteDescription(answer); - await this.pc2.setLocalDescription(answer); - unsubscribe = this.onSpeakingDetectedStateChange( - onSoundDetectedStateChanged, - ); - return () => { - detachListeners?.(); - unsubscribe?.(); - this.stop(); - }; - } catch (error) { - detachListeners?.(); - unsubscribe?.(); - this.stop(); - - const logger = videoLoggerSystem.getLogger('RNSpeechDetector'); - logger.error('error handling permissions: ', error); - return () => {}; - } - } - - /** - * Stops the speech detection and releases all allocated resources. - */ - private stop() { - if (this.isStopped) return; - this.isStopped = true; - - this.pc1.close(); - this.pc2.close(); - - if (this.externalAudioStream != null) { - this.externalAudioStream = undefined; - } else { - this.cleanupAudioStream(); - } - } - - /** - * Public method that detects the audio levels and returns the status. 
- */ - private onSpeakingDetectedStateChange( - onSoundDetectedStateChanged: SoundStateChangeHandler, - ) { - const initialBaselineNoiseLevel = 0.13; - let baselineNoiseLevel = initialBaselineNoiseLevel; - let speechDetected = false; - let speechTimer: NodeJS.Timeout | undefined; - let silenceTimer: NodeJS.Timeout | undefined; - const audioLevelHistory: number[] = []; // Store recent audio levels for smoother detection - const historyLength = 10; - const silenceThreshold = 1.1; - const resetThreshold = 0.9; - const speechTimeout = 500; // Speech is set to true after 500ms of audio detection - const silenceTimeout = 5000; // Reset baseline after 5 seconds of silence - - const checkAudioLevel = async () => { - try { - const stats = await this.pc1.getStats(); - const report = flatten(stats); - // Audio levels are present inside stats of type `media-source` and of kind `audio` - const audioMediaSourceStats = report.find( - (stat) => - stat.type === 'media-source' && - (stat as RTCRtpStreamStats).kind === 'audio', - ) as BaseStats; - if (audioMediaSourceStats) { - const { audioLevel } = audioMediaSourceStats; - if (audioLevel) { - // Update audio level history (with max historyLength sized array) - audioLevelHistory.push(audioLevel); - if (audioLevelHistory.length > historyLength) { - audioLevelHistory.shift(); - } - - // Calculate average audio level - const avgAudioLevel = - audioLevelHistory.reduce((a, b) => a + b, 0) / - audioLevelHistory.length; - - // Update baseline (if necessary) based on silence detection - if (avgAudioLevel < baselineNoiseLevel * silenceThreshold) { - if (!silenceTimer) { - silenceTimer = setTimeout(() => { - baselineNoiseLevel = Math.min( - avgAudioLevel * resetThreshold, - initialBaselineNoiseLevel, - ); - }, silenceTimeout); - } - } else { - clearTimeout(silenceTimer); - silenceTimer = undefined; - } - - // Speech detection with hysteresis - if (avgAudioLevel > baselineNoiseLevel * 1.5) { - if (!speechDetected) { - speechDetected = true; - onSoundDetectedStateChanged({ - isSoundDetected: true, - audioLevel, - }); - } - - clearTimeout(speechTimer); - - speechTimer = setTimeout(() => { - speechDetected = false; - onSoundDetectedStateChanged({ - isSoundDetected: false, - audioLevel: 0, - }); - }, speechTimeout); - } - } - } - } catch (error) { - const logger = videoLoggerSystem.getLogger('RNSpeechDetector'); - logger.error('error checking audio level from stats', error); - } - }; - - const intervalId = setInterval(checkAudioLevel, 250); - return () => { - clearInterval(intervalId); - clearTimeout(speechTimer); - clearTimeout(silenceTimer); - }; - } - - private cleanupAudioStream() { - if (!this.audioStream) { - return; - } - this.audioStream.getTracks().forEach((track) => track.stop()); - if ( - // @ts-expect-error release() is present in react-native-webrtc - typeof this.audioStream.release === 'function' - ) { - // @ts-expect-error called to dispose the stream in RN - this.audioStream.release(); - } - } - - private forwardIceCandidate( - destination: RTCPeerConnection, - candidate: RTCIceCandidate | null, - ) { - if ( - this.isStopped || - !candidate || - destination.signalingState === 'closed' - ) { - return; - } - destination.addIceCandidate(candidate).catch(() => { - // silently ignore the error - const logger = videoLoggerSystem.getLogger('RNSpeechDetector'); - logger.info('cannot add ice candidate - ignoring'); - }); - } -} diff --git a/packages/client/src/helpers/__tests__/RNSpeechDetector.test.ts 
b/packages/client/src/helpers/__tests__/RNSpeechDetector.test.ts deleted file mode 100644 index 4aec55d4da..0000000000 --- a/packages/client/src/helpers/__tests__/RNSpeechDetector.test.ts +++ /dev/null @@ -1,52 +0,0 @@ -import '../../rtc/__tests__/mocks/webrtc.mocks'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { RNSpeechDetector } from '../RNSpeechDetector'; - -describe('RNSpeechDetector', () => { - // Shared test setup stubs RTCPeerConnection with a vi.fn constructor. - // We keep a typed handle to that constructor to inspect created instances. - let rtcPeerConnectionMockCtor: ReturnType; - - beforeEach(() => { - rtcPeerConnectionMockCtor = - globalThis.RTCPeerConnection as unknown as ReturnType; - rtcPeerConnectionMockCtor.mockClear(); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - it('ignores late ICE candidates after cleanup', async () => { - const stream = { - getTracks: () => [], - } as unknown as MediaStream; - const detector = new RNSpeechDetector(stream); - - const cleanup = await detector.start(() => {}); - cleanup(); - - // start() creates two peer connections (pc1 and pc2). We pull them from - // constructor call results to inspect listener wiring and ICE forwarding. - const [pc1, pc2] = rtcPeerConnectionMockCtor.mock.results.map( - (result) => result.value, - ); - - // Find the registered ICE callback and invoke it manually after cleanup to - // simulate a late ICE event arriving during teardown. - const onIceCandidate = pc1.addEventListener.mock.calls.find( - ([eventName]: [string]) => eventName === 'icecandidate', - )?.[1] as ((e: RTCPeerConnectionIceEvent) => void) | undefined; - - expect(onIceCandidate).toBeDefined(); - onIceCandidate?.({ - candidate: { candidate: 'candidate:1 1 UDP 0 127.0.0.1 11111 typ host' }, - } as unknown as RTCPeerConnectionIceEvent); - - expect(pc1.removeEventListener).toHaveBeenCalledWith( - 'icecandidate', - onIceCandidate, - ); - expect(pc2.addIceCandidate).not.toHaveBeenCalled(); - }); -}); diff --git a/packages/client/src/types.ts b/packages/client/src/types.ts index 4c544ac1bb..f7fb3c23e1 100644 --- a/packages/client/src/types.ts +++ b/packages/client/src/types.ts @@ -462,6 +462,15 @@ export type StreamRNVideoSDKGlobals = { */ check(permission: 'microphone' | 'camera'): Promise; }; + nativeEvents: { + speechActivity: { + /** + * Subscribes to native speech activity events. + * Returns an unsubscribe function. 
+ */ + subscribe(cb: (state: { isSoundDetected: boolean }) => void): () => void; + }; + }; }; declare global { diff --git a/packages/react-native-sdk/CLAUDE.md b/packages/react-native-sdk/CLAUDE.md index 51f0c83a16..3871a311bf 100644 --- a/packages/react-native-sdk/CLAUDE.md +++ b/packages/react-native-sdk/CLAUDE.md @@ -353,7 +353,6 @@ Observable streams for push events: - `useScreenShareButton` - Screen share button logic - `useScreenshot` - Screenshot detection (iOS) - `usePaginatedLayoutSortPreset` - Participant sorting for paginated layouts -- `useSpeechDetection` - Audio level detection - `useTrackDimensions` - Track video dimensions - `usePermissionRequest` - Media permission requests - `usePermissionNotification` - Permission request notifications diff --git a/packages/react-native-sdk/src/hooks/index.ts b/packages/react-native-sdk/src/hooks/index.ts index deeebd622b..572299f2e7 100644 --- a/packages/react-native-sdk/src/hooks/index.ts +++ b/packages/react-native-sdk/src/hooks/index.ts @@ -9,5 +9,4 @@ export * from './useScreenShareButton'; export * from './useScreenShareAudioMixing'; export * from './useTrackDimensions'; export * from './useScreenshot'; -export * from './useSpeechDetection'; export * from './useModeration'; diff --git a/packages/react-native-sdk/src/hooks/useSpeechDetection.ts b/packages/react-native-sdk/src/hooks/useSpeechDetection.ts deleted file mode 100644 index cda61b9761..0000000000 --- a/packages/react-native-sdk/src/hooks/useSpeechDetection.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { useEffect, useState } from 'react'; -import { - type SoundDetectorState, - RNSpeechDetector, -} from '@stream-io/video-client'; -import { useCallStateHooks } from '@stream-io/video-react-bindings'; - -/** - * Hook that provides speech detection info using the RNSpeechDetector. - * - * @returns An object containing the current audio level (0 - 1) and whether sound is detected. 
- */ -export function useSpeechDetection() { - const [audioState, setAudioState] = useState({ - isSoundDetected: false, - audioLevel: 0, - }); - const { useMicrophoneState } = useCallStateHooks(); - const { isEnabled, mediaStream } = useMicrophoneState(); - - useEffect(() => { - if (!isEnabled) return; - - const detector = new RNSpeechDetector(mediaStream); - const start = detector.start((state: SoundDetectorState) => { - setAudioState(state); - }); - - return () => { - start.then((stop) => stop()); - }; - }, [mediaStream, isEnabled]); - - return audioState; -} diff --git a/packages/react-native-sdk/src/utils/internal/registerSDKGlobals.ts b/packages/react-native-sdk/src/utils/internal/registerSDKGlobals.ts index 5a6e5bfdb3..7c9f7b5b30 100644 --- a/packages/react-native-sdk/src/utils/internal/registerSDKGlobals.ts +++ b/packages/react-native-sdk/src/utils/internal/registerSDKGlobals.ts @@ -1,5 +1,6 @@ import { StreamRNVideoSDKGlobals } from '@stream-io/video-client'; import { NativeModules, PermissionsAndroid, Platform } from 'react-native'; +import { audioDeviceModuleEvents } from '@stream-io/react-native-webrtc'; import { getCallingxLibIfAvailable } from '../push/libs/callingx'; import { endCallingxCall, @@ -85,6 +86,18 @@ const streamRNVideoSDKGlobals: StreamRNVideoSDKGlobals = { ); }, }, + nativeEvents: { + speechActivity: { + subscribe(cb) { + const subscription = audioDeviceModuleEvents.addSpeechActivityListener( + (data) => { + cb({ isSoundDetected: data.event === 'started' }); + }, + ); + return () => subscription.remove(); + }, + }, + }, }; // Note: The global type declaration for `streamRNVideoSDK` is defined in diff --git a/sample-apps/react/egress-composite/src/main.scss b/sample-apps/react/egress-composite/src/main.scss index af48df9f3b..58e6a1e4b9 100644 --- a/sample-apps/react/egress-composite/src/main.scss +++ b/sample-apps/react/egress-composite/src/main.scss @@ -1,4 +1,3 @@ @layer video-sdk-layer, base-layer, overrides-layer; @import '@stream-io/video-react-sdk/dist/css/styles.css' layer(video-sdk-layer); - \ No newline at end of file
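Notes on the change (not part of the diff). This PR drops the WebRTC-stats-based RNSpeechDetector and instead has MicrophoneManager consume speech activity straight from the native layer via the new StreamRNVideoSDKGlobals.nativeEvents contract. The sketch below is a minimal illustration of that contract and of the subscribe/cleanup pattern the manager now follows while the microphone is muted; the helper names (getSpeechActivity, watchSpeakingWhileMuted) are hypothetical and not SDK exports.

// Shape added to StreamRNVideoSDKGlobals in types.ts: subscribe() returns an
// unsubscribe function.
type SpeechActivity = {
  subscribe(cb: (state: { isSoundDetected: boolean }) => void): () => void;
};

// Assumes the RN SDK has registered its globals (registerSDKGlobals.ts). When
// the @stream-io/react-native-webrtc peer dependency is too old, the native
// event source is simply absent.
const getSpeechActivity = (): SpeechActivity | undefined =>
  (globalThis as any).streamRNVideoSDK?.nativeEvents?.speechActivity;

function watchSpeakingWhileMuted(
  onChange: (isSpeaking: boolean) => void,
): () => void {
  const speechActivity = getSpeechActivity();
  if (!speechActivity) {
    console.warn(
      'Native speech activity is not available; make sure the "@stream-io/react-native-webrtc" peer dependency version is satisfied',
    );
    return () => {};
  }
  const unsubscribe = speechActivity.subscribe((event) => {
    onChange(event.isSoundDetected);
  });
  // MicrophoneManager stores the equivalent of this function as
  // `soundDetectorCleanup` and awaits it when detection stops (mic re-enabled,
  // send-audio permission revoked, or the call ends).
  return unsubscribe;
}

Returning a no-op cleanup when the native event source is missing keeps the caller's teardown path uniform: the manager can always await its stored cleanup without checking how detection was started.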
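The removed useSpeechDetection hook has no direct replacement in this diff, and its audioLevel value is gone entirely: the native event only reports a boolean. As a hedged sketch, an app that still needs a "sound detected" signal could build a small hook on top of the new global event source, assuming the RN SDK globals are registered and that the native audio device module emits speech activity while the microphone session is active. The hook name useIsSoundDetected is hypothetical.

import { useEffect, useState } from 'react';

// Hypothetical userland approximation of the removed useSpeechDetection hook.
export function useIsSoundDetected(): boolean {
  const [isSoundDetected, setIsSoundDetected] = useState(false);

  useEffect(() => {
    const speechActivity = (globalThis as any).streamRNVideoSDK?.nativeEvents
      ?.speechActivity;
    if (!speechActivity) return;
    // subscribe() returns the unsubscribe function, which doubles as the
    // effect cleanup.
    return speechActivity.subscribe((event: { isSoundDetected: boolean }) =>
      setIsSoundDetected(event.isSoundDetected),
    );
  }, []);

  return isSoundDetected;
}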
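The MicrophoneManagerRN tests follow the same shift: instead of mocking the RNSpeechDetector module, they install a fake nativeEvents.speechActivity.subscribe on globalThis.streamRNVideoSDK and drive it by invoking the captured callback. A standalone, self-contained sketch of that stubbing pattern (the real suite also stubs callManager and permissions, omitted here):

import { beforeEach, expect, it, vi } from 'vitest';

let speechActivityCallback:
  | ((state: { isSoundDetected: boolean }) => void)
  | null = null;

// Captures the subscriber and hands back a mock unsubscribe function.
const subscribeMock = vi.fn(
  (cb: (state: { isSoundDetected: boolean }) => void) => {
    speechActivityCallback = cb;
    return vi.fn();
  },
);

beforeEach(() => {
  speechActivityCallback = null;
  subscribeMock.mockClear();
  (globalThis as any).streamRNVideoSDK = {
    nativeEvents: { speechActivity: { subscribe: subscribeMock } },
  };
});

it('lets a test emit fake native speech-activity events', () => {
  const received: boolean[] = [];
  const sdk = (globalThis as any).streamRNVideoSDK;
  const unsubscribe = sdk.nativeEvents.speechActivity.subscribe(
    (event: { isSoundDetected: boolean }) =>
      received.push(event.isSoundDetected),
  );
  // Simulate the native layer reporting speech.
  speechActivityCallback?.({ isSoundDetected: true });
  expect(received).toEqual([true]);
  expect(subscribeMock).toHaveBeenCalledTimes(1);
  unsubscribe();
});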