From 865275463aca8cc8db4bed4003b56fc4a7dc5514 Mon Sep 17 00:00:00 2001 From: Takashi Fujita Date: Fri, 29 Aug 2025 12:38:15 +0900 Subject: [PATCH 1/2] feat: add MediaStreamTrack support to speech recognition --- src/speech.test.ts | 85 ++++++++++++++++++++++++++++++++++++++++++++++ src/speech.ts | 30 ++++++++++++++-- 2 files changed, 113 insertions(+), 2 deletions(-) diff --git a/src/speech.test.ts b/src/speech.test.ts index de6cc4a..6e1ce49 100644 --- a/src/speech.test.ts +++ b/src/speech.test.ts @@ -156,6 +156,91 @@ describe('speech function', () => { // Assert: 期待される結果を確認 expect(mockRecognitionInstance.start).toHaveBeenCalledTimes(1) + expect(mockRecognitionInstance.start).toHaveBeenCalledWith() + }) + + it('When calling start method with MediaStreamTrack, it passes the track to recognition.start', () => { + // Arrange + const recognitionObj = speech({}) + const mockAudioTrack = { + kind: 'audio', + readyState: 'live', + } as MediaStreamTrack + + // Act + recognitionObj.start(mockAudioTrack) + + // Assert + expect(mockRecognitionInstance.start).toHaveBeenCalledTimes(1) + expect(mockRecognitionInstance.start).toHaveBeenCalledWith(mockAudioTrack) + }) + + it('When calling start method with video track, it throws InvalidStateError', () => { + // Arrange + const recognitionObj = speech({}) + const mockVideoTrack = { + kind: 'video', + readyState: 'live', + } as MediaStreamTrack + + // Act & Assert + expect(() => recognitionObj.start(mockVideoTrack)).toThrow(DOMException) + expect(() => recognitionObj.start(mockVideoTrack)).toThrow( + 'The provided MediaStreamTrack must be an audio track' + ) + expect(mockRecognitionInstance.start).not.toHaveBeenCalled() + }) + + it('When calling start method with ended audio track, it throws InvalidStateError', () => { + // Arrange + const recognitionObj = speech({}) + const mockEndedTrack = { + kind: 'audio', + readyState: 'ended', + } as MediaStreamTrack + + // Act & Assert + expect(() => recognitionObj.start(mockEndedTrack)).toThrow(DOMException) + expect(() => recognitionObj.start(mockEndedTrack)).toThrow( + 'The provided MediaStreamTrack must be in "live" state' + ) + expect(mockRecognitionInstance.start).not.toHaveBeenCalled() + }) + + it('When audioTrack is provided in options, it uses that track on start', () => { + // Arrange + const mockAudioTrack = { + kind: 'audio', + readyState: 'live', + } as MediaStreamTrack + const recognitionObj = speech({ audioTrack: mockAudioTrack }) + + // Act + recognitionObj.start() + + // Assert + expect(mockRecognitionInstance.start).toHaveBeenCalledTimes(1) + expect(mockRecognitionInstance.start).toHaveBeenCalledWith(mockAudioTrack) + }) + + it('When both options.audioTrack and parameter audioTrack are provided, parameter takes precedence', () => { + // Arrange + const optionsTrack = { + kind: 'audio', + readyState: 'live', + } as MediaStreamTrack + const parameterTrack = { + kind: 'audio', + readyState: 'live', + } as MediaStreamTrack + const recognitionObj = speech({ audioTrack: optionsTrack }) + + // Act + recognitionObj.start(parameterTrack) + + // Assert + expect(mockRecognitionInstance.start).toHaveBeenCalledTimes(1) + expect(mockRecognitionInstance.start).toHaveBeenCalledWith(parameterTrack) }) it('When calling stop method, it stops the recognition', () => { diff --git a/src/speech.ts b/src/speech.ts index 168b14b..90e0e35 100644 --- a/src/speech.ts +++ b/src/speech.ts @@ -50,6 +50,12 @@ export interface SpeechOptions { * エラー時のコールバック */ onError?: (error: SpeechRecognitionErrorCode) => void + + /** + * 音声入力として使用するMediaStreamTrack + * 指定しない場合はデフォルトのマイクを使用 + */ + audioTrack?: MediaStreamTrack } /** @@ -111,9 +117,29 @@ export function speech(options: SpeechOptions = {}) { return { /** * 音声認識を開始 + * @param audioTrack - オプション: 使用する音声トラック */ - start: () => { - recognition.start() + start: (audioTrack?: MediaStreamTrack) => { + const trackToUse = audioTrack || options.audioTrack + + if (trackToUse) { + if (trackToUse.kind !== 'audio') { + throw new DOMException( + 'The provided MediaStreamTrack must be an audio track', + 'InvalidStateError' + ) + } + if (trackToUse.readyState !== 'live') { + throw new DOMException( + 'The provided MediaStreamTrack must be in "live" state', + 'InvalidStateError' + ) + } + // @ts-expect-error - Web Speech API仕様の新しいメソッドシグネチャ + recognition.start(trackToUse) + } else { + recognition.start() + } }, /** From f46f4e23dde4e86f9338212306e3f0199b01ec53 Mon Sep 17 00:00:00 2001 From: Takashi Fujita Date: Fri, 29 Aug 2025 16:15:19 +0900 Subject: [PATCH 2/2] =?UTF-8?q?test:=203A=E3=83=91=E3=82=BF=E3=83=BC?= =?UTF-8?q?=E3=83=B3=E3=81=AE=E3=82=B3=E3=83=A1=E3=83=B3=E3=83=88=E3=82=92?= =?UTF-8?q?=E6=97=A5=E6=9C=AC=E8=AA=9E=E3=81=A7=E7=B5=B1=E4=B8=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/speech.test.ts | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/speech.test.ts b/src/speech.test.ts index 6e1ce49..3d19454 100644 --- a/src/speech.test.ts +++ b/src/speech.test.ts @@ -160,30 +160,30 @@ describe('speech function', () => { }) it('When calling start method with MediaStreamTrack, it passes the track to recognition.start', () => { - // Arrange + // Arrange: 操作に必要な準備 const recognitionObj = speech({}) const mockAudioTrack = { kind: 'audio', readyState: 'live', } as MediaStreamTrack - // Act + // Act: 結果を得るために必要な操作 recognitionObj.start(mockAudioTrack) - // Assert + // Assert: 期待される結果を確認 expect(mockRecognitionInstance.start).toHaveBeenCalledTimes(1) expect(mockRecognitionInstance.start).toHaveBeenCalledWith(mockAudioTrack) }) it('When calling start method with video track, it throws InvalidStateError', () => { - // Arrange + // Arrange: 操作に必要な準備 const recognitionObj = speech({}) const mockVideoTrack = { kind: 'video', readyState: 'live', } as MediaStreamTrack - // Act & Assert + // Act & Assert: 操作とアサーションを組み合わせる expect(() => recognitionObj.start(mockVideoTrack)).toThrow(DOMException) expect(() => recognitionObj.start(mockVideoTrack)).toThrow( 'The provided MediaStreamTrack must be an audio track' @@ -192,14 +192,14 @@ describe('speech function', () => { }) it('When calling start method with ended audio track, it throws InvalidStateError', () => { - // Arrange + // Arrange: 操作に必要な準備 const recognitionObj = speech({}) const mockEndedTrack = { kind: 'audio', readyState: 'ended', } as MediaStreamTrack - // Act & Assert + // Act & Assert: 操作とアサーションを組み合わせる expect(() => recognitionObj.start(mockEndedTrack)).toThrow(DOMException) expect(() => recognitionObj.start(mockEndedTrack)).toThrow( 'The provided MediaStreamTrack must be in "live" state' @@ -208,23 +208,23 @@ describe('speech function', () => { }) it('When audioTrack is provided in options, it uses that track on start', () => { - // Arrange + // Arrange: 操作に必要な準備 const mockAudioTrack = { kind: 'audio', readyState: 'live', } as MediaStreamTrack const recognitionObj = speech({ audioTrack: mockAudioTrack }) - // Act + // Act: 結果を得るために必要な操作 recognitionObj.start() - // Assert + // Assert: 期待される結果を確認 expect(mockRecognitionInstance.start).toHaveBeenCalledTimes(1) expect(mockRecognitionInstance.start).toHaveBeenCalledWith(mockAudioTrack) }) it('When both options.audioTrack and parameter audioTrack are provided, parameter takes precedence', () => { - // Arrange + // Arrange: 操作に必要な準備 const optionsTrack = { kind: 'audio', readyState: 'live', @@ -235,10 +235,10 @@ describe('speech function', () => { } as MediaStreamTrack const recognitionObj = speech({ audioTrack: optionsTrack }) - // Act + // Act: 結果を得るために必要な操作 recognitionObj.start(parameterTrack) - // Assert + // Assert: 期待される結果を確認 expect(mockRecognitionInstance.start).toHaveBeenCalledTimes(1) expect(mockRecognitionInstance.start).toHaveBeenCalledWith(parameterTrack) })