Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/// <reference types="node" />
import { Transform, TransformOptions } from 'node:stream';

/**
 * Voice Activity Detection (VAD) engine.
 *
 * An instance classifies short buffers of audio samples and reports the result
 * as a `VAD.Event` code. Use the static helpers to build a streaming detector or
 * to convert 16-bit signed samples to the float format.
 */
declare class VAD {
  /**
   * Creates a new Voice Activity Detection instance.
   * @param mode VAD sensitivity mode. Optional: when omitted, `VAD.Mode.NORMAL` is used.
   */
  constructor(mode?: VAD.Mode);

  /**
   * Analyses the given samples and reports the detected voice event via a promise.
   *
   * Note: sample buffers should be rather short (36ms to 144ms) and the sample rate
   * no higher than 32kHz for best results. Sample rates higher than 16kHz provide
   * no benefit to the VAD algorithm, as human voice patterns center around 4000 to 6000Hz.
   * @param samples Buffer object containing 16bit signed values
   * @param sampleRate Audio sample rate.
   * @returns Promise resolving to a VAD.Event code
   */
  processAudio(samples: Buffer, sampleRate: VAD.SampleRate): Promise<VAD.Event>;

  /**
   * Analyses the given samples and reports the detected voice event via a promise.
   * Float counterpart of `processAudio`.
   *
   * Note: sample buffers should be rather short (36ms to 144ms) and the sample rate
   * no higher than 32kHz for best results. Sample rates higher than 16kHz provide
   * no benefit to the VAD algorithm, as human voice patterns center around 4000 to 6000Hz.
   * @param samples Buffer object containing 32bit normalized float values
   * @param sampleRate Audio sample rate.
   * @returns Promise resolving to a VAD.Event code
   */
  processAudioFloat(samples: Buffer, sampleRate: VAD.SampleRate): Promise<VAD.Event>;

  /**
   * Creates a new VAD stream for continuous audio processing.
   * @param opts Stream options; may be omitted.
   * @returns The newly created VAD stream
   */
  static createStream(opts?: VAD.StreamOptions): VAD.VADStream;

  /**
   * Converts a 16-bit signed audio buffer to float format.
   * @param buffer Input buffer
   * @returns Float buffer
   */
  static toFloatBuffer(buffer: Buffer): Buffer;
}

declare namespace VAD {
  /** Sensitivity presets for voice detection. */
  export enum Mode {
    /**
     * Normal detection mode, suited to high bitrate, low-noise data.
     * It may also classify noise as voice. This is the value used when the
     * constructor is called without a mode.
     */
    NORMAL = 0,
    /** Mode optimised for low-bitrate audio. */
    LOW_BITRATE = 1,
    /** Mode best suited for somewhat noisy, lower quality audio. */
    AGGRESSIVE = 2,
    /** Mode with the lowest miss-rate; works well for most inputs. */
    VERY_AGGRESSIVE = 3
  }

  /** Result codes produced by voice detection. */
  export enum Event {
    /** A voice detection error occurred. */
    ERROR = -1,
    /** No voice was detected. */
    SILENCE = 0,
    /** Voice was detected. */
    VOICE = 1,
    /** Noise was detected (not implemented yet). */
    NOISE = 2
  }

  /**
   * Sample rates (in Hz) that are valid for audio processing.
   * 16000Hz is recommended as the best performance/accuracy tradeoff.
   */
  export type SampleRate = 8000 | 16000 | 32000 | 48000;

  /** Settings accepted when creating a VAD stream. */
  export interface StreamOptions {
    /**
     * Sensitivity mode used for detection.
     * @default Mode.NORMAL
     */
    mode?: Mode;

    /**
     * Sample rate of the audio, in Hz.
     * @default 16000
     */
    audioFrequency?: SampleRate;

    /**
     * How long to wait, in milliseconds, before marking the end of speech.
     * @default 1000
     */
    debounceTime?: number;
  }

  /** Speech detection details reported for a chunk of audio. */
  export interface SpeechData {
    /** Current speech state. */
    state: boolean;
    /** True on the chunk where speech starts. */
    start: boolean;
    /** True on the chunk where speech ends. */
    end: boolean;
    /** Time at which speech started. */
    startTime: number;
    /** Duration of the current speech block. */
    duration: number;
  }

  /** Shape of the data describing one processed chunk of audio. */
  export interface StreamOutput {
    /** Current seek time in the audio. */
    time: number;
    /** The original audio data. */
    audioData: Buffer;
    /** Speech detection information for this chunk. */
    speech: SpeechData;
  }

  /** Transform stream that performs continuous voice activity detection. */
  export class VADStream extends Transform {
    /**
     * @param options VAD stream settings combined with the standard Transform options; may be omitted.
     */
    constructor(options?: StreamOptions & TransformOptions);

    /** VAD instance currently held by the stream. */
    vad: VAD;
    /** Sample rate of the audio. */
    audioFrequency: number;
    /** Debounce time, in milliseconds. */
    debounceTime: number;
    /** Current speech state. */
    state: boolean;
    /** Time at which speech started. */
    startTime: number;
    /** Time at which speech was last detected. */
    lastSpeech: number;
  }
}

export = VAD;
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"release-major": "npm version major && npm publish"
},
"main": "./index.js",
"types": "./index.d.ts",
"license": "MIT",
"engines": {
"node": ">=6.14.3",
Expand Down