Snirpo · domdomegg · Jan 12, 2025
diff --git a/index.d.ts b/index.d.ts
@@ -0,0 +1,136 @@
+/// <reference types="node" />
+import { Transform, TransformOptions } from 'node:stream';
+
+declare class VAD {
+    /**
+     * Creates a new Voice Activity Detection instance.
+     * @param mode VAD sensitivity mode. Optional; Mode.NORMAL is used if omitted.
+     */
+    constructor(mode?: VAD.Mode);
+
+    /**
+     * Analyses 16-bit signed samples and reports the detected voice event via a promise.
+     * Note: Sample buffers should be rather short (36ms to 144ms) and the sample rate
+     * no higher than 32kHz for best results. Sample rates higher than 16kHz provide
+     * no benefit to the VAD algorithm, as human voice patterns center around 4000 to 6000Hz.
+     * @param samples Buffer object containing 16bit signed values
+     * @param sampleRate Audio sample rate in Hz; one of the VAD.SampleRate values.
+     * @returns Promise resolving to a VAD.Event code
+     */
+    processAudio(samples: Buffer, sampleRate: VAD.SampleRate): Promise<VAD.Event>;
+
+    /**
+     * Analyses 32-bit float samples and reports the detected voice event via a promise.
+     * Note: Sample buffers should be rather short (36ms to 144ms) and the sample rate
+     * no higher than 32kHz for best results. Sample rates higher than 16kHz provide
+     * no benefit to the VAD algorithm, as human voice patterns center around 4000 to 6000Hz.
+     * @param samples Buffer object containing 32bit normalized float values
+     * @param sampleRate Audio sample rate in Hz; one of the VAD.SampleRate values.
+     * @returns Promise resolving to a VAD.Event code
+     */
+    processAudioFloat(samples: Buffer, sampleRate: VAD.SampleRate): Promise<VAD.Event>;
+
+    /**
+     * Creates a new VAD stream for continuous audio processing.
+     * @param opts Stream options; every field is optional, as is the object itself.
+     */
+    static createStream(opts?: VAD.StreamOptions): VAD.VADStream;
+
+    /**
+     * Converts a 16-bit signed audio buffer to float format.
+     * @param buffer Input buffer containing 16bit signed values
+     * @returns Buffer holding the converted float samples
+     */
+    static toFloatBuffer(buffer: Buffer): Buffer;
+}
+
+declare namespace VAD {
+    /** Valid sample rates for audio processing (in Hz). 16000Hz recommended for best performance/accuracy tradeoff. */
+    export type SampleRate = 8000 | 16000 | 32000 | 48000;
+    /** Result codes that the processAudio / processAudioFloat promises resolve to. */
+    export enum Event {
+        /** Constant for voice detection errors */
+        ERROR = -1,
+        /** Constant for voice detection results with no detected voices */
+        SILENCE = 0,
+        /** Constant for voice detection results with detected voice */
+        VOICE = 1,
+        /** Constant for voice detection results with detected noise (Not implemented yet) */
+        NOISE = 2
+    }
+    /** Sensitivity modes accepted by the VAD constructor and by StreamOptions.mode. */
+    export enum Mode {
+        /**
+         * Normal voice detection mode. Suitable for high bitrate, low-noise data.
+         * May classify noise as voice, too. The default value if mode is omitted in the constructor.
+         */
+        NORMAL = 0,
+        /** Detection mode optimised for low-bitrate audio */
+        LOW_BITRATE = 1,
+        /** Detection mode best suited for somewhat noisy, lower quality audio */
+        AGGRESSIVE = 2,
+        /** Detection mode with lowest miss-rate. Works well for most inputs */
+        VERY_AGGRESSIVE = 3
+    }
+    /** Options accepted by VAD.createStream and by the VADStream constructor; all fields are optional. */
+    export interface StreamOptions {
+        /**
+         * VAD sensitivity mode
+         * @default Mode.NORMAL
+         */
+        mode?: Mode;
+
+        /**
+         * Audio sample rate in Hz; restricted to the SampleRate values
+         * @default 16000
+         */
+        audioFrequency?: SampleRate;
+
+        /**
+         * Time in milliseconds to wait before marking the end of speech
+         * @default 1000
+         */
+        debounceTime?: number;
+    }
+    /** Speech detection details; this is the type of StreamOutput.speech. */
+    export interface SpeechData {
+        /** Current state of speech */
+        state: boolean;
+        /** True on chunk when speech starts */
+        start: boolean;
+        /** True on chunk when speech ends */
+        end: boolean;
+        /** Time when speech started (unit not stated in these typings - TODO confirm) */
+        startTime: number;
+        /** Duration of current speech block (unit not stated in these typings - TODO confirm) */
+        duration: number;
+    }
+    /** NOTE(review): presumably the shape of each chunk a VADStream emits; nothing here ties it to the stream - confirm. */
+    export interface StreamOutput {
+        /** Current seek time in audio (unit not stated in these typings - TODO confirm) */
+        time: number;
+        /** Original audio data */
+        audioData: Buffer;
+        /** Speech detection information */
+        speech: SpeechData;
+    }
+    /** Transform stream type returned by VAD.createStream. */
+    export class VADStream extends Transform {
+        constructor(options?: StreamOptions & TransformOptions);
+
+        /** Current VAD instance */
+        vad: VAD;
+        /** Audio sample rate. NOTE(review): typed as number here, while StreamOptions.audioFrequency is SampleRate. */
+        audioFrequency: number;
+        /** Debounce time in milliseconds */
+        debounceTime: number;
+        /** Current speech state */
+        state: boolean;
+        /** Time when speech started (unit not stated in these typings - TODO confirm) */
+        startTime: number;
+        /** Time of last detected speech (unit not stated in these typings - TODO confirm) */
+        lastSpeech: number;
+    }
+}
+
+export = VAD;
diff --git a/package.json b/package.json
@@ -12,6 +12,7 @@
     "release-major": "npm version major && npm publish"
   },
   "main": "./index.js",
+  "types": "./index.d.ts",
   "license": "MIT",
   "engines": {
     "node": ">=6.14.3",