From dbaa7b08140c83f596c2240b2011dcf856ed02e1 Mon Sep 17 00:00:00 2001 From: Roger Chappel Date: Fri, 1 May 2026 18:21:21 +1000 Subject: [PATCH 1/3] docs: define BargeKit turn-taking state machine --- docs/TURN_TAKING_STATE_MACHINE.md | 162 ++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 docs/TURN_TAKING_STATE_MACHINE.md diff --git a/docs/TURN_TAKING_STATE_MACHINE.md b/docs/TURN_TAKING_STATE_MACHINE.md new file mode 100644 index 0000000..7893c33 --- /dev/null +++ b/docs/TURN_TAKING_STATE_MACHINE.md @@ -0,0 +1,162 @@ +# BargeKit Turn-Taking State Machine + +Status: Wave 1 canonical contract +Spec version: `1.0.0` + +## Product North Star + +BargeKit is the local-first turn-taking layer for voice agents. It decides when microphone input is accepted, ignored, noise-gated, or treated as an interruption so agent apps can feel naturally interruptible without adopting a full voice-agent framework. + +## Design Goals + +- **Local-first:** core state machine and default audio-level policy run locally. +- **Deterministic:** the same input signal sequence must produce the same state/events. +- **Interruptible:** barge-in is a first-class transition, not an afterthought. +- **Echo-aware:** agent output can gate or duck microphone handling. +- **Mode-aware:** push-to-talk, VAD, wake-hook, and half-duplex behavior use the same states with different guards. +- **Observable:** every state transition emits an event suitable for UI, VoicePath, and AgentPulse integrations. + +## Modes + +| Mode | Meaning | +|---|---| +| `push_to_talk` | Input is accepted only while an explicit push control is active. | +| `vad` | Voice activity detection opens/closes speech segments. | +| `wake_hook` | External wake detector arms BargeKit before VAD/segment handling. | +| `half_duplex` | Microphone input is muted while agent output is active unless interruption is explicitly enabled. 
| + +## States + +| State | Meaning | +|---|---| +| `idle` | No active input/output gating. | +| `armed` | Ready to accept user speech, waiting for a qualifying signal. | +| `listening` | Microphone input is open and being evaluated. | +| `user_speaking` | User speech segment is active. | +| `agent_speaking` | Agent output is active. | +| `barge_pending` | User speech is detected while agent output is active and may interrupt. | +| `interrupted` | Agent output was interrupted by user speech. | +| `muted` | Input is intentionally closed by policy or host app. | +| `noise_gated` | Input signal exists but is below speech qualification policy. | +| `cooldown` | Short guard window after speech/interruption to prevent flapping. | +| `error` | State machine cannot continue without host intervention. | + +## Inputs + +| Input | Required data | Meaning | +|---|---|---| +| `session.start` | `mode` | Turn-taking session begins. | +| `session.stop` | `reason` | Turn-taking session ends. | +| `mic.level` | `level`, `timeMs` | Audio level sample or aggregate. | +| `vad.speech.start` | `confidence`, `timeMs` | VAD says speech began. | +| `vad.speech.end` | `confidence`, `timeMs` | VAD says speech ended. | +| `push.down` | `timeMs` | Push-to-talk pressed. | +| `push.up` | `timeMs` | Push-to-talk released. | +| `wake.detected` | `confidence`, `timeMs` | External wake hook fired. | +| `agent.output.start` | `outputId` | Agent audio playback began. | +| `agent.output.end` | `outputId` | Agent audio playback ended. | +| `agent.output.ducked` | `outputId` | Output was ducked by host/VoicePath. | +| `mute.on` | `reason` | Host muted input. | +| `mute.off` | `reason` | Host unmuted input. | +| `config.update` | `patch` | Runtime policy changed. | +| `error.raise` | `code`, `summary` | Host or adapter reported a failure. 
| + +## Configuration Contract + +```json +{ + "mode": "vad", + "speechThreshold": 0.62, + "noiseFloor": 0.12, + "minSpeechMs": 120, + "silenceMs": 450, + "debounceMs": 80, + "cooldownMs": 250, + "bargeIn": { + "enabled": true, + "whileAgentSpeaking": true, + "minSpeechMs": 140, + "duckOutput": true + }, + "echoGuard": { + "enabled": true, + "suppressWhileOutput": false, + "duckOnBarge": true + } +} +``` + +Required fields: `mode`, `minSpeechMs`, `silenceMs`, `debounceMs`, `bargeIn.enabled`. + +## Transition Rules + +### Session lifecycle + +- `idle` + `session.start` -> `armed` +- any non-terminal state + `session.stop` -> `idle` +- any state + `error.raise` -> `error` + +### Push-to-talk + +- `armed` + `push.down` -> `listening` +- `listening` + qualifying speech -> `user_speaking` +- `user_speaking` + `push.up` -> `cooldown` +- `cooldown` after `cooldownMs` -> `armed` + +### VAD/open microphone + +- `armed` + qualifying `vad.speech.start` -> `user_speaking` +- `armed` + level below threshold but above noise floor -> `noise_gated` +- `noise_gated` + qualifying speech -> `user_speaking` +- `noise_gated` + silence for `silenceMs` -> `armed` +- `user_speaking` + silence for `silenceMs` -> `cooldown` +- `cooldown` after `cooldownMs` -> `armed` + +### Agent output and barge-in + +- `armed` + `agent.output.start` -> `agent_speaking` +- `agent_speaking` + qualifying user speech when barge-in enabled -> `barge_pending` +- `barge_pending` after `bargeIn.minSpeechMs` -> `interrupted` +- `interrupted` emits output interruption/duck request and then -> `user_speaking` +- `agent_speaking` + `agent.output.end` -> `armed` +- `agent_speaking` + user speech when barge-in disabled -> `noise_gated` or remain `agent_speaking` depending on `echoGuard.suppressWhileOutput` + +### Muting + +- any non-error state + `mute.on` -> `muted` +- `muted` + `mute.off` -> `armed` +- `muted` ignores speech/VAD inputs but may record suppressed events. 
+ +## Event Contract + +| Event | Emitted when | +|---|---| +| `bargekit.session.started` | Session starts (`idle` + `session.start` -> `armed`). | +| `bargekit.session.stopped` | Session returns to `idle`. | +| `bargekit.state.changed` | Any state transition occurs. | +| `bargekit.user_speech.started` | User speech segment starts. | +| `bargekit.user_speech.ended` | User speech segment ends. | +| `bargekit.barge_in.requested` | Barge-in threshold is satisfied. | +| `bargekit.output.duck_requested` | Output should duck for interruption. | +| `bargekit.output.cancel_requested` | Output should stop for interruption. | +| `bargekit.input.muted` | Input is muted by host/policy. | +| `bargekit.input.noise_gated` | Input is suppressed as noise/echo. | +| `bargekit.error.recorded` | State machine enters `error`. | + +Events include `sessionId`, `state`, `previousState`, `mode`, `timeMs`, and `reason` when applicable. + +## Invariants + +- `user_speaking` and `agent_speaking` may overlap only through `barge_pending`/`interrupted`; otherwise the machine must choose a policy outcome. +- No barge-in may be emitted unless `bargeIn.enabled` is true. +- No speech segment starts until `minSpeechMs` is satisfied. +- No speech segment ends until `silenceMs` is satisfied. +- Muted input cannot produce `bargekit.user_speech.started`. +- Echo/noise suppression must emit an observable suppression event. +- The core contract never sends audio off-device. + +## Integration Notes + +- VoicePath should subscribe to `bargekit.output.cancel_requested` or `bargekit.output.duck_requested` to interrupt playback. +- AgentPulse should map speech and interruption events into its canonical event stream. +- AgentGlow can render listening/speaking/interrupted states from `bargekit.state.changed`. 
From 63a3d89940d30df7e8d2d8231dd765ddeeb00e32 Mon Sep 17 00:00:00 2001 From: Roger Chappel Date: Fri, 1 May 2026 18:21:21 +1000 Subject: [PATCH 2/3] feat: publish BargeKit state machine constants --- src/index.js | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 src/index.js diff --git a/src/index.js b/src/index.js new file mode 100644 index 0000000..9676168 --- /dev/null +++ b/src/index.js @@ -0,0 +1,61 @@ +/** Spec version string; matches the Spec version line of docs/TURN_TAKING_STATE_MACHINE.md. */ export const BARGEKIT_STATE_MACHINE_SPEC_VERSION = '1.0.0'; + +/** Frozen list of mode identifiers, in the order of the Modes table of the contract. */ export const MODES = Object.freeze(['push_to_talk', 'vad', 'wake_hook', 'half_duplex']); + +/** Frozen list of state identifiers, in the order of the States table of the contract. */ export const STATES = Object.freeze([ + 'idle', + 'armed', + 'listening', + 'user_speaking', + 'agent_speaking', + 'barge_pending', + 'interrupted', + 'muted', + 'noise_gated', + 'cooldown', + 'error' +]); + +/** Frozen list of input signal names, in the order of the Inputs table of the contract. */ export const INPUTS = Object.freeze([ + 'session.start', + 'session.stop', + 'mic.level', + 'vad.speech.start', + 'vad.speech.end', + 'push.down', + 'push.up', + 'wake.detected', + 'agent.output.start', + 'agent.output.end', + 'agent.output.ducked', + 'mute.on', + 'mute.off', + 'config.update', + 'error.raise' +]); + +/** Frozen list of emitted event names, in the order of the Event Contract table. */ export const EVENTS = Object.freeze([ + 'bargekit.session.started', + 'bargekit.session.stopped', + 'bargekit.state.changed', + 'bargekit.user_speech.started', + 'bargekit.user_speech.ended', + 'bargekit.barge_in.requested', + 'bargekit.output.duck_requested', + 'bargekit.output.cancel_requested', + 'bargekit.input.muted', + 'bargekit.input.noise_gated', + 'bargekit.error.recorded' +]); + +/** Frozen list of required config field names; the nested bargeIn.enabled flag is written as a dotted path. Nothing in this module validates a config against the list. */ export const REQUIRED_CONFIG_FIELDS = Object.freeze([ + 'mode', + 'minSpeechMs', + 'silenceMs', + 'debounceMs', + 'bargeIn.enabled' +]); + +/** Guard for barge-in requests. Returns true only when config.bargeIn.enabled is strictly true AND currentState is exactly agent_speaking; optional chaining makes a null or undefined config (or a missing bargeIn object) yield false instead of throwing. No other config field or mode is consulted. @param config - turn-taking config, may be nullish. @param currentState - state identifier to test. @returns {boolean} */ export function canRequestBargeIn(config, currentState) { + return config?.bargeIn?.enabled === true && currentState === 'agent_speaking'; +} From 326f8ab5caee95790db6648435be8194b93392b0 Mon Sep 17 00:00:00 2001 From: Roger Chappel Date: Fri, 1 May 2026 18:21:21 +1000 Subject: [PATCH 3/3] test: cover 
BargeKit state machine contract --- test/state-machine-contract.test.js | 51 +++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 test/state-machine-contract.test.js diff --git a/test/state-machine-contract.test.js b/test/state-machine-contract.test.js new file mode 100644 index 0000000..06dac1b --- /dev/null +++ b/test/state-machine-contract.test.js @@ -0,0 +1,51 @@ +import test from 'node:test'; +/* Strict assertion mode: equal and deepEqual below behave as strictEqual and deepStrictEqual. */ import assert from 'node:assert/strict'; +import { readFileSync } from 'node:fs'; +import { + EVENTS, + INPUTS, + MODES, + REQUIRED_CONFIG_FIELDS, + STATES, + canRequestBargeIn +} from '../src/index.js'; + +/* Read the contract markdown once as UTF-8 text; the path is resolved relative to this test module via import.meta.url, not the working directory. */ const contract = readFileSync(new URL('../docs/TURN_TAKING_STATE_MACHINE.md', import.meta.url), 'utf8'); + +/* Heading presence check only: each pattern is unanchored, so it passes whenever the heading text appears anywhere in the document. */ test('contract documents modes, states, inputs, transitions, events, and invariants', () => { + for (const section of ['Modes', 'States', 'Inputs', 'Transition Rules', 'Event Contract', 'Invariants']) { + assert.match(contract, new RegExp(`## ${section}`)); + } +}); + +/* Pins the exact contents and order of MODES. */ test('state machine modes cover V1 operation styles', () => { + assert.deepEqual(MODES, ['push_to_talk', 'vad', 'wake_hook', 'half_duplex']); +}); + +/* Membership check for a subset of STATES; order and extra entries are not asserted. */ test('states include barge-in, muted, noise gate, cooldown, and error states', () => { + for (const state of ['barge_pending', 'interrupted', 'muted', 'noise_gated', 'cooldown', 'error']) { + assert.ok(STATES.includes(state)); + } +}); + +/* Membership check for a subset of INPUTS; order and extra entries are not asserted. */ test('inputs include microphone, VAD, push, wake, output, mute, and error signals', () => { + for (const input of ['mic.level', 'vad.speech.start', 'push.down', 'wake.detected', 'agent.output.start', 'mute.on', 'error.raise']) { + assert.ok(INPUTS.includes(input)); + } +}); + +/* Membership check for a subset of EVENTS; order and extra entries are not asserted. */ test('events cover speech, barge-in, output control, mute, noise gate, and errors', () => { + for (const event of ['bargekit.user_speech.started', 'bargekit.barge_in.requested', 'bargekit.output.cancel_requested', 'bargekit.input.noise_gated', 'bargekit.error.recorded']) { + assert.ok(EVENTS.includes(event)); + } 
+}); + +/* Pins the exact contents and order of REQUIRED_CONFIG_FIELDS. */ test('configuration fields include timing and barge-in policy', () => { + assert.deepEqual(REQUIRED_CONFIG_FIELDS, ['mode', 'minSpeechMs', 'silenceMs', 'debounceMs', 'bargeIn.enabled']); +}); + +/* Covers three cases of the guard: enabled while agent_speaking is true; disabled policy is false; enabled in another state (armed) is false. */ test('barge-in request requires enabled policy and active agent speech', () => { + assert.equal(canRequestBargeIn({ bargeIn: { enabled: true } }, 'agent_speaking'), true); + assert.equal(canRequestBargeIn({ bargeIn: { enabled: false } }, 'agent_speaking'), false); + assert.equal(canRequestBargeIn({ bargeIn: { enabled: true } }, 'armed'), false); +});