diff --git a/Cargo.lock b/Cargo.lock index 8b8f626..16caadd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -233,6 +233,14 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "echo-system-types" +version = "0.1.0" +source = "git+https://github.com/dnacenta/echo-system-types?tag=v0.1.0#febdf25f02fc1a470fe412ccc2d2f40e5356f888" +dependencies = [ + "serde", +] + [[package]] name = "encoding_rs" version = "0.8.35" @@ -1856,6 +1864,7 @@ dependencies = [ "bytes", "chrono", "dotenvy", + "echo-system-types", "hound", "rand 0.8.5", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index ecb8ab4..b107473 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ description = "Voice interface for Claude Code via Twilio" license = "AGPL-3.0" [dependencies] +echo-system-types = { git = "https://github.com/dnacenta/echo-system-types", tag = "v0.1.0" } axum = { version = "0.8", features = ["ws"] } tokio = { version = "1", features = ["full"] } tokio-tungstenite = "0.28" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..0581974 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,256 @@ +//! voice-echo — Voice interface for AI entities via Twilio. +//! +//! This crate provides a complete voice pipeline: Twilio WebSocket audio streaming, +//! voice activity detection, speech-to-text (Groq Whisper), LLM bridge (Claude), +//! and text-to-speech (Inworld). It can be used as a standalone binary or as a +//! library dependency in echo-system. +//! +//! # Usage as a library +//! +//! ```no_run +//! use voice_echo::{VoiceEcho, config::Config}; +//! +//! # fn run() { +//! let config = Config::load().expect("config"); +//! let mut voice = VoiceEcho::new(config); +//! // voice.start().await.expect("server"); +//! # } +//! ``` + +pub mod api; +pub mod config; +pub mod greeting; +pub mod pipeline; +pub mod twilio; + +use std::collections::HashMap; +use std::net::SocketAddr; +use std::sync::Arc; + +use axum::routing::{get, post}; +use axum::Router; +use echo_system_types::{HealthStatus, SetupPrompt}; +use tokio::sync::Mutex; +use tower_http::trace::TraceLayer; + +use config::Config; +use pipeline::audio; +use pipeline::claude::ClaudeBridge; +use pipeline::stt::SttClient; +use pipeline::tts::TtsClient; +use twilio::outbound::TwilioClient; + +/// Shared application state accessible from all handlers. +#[derive(Clone)] +pub struct AppState { + pub config: Config, + pub stt: Arc, + pub tts: Arc, + pub claude: Arc, + pub twilio: Arc, + /// Pre-converted mu-law hold music data, if configured. + pub hold_music: Option>>, + /// Context for outbound calls, keyed by call_sid. + /// Consumed on first utterance so the LLM knows why it called. + pub call_contexts: Arc>>, +} + +/// The voice-echo plugin. Manages the voice pipeline lifecycle. +pub struct VoiceEcho { + config: Config, + state: Option, + shutdown_tx: Option>, +} + +impl VoiceEcho { + /// Create a new VoiceEcho instance from config. + pub fn new(config: Config) -> Self { + Self { + config, + state: None, + shutdown_tx: None, + } + } + + /// Start the voice server. Builds state, binds the listener, and serves. + /// This blocks until the server is shut down via `stop()`. + pub async fn start(&mut self) -> Result<(), Box> { + let config = &self.config; + + // Load hold music if configured + let hold_music = config.hold_music.as_ref().and_then(|hm| { + let path = std::path::Path::new(&hm.file); + match audio::load_wav_as_mulaw(path, hm.volume) { + Ok(data) => { + tracing::info!( + path = %hm.file, + volume = hm.volume, + mulaw_bytes = data.len(), + "Loaded hold music" + ); + Some(Arc::new(data)) + } + Err(e) => { + tracing::warn!(path = %hm.file, "Failed to load hold music: {e}"); + None + } + } + }); + + // Build shared state + let state = AppState { + stt: Arc::new(SttClient::new( + config.groq.api_key.clone(), + config.groq.model.clone(), + )), + tts: Arc::new(TtsClient::new( + config.inworld.api_key.clone(), + config.inworld.voice_id.clone(), + config.inworld.model.clone(), + )), + claude: Arc::new(ClaudeBridge::new( + config.claude.session_timeout_secs, + config.claude.dangerously_skip_permissions, + config + .claude + .self_path + .as_ref() + .map(std::path::PathBuf::from), + )), + twilio: Arc::new(TwilioClient::new( + &config.twilio, + &config.server.external_url, + )), + config: config.clone(), + hold_music, + call_contexts: Arc::new(Mutex::new(HashMap::new())), + }; + + self.state = Some(state.clone()); + + let app = self.build_router(state); + + let addr: SocketAddr = format!("{}:{}", config.server.host, config.server.port) + .parse() + .map_err(|e| format!("Invalid server address: {e}"))?; + + tracing::info!(%addr, "Listening"); + + let listener = tokio::net::TcpListener::bind(addr).await?; + + let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel::<()>(); + self.shutdown_tx = Some(shutdown_tx); + + axum::serve(listener, app) + .with_graceful_shutdown(async { + let _ = shutdown_rx.await; + }) + .await?; + + Ok(()) + } + + /// Stop the voice server gracefully. + pub async fn stop(&mut self) -> Result<(), Box> { + if let Some(tx) = self.shutdown_tx.take() { + let _ = tx.send(()); + } + self.state = None; + Ok(()) + } + + /// Report health status. + pub fn health(&self) -> HealthStatus { + match &self.state { + Some(_) => HealthStatus::Healthy, + None => HealthStatus::Down("not started".into()), + } + } + + /// Return the Axum router with all voice-echo routes. + /// Returns `None` if the server hasn't been started (no state). + pub fn routes(&self) -> Option { + let state = self.state.as_ref()?; + Some(self.build_router(state.clone())) + } + + /// Configuration prompts for the echo-system init wizard. + pub fn setup_prompts() -> Vec { + vec![ + SetupPrompt { + key: "external_url".into(), + question: "External URL (where Twilio can reach this server):".into(), + required: true, + secret: false, + default: None, + }, + SetupPrompt { + key: "twilio_account_sid".into(), + question: "Twilio Account SID:".into(), + required: true, + secret: false, + default: None, + }, + SetupPrompt { + key: "twilio_auth_token".into(), + question: "Twilio Auth Token:".into(), + required: true, + secret: true, + default: None, + }, + SetupPrompt { + key: "twilio_phone_number".into(), + question: "Twilio Phone Number (E.164):".into(), + required: true, + secret: false, + default: None, + }, + SetupPrompt { + key: "groq_api_key".into(), + question: "Groq API Key (for Whisper STT):".into(), + required: true, + secret: true, + default: None, + }, + SetupPrompt { + key: "inworld_api_key".into(), + question: "Inworld API Key (for TTS):".into(), + required: true, + secret: true, + default: None, + }, + SetupPrompt { + key: "inworld_voice_id".into(), + question: "Inworld Voice ID:".into(), + required: false, + secret: false, + default: Some("Olivia".into()), + }, + SetupPrompt { + key: "api_token".into(), + question: "API Token (for outbound call auth):".into(), + required: false, + secret: true, + default: None, + }, + ] + } + + fn build_router(&self, state: AppState) -> Router { + Router::new() + .route("/twilio/voice", post(twilio::webhook::handle_voice)) + .route( + "/twilio/voice/outbound", + post(twilio::webhook::handle_voice_outbound), + ) + .route("/twilio/media", get(twilio::media::handle_media_upgrade)) + .route("/api/call", post(api::outbound::handle_call)) + .route("/health", get(health_handler)) + .layer(TraceLayer::new_for_http()) + .with_state(state) + } +} + +async fn health_handler() -> &'static str { + "ok" +} diff --git a/src/main.rs b/src/main.rs index f0fe4ae..99c51a2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,44 +1,10 @@ -mod api; -mod config; -mod greeting; -mod pipeline; mod setup; -mod twilio; -use std::collections::HashMap; -use std::net::SocketAddr; -use std::sync::Arc; - -use tokio::sync::Mutex; - -use axum::routing::{get, post}; -use axum::Router; -use tower_http::trace::TraceLayer; - -use config::Config; -use pipeline::audio; -use pipeline::claude::ClaudeBridge; -use pipeline::stt::SttClient; -use pipeline::tts::TtsClient; -use twilio::outbound::TwilioClient; +use voice_echo::config::Config; +use voice_echo::VoiceEcho; const VERSION: &str = env!("CARGO_PKG_VERSION"); -/// Shared application state accessible from all handlers. -#[derive(Clone)] -pub struct AppState { - pub config: Config, - pub stt: Arc, - pub tts: Arc, - pub claude: Arc, - pub twilio: Arc, - /// Pre-converted mu-law hold music data, if configured. - pub hold_music: Option>>, - /// Context for outbound calls, keyed by call_sid. - /// Consumed on first utterance so Claude knows why it called. - pub call_contexts: Arc>>, -} - fn main() { let args: Vec = std::env::args().collect(); @@ -81,7 +47,6 @@ async fn server() { ) .init(); - // Load config let config = match Config::load() { Ok(c) => c, Err(e) => { @@ -96,86 +61,10 @@ async fn server() { "Starting voice-echo" ); - // Load hold music if configured - let hold_music = config.hold_music.as_ref().and_then(|hm| { - let path = std::path::Path::new(&hm.file); - match audio::load_wav_as_mulaw(path, hm.volume) { - Ok(data) => { - tracing::info!( - path = %hm.file, - volume = hm.volume, - mulaw_bytes = data.len(), - "Loaded hold music" - ); - Some(Arc::new(data)) - } - Err(e) => { - tracing::warn!(path = %hm.file, "Failed to load hold music: {e}"); - None - } - } - }); - - // Build shared state - let state = AppState { - stt: Arc::new(SttClient::new( - config.groq.api_key.clone(), - config.groq.model.clone(), - )), - tts: Arc::new(TtsClient::new( - config.inworld.api_key.clone(), - config.inworld.voice_id.clone(), - config.inworld.model.clone(), - )), - claude: Arc::new(ClaudeBridge::new( - config.claude.session_timeout_secs, - config.claude.dangerously_skip_permissions, - config - .claude - .self_path - .as_ref() - .map(std::path::PathBuf::from), - )), - twilio: Arc::new(TwilioClient::new( - &config.twilio, - &config.server.external_url, - )), - config: config.clone(), - hold_music, - call_contexts: Arc::new(Mutex::new(HashMap::new())), - }; - - // Build router - let app = Router::new() - // Twilio webhooks - .route("/twilio/voice", post(twilio::webhook::handle_voice)) - .route( - "/twilio/voice/outbound", - post(twilio::webhook::handle_voice_outbound), - ) - // Twilio media stream (WebSocket) - .route("/twilio/media", get(twilio::media::handle_media_upgrade)) - // Outbound call API (for n8n) - .route("/api/call", post(api::outbound::handle_call)) - // Health check - .route("/health", get(health)) - .layer(TraceLayer::new_for_http()) - .with_state(state); - - // Start server - let addr: SocketAddr = format!("{}:{}", config.server.host, config.server.port) - .parse() - .expect("Invalid server address"); - - tracing::info!(%addr, "Listening"); + let mut voice = VoiceEcho::new(config); - let listener = tokio::net::TcpListener::bind(addr) - .await - .expect("Failed to bind"); - - axum::serve(listener, app).await.expect("Server error"); -} - -async fn health() -> &'static str { - "ok" + if let Err(e) = voice.start().await { + tracing::error!("Server error: {e}"); + std::process::exit(1); + } } diff --git a/src/setup/checks.rs b/src/setup/checks.rs index 77f5970..ae21859 100644 --- a/src/setup/checks.rs +++ b/src/setup/checks.rs @@ -13,7 +13,6 @@ struct CheckResult { pub fn run_checks() -> bool { let checks = vec![ check_tool("rustc", &["--version"], "rustc"), - check_tool("claude", &["--version"], "claude CLI"), check_tool("openssl", &["version"], "openssl"), ];