Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions apps/cli/src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,14 @@ async fn ensure_parent_dirs(path: &Path) -> CliResult<()> {
Ok(())
}

async fn write_bytes_to(output: Option<&Path>, bytes: Vec<u8>) -> CliResult<()> {
async fn write_bytes_to(output: Option<&Path>, bytes: Vec<u8>, add_newline: bool) -> CliResult<()> {
if let Some(path) = output {
ensure_parent_dirs(path).await?;
tokio::fs::write(path, bytes)
let mut data = bytes;
if add_newline {
data.push(b'\n');
}
tokio::fs::write(path, data)
.await
.map_err(|e| CliError::operation_failed("write output", e.to_string()))?;
return Ok(());
Expand All @@ -58,14 +62,17 @@ async fn write_bytes_to(output: Option<&Path>, bytes: Vec<u8>) -> CliResult<()>
std::io::stdout()
.write_all(&bytes)
.map_err(|e| CliError::operation_failed("write output", e.to_string()))?;
std::io::stdout()
.write_all(b"\n")
.map_err(|e| CliError::operation_failed("write output", e.to_string()))?;
if add_newline {
std::io::stdout()
.write_all(b"\n")
.map_err(|e| CliError::operation_failed("write output", e.to_string()))?;
}
Ok(())
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

File output missing trailing newline in write helper

Low Severity

write_bytes_to ignores the add_newline parameter when writing to a file (the Some(path) branch on lines 18-23 returns early without checking add_newline). The old code explicitly appended "\n" when writing text to a file via tokio::fs::write(path, transcript + "\n"). Now, write_text and write_json both pass add_newline: true, but that flag only takes effect for the stdout path. Output files produced via --output will be missing their trailing newline, breaking POSIX text file conventions.

Fix in Cursor Fix in Web

}

pub async fn write_text(output: Option<&Path>, text: String) -> CliResult<()> {
write_bytes_to(output, (text + "\n").into_bytes()).await
// Text content; add single trailing newline for POSIX compliance
write_bytes_to(output, text.into_bytes(), true).await
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Double newline when writing text to stdout

Low Severity

write_text appends "\n" to the text before passing it to write_bytes_to, which then appends another b"\n" when writing to stdout. This produces a double trailing newline for text output to stdout. The old code used println! which added only a single newline. The write_json path is unaffected because serde_json::to_vec doesn't include a trailing newline.

Additional Locations (1)
Fix in Cursor Fix in Web


pub async fn write_json(output: Option<&Path>, value: &impl serde::Serialize) -> CliResult<()> {
Expand All @@ -76,7 +83,8 @@ pub async fn write_json(output: Option<&Path>, value: &impl serde::Serialize) ->
}
.map_err(|e| CliError::operation_failed("serialize response", e.to_string()))?;

write_bytes_to(output, bytes).await
// JSON output needs trailing newline for POSIX compliance
write_bytes_to(output, bytes, true).await
}

pub fn create_progress_bar(
Expand Down
16 changes: 10 additions & 6 deletions crates/audio-device/src/macos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl MacOSBackend {
.unwrap_or(TransportType::Unknown);

let is_default = default_device_id
.map(|id| device.0.0 == id)
.map(|id| device.0 .0 == id)
.unwrap_or(false);

let mut audio_device = AudioDevice {
Expand Down Expand Up @@ -102,7 +102,11 @@ impl MacOSBackend {
})
});

if detected { Some(true) } else { None }
if detected {
Some(true)
} else {
None
}
}

fn is_external_from_device(device: Option<ca::Device>) -> bool {
Expand All @@ -120,8 +124,8 @@ impl AudioDeviceBackend for MacOSBackend {
let ca_devices =
ca::System::devices().map_err(|e| Error::EnumerationFailed(format!("{:?}", e)))?;

let default_input_id = ca::System::default_input_device().ok().map(|d| d.0.0);
let default_output_id = ca::System::default_output_device().ok().map(|d| d.0.0);
let default_input_id = ca::System::default_input_device().ok().map(|d| d.0 .0);
let default_output_id = ca::System::default_output_device().ok().map(|d| d.0 .0);

let mut devices = Vec::new();

Expand Down Expand Up @@ -161,7 +165,7 @@ impl AudioDeviceBackend for MacOSBackend {
Ok(Self::create_audio_device(
&ca_device,
AudioDirection::Input,
Some(ca_device.0.0),
Some(ca_device.0 .0),
))
}

Expand All @@ -178,7 +182,7 @@ impl AudioDeviceBackend for MacOSBackend {
Ok(Self::create_audio_device(
&ca_device,
AudioDirection::Output,
Some(ca_device.0.0),
Some(ca_device.0 .0),
))
}

Expand Down
8 changes: 4 additions & 4 deletions crates/audio-device/src/windows.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
use crate::{AudioDevice, AudioDeviceBackend, AudioDirection, DeviceId, Error, TransportType};
use std::ffi::OsString;
use std::os::windows::ffi::OsStringExt;
use windows::core::{Interface, GUID, PCWSTR, PWSTR};
use windows::Win32::Devices::FunctionDiscovery::PKEY_Device_FriendlyName;
use windows::Win32::Media::Audio::Endpoints::IAudioEndpointVolume;
use windows::Win32::Media::Audio::{
DEVICE_STATE_ACTIVE, IMMDevice, IMMDeviceEnumerator, MMDeviceEnumerator, eAll, eCapture,
eConsole, eRender,
eAll, eCapture, eConsole, eRender, IMMDevice, IMMDeviceEnumerator, MMDeviceEnumerator,
DEVICE_STATE_ACTIVE,
};
use windows::Win32::System::Com::{
CLSCTX_ALL, COINIT_MULTITHREADED, CoCreateInstance, CoInitializeEx, CoUninitialize, STGM_READ,
CoCreateInstance, CoInitializeEx, CoUninitialize, CLSCTX_ALL, COINIT_MULTITHREADED, STGM_READ,
};
use windows::Win32::UI::Shell::PropertiesSystem::IPropertyStore;
use windows::core::{GUID, Interface, PCWSTR, PWSTR};

pub struct WindowsBackend;

Expand Down
89 changes: 87 additions & 2 deletions crates/owhisper-client/src/adapter/openai/live.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,18 @@ impl RealtimeSttAdapter for OpenAIAdapter {
false
}

fn build_ws_url(&self, api_base: &str, _params: &ListenParams, _channels: u8) -> url::Url {
let (mut url, existing_params) = Self::build_ws_url_from_base(api_base);
fn build_ws_url(&self, api_base: &str, params: &ListenParams, _channels: u8) -> url::Url {
// Detect Azure from the base URL and store flag for initial_message
if let Ok(parsed) = api_base.parse::<url::Url>() {
if let Some(host) = parsed.host_str() {
if Self::is_azure_host(host) {
self.set_azure(true);
}
}
}

let model = params.model.as_deref();
let (mut url, existing_params) = Self::build_ws_url_from_base_with_model(api_base, model);

if !existing_params.is_empty() {
let mut query_pairs = url.query_pairs_mut();
Expand Down Expand Up @@ -78,6 +88,11 @@ impl RealtimeSttAdapter for OpenAIAdapter {
None => default,
};

// Use the Azure flag set during build_ws_url (detected from api_base URL)
if self.is_azure() {
return self.build_azure_initial_message(model, language);
}

let session_config = SessionUpdateEvent {
event_type: "session.update".to_string(),
session: SessionConfig {
Expand Down Expand Up @@ -227,6 +242,76 @@ impl RealtimeSttAdapter for OpenAIAdapter {
}
}

impl OpenAIAdapter {
    /// Builds the initial session message for an Azure OpenAI connection.
    ///
    /// Azure uses a different session update format than the regular
    /// endpoint: the event type is `transcription_session.update`.
    ///
    /// `model` and `language` are copied into the transcription settings;
    /// turn detection is always populated from the `VAD_*` constants.
    ///
    /// Returns `None` when the event cannot be serialized to JSON.
    fn build_azure_initial_message(
        &self,
        model: &str,
        language: Option<String>,
    ) -> Option<Message> {
        // Assemble the nested pieces first so the final event reads top-down.
        let transcription = AzureTranscriptionConfig {
            model: model.to_owned(),
            prompt: None,
            language,
        };

        let turn_detection = AzureTurnDetection {
            detection_type: VAD_DETECTION_TYPE.to_string(),
            threshold: Some(VAD_THRESHOLD),
            prefix_padding_ms: Some(VAD_PREFIX_PADDING_MS),
            silence_duration_ms: Some(VAD_SILENCE_DURATION_MS),
        };

        let event = AzureTranscriptionSessionUpdate {
            event_type: String::from("transcription_session.update"),
            session: AzureSessionConfig {
                input_audio_format: String::from("pcm16"),
                input_audio_transcription: transcription,
                turn_detection: Some(turn_detection),
            },
        };

        // A serialization failure is reported to the caller as `None`.
        let payload = match serde_json::to_string(&event) {
            Ok(text) => text,
            Err(_) => return None,
        };
        tracing::debug!(payload = %payload, "azure_openai_session_update_payload");
        Some(Message::Text(payload.into()))
    }
}

// Azure OpenAI specific session message types

/// Top-level event serialized as the initial message for Azure OpenAI.
///
/// `build_azure_initial_message` constructs this with the event type
/// `transcription_session.update`.
#[derive(Debug, Serialize)]
struct AzureTranscriptionSessionUpdate {
/// Event discriminator; emitted under the JSON key `type`.
#[serde(rename = "type")]
event_type: String,
/// Session settings carried by the update event.
session: AzureSessionConfig,
}

/// Session settings nested inside [`AzureTranscriptionSessionUpdate`].
#[derive(Debug, Serialize)]
struct AzureSessionConfig {
/// Audio format name; `build_azure_initial_message` sets this to `pcm16`.
input_audio_format: String,
/// Transcription settings (model, optional prompt and language).
input_audio_transcription: AzureTranscriptionConfig,
/// Turn-detection settings; the key is omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
turn_detection: Option<AzureTurnDetection>,
}

/// Transcription settings nested inside [`AzureSessionConfig`].
#[derive(Debug, Serialize)]
struct AzureTranscriptionConfig {
/// Model identifier, always serialized.
model: String,
/// Optional prompt; the key is omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
prompt: Option<String>,
/// Optional language; the key is omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
language: Option<String>,
}

/// Turn-detection settings nested inside [`AzureSessionConfig`].
///
/// Every optional field is left out of the JSON when it is `None`.
#[derive(Debug, Serialize)]
struct AzureTurnDetection {
/// Detection mode; emitted under the JSON key `type`.
#[serde(rename = "type")]
detection_type: String,
/// Detection threshold.
// NOTE(review): valid range is not visible in this file — confirm against the API docs.
#[serde(skip_serializing_if = "Option::is_none")]
threshold: Option<f32>,
/// Prefix padding; presumably milliseconds, going by the field name.
#[serde(skip_serializing_if = "Option::is_none")]
prefix_padding_ms: Option<u32>,
/// Silence duration; presumably milliseconds, going by the field name.
#[serde(skip_serializing_if = "Option::is_none")]
silence_duration_ms: Option<u32>,
}

#[derive(Debug, Serialize)]
struct SessionUpdateEvent {
#[serde(rename = "type")]
Expand Down
Loading