Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 157 additions & 58 deletions Runtime/Scripts/MicrophoneSource.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using LiveKit.Internal;

Expand All @@ -13,25 +14,49 @@ namespace LiveKit
/// </remarks>
sealed public class MicrophoneSource : RtcAudioSource
{
private readonly GameObject _sourceObject;
// Unity sometimes misreports the microphone clip's sample rate (e.g. a Bluetooth headset in
// a bad audio-routing state reports clip.frequency=16000 while the clip is actually filled
// at ~48-51 kHz). So we push to the native source at a fixed, trusted rate and resample the
// captured audio — whose true rate we measure at runtime from GetPosition — to it.
private const uint TargetSampleRate = 48000;
private const float RateMeasureSeconds = 0.3f;

private readonly string _deviceName;
private readonly uint _requestedRate;

public override event Action<float[], int, int> AudioRead;

private bool _disposed = false;
private bool _started = false;
private volatile bool _capturing = false;
private int _lastReadPos = 0;

// Streaming linear-resampler state (input = measured mic rate, output = TargetSampleRate).
private double _resamplePos = 0.0;
private float _resamplePrev = 0f;

/// <summary>
/// Creates a new microphone source for the given device.
/// </summary>
/// <param name="deviceName">The name of the device to capture from. Use <see cref="Microphone.devices"/> to
/// get the list of available devices.</param>
/// <param name="sourceObject">The GameObject to attach the AudioSource to. The object must be kept in the scene
/// for the duration of the source's lifetime.</param>
public MicrophoneSource(string deviceName, GameObject sourceObject) : base(2, RtcAudioSourceType.AudioSourceMicrophone)
/// <param name="sourceObject">Unused; retained for backwards compatibility. The microphone is now read
/// directly from its clip, so no scene GameObject/AudioSource is required.</param>
public MicrophoneSource(string deviceName, GameObject sourceObject)
: base(RtcAudioSourceType.AudioSourceMicrophone, TargetSampleRate, 1)
{
_deviceName = deviceName;
_sourceObject = sourceObject;
_requestedRate = ResolveMicrophoneSampleRate(deviceName);
}

// Picks the sample rate passed to Microphone.Start as the requested frequency. This is only a
// hint: the rate the device really captures at is measured later at runtime and may differ.
// The preferred value is TargetSampleRate, pulled into the [min, max] range the device reports.
// A reported range of (0, 0) is treated as "no constraint" and TargetSampleRate is used as-is.
private static uint ResolveMicrophoneSampleRate(string deviceName)
{
    Microphone.GetDeviceCaps(deviceName, out int lowestHz, out int highestHz);

    bool unconstrained = lowestHz == 0 && highestHz == 0;
    if (!unconstrained)
    {
        int hintedHz = Mathf.Clamp((int)TargetSampleRate, lowestHz, highestHz);
        return (uint)hintedHz;
    }

    return TargetSampleRate;
}

/// <summary>
Expand Down Expand Up @@ -61,13 +86,6 @@ public override void Start()

private IEnumerator StartMicrophone()
{
// Validate that the GameObject is still valid before starting
if (_sourceObject == null)
{
Utils.Error("MicrophoneSource: GameObject is null, cannot start microphone");
yield break;
}

// Verify microphone is still authorized (could change during background)
if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
{
Expand All @@ -82,7 +100,7 @@ private IEnumerator StartMicrophone()
_deviceName,
loop: true,
lengthSec: 1,
frequency: (int)DefaultMicrophoneSampleRate
frequency: (int)_requestedRate
);
}
catch (Exception e)
Expand All @@ -97,29 +115,6 @@ private IEnumerator StartMicrophone()
yield break;
}

// Ensure no duplicate components exist before adding new ones.
// This is important during app resume on iOS where components might not be
// fully destroyed yet due to Unity's deferred Destroy().
var existingSource = _sourceObject.GetComponent<AudioSource>();
if (existingSource != null)
UnityEngine.Object.DestroyImmediate(existingSource);

var existingProbe = _sourceObject.GetComponent<AudioProbe>();
if (existingProbe != null)
{
existingProbe.AudioRead -= OnAudioRead;
UnityEngine.Object.DestroyImmediate(existingProbe);
}

var source = _sourceObject.AddComponent<AudioSource>();
source.clip = clip;
source.loop = true;

var probe = _sourceObject.AddComponent<AudioProbe>();
// Clear the audio data after it is read as to not play it through the speaker locally.
probe.ClearAfterInvocation();
probe.AudioRead += OnAudioRead;

// Wait for microphone to actually start producing data with a timeout
const float timeout = 2f;
float elapsed = 0f;
Expand All @@ -135,8 +130,130 @@ private IEnumerator StartMicrophone()
yield break;
}

source.Play();
Utils.Debug($"MicrophoneSource device='{_deviceName}' started successfully");
Utils.Info($"MicrophoneSource device='{_deviceName}' clip={clip.frequency}Hz/{clip.channels}ch samples={clip.samples} requested={_requestedRate}Hz target={TargetSampleRate}Hz");

_capturing = true;
MonoBehaviourContext.RunCoroutine(CaptureLoop(clip));
}

// Reads new microphone samples straight from the looping clip's ring buffer, resamples them
// from the device's true (measured) rate to TargetSampleRate, and pushes them. Reading the
// ring buffer directly (instead of playing the clip and tapping OnAudioFilterRead) avoids the
// playback-vs-capture clock drift that produced choppy audio. Runs on the main thread; the
// native source's queue absorbs the per-frame pacing jitter.
//
// Phases:
//   1. Measure the real capture rate for RateMeasureSeconds from how fast GetPosition advances.
//   2. Each frame, drain everything between the last read position and the write head.
//   3. Roughly once per second, blend a fresh rate measurement into the running estimate.
//
// The loop exits when _capturing is cleared or the source is disposed.
// NOTE(review): the exit flag is only observed at frame boundaries; if capture is stopped and
// restarted before this coroutine resumes, an old loop could keep running — confirm callers
// cannot do that.
private IEnumerator CaptureLoop(AudioClip clip)
{
    int clipFrames = clip.samples; // frames per channel in the loop buffer
    int channels = clip.channels;

    // A degenerate clip would make the ring arithmetic below divide by zero (or spin forever),
    // so refuse to capture instead of throwing from inside the coroutine.
    if (clipFrames <= 0 || channels <= 0)
    {
        Utils.Error($"MicrophoneSource: clip for device='{_deviceName}' has invalid dimensions (samples={clipFrames}, channels={channels}), cannot capture");
        yield break;
    }

    // Maps any frame index (including negative differences) into [0, clipFrames).
    int Wrap(int frame) => ((frame % clipFrames) + clipFrames) % clipFrames;

    // clip.frequency is unreliable in some device states, so measure the true capture rate
    // from how fast GetPosition advances over a short window before we start pushing.
    int prev = Microphone.GetPosition(_deviceName);
    long advance = 0;
    var measureSw = System.Diagnostics.Stopwatch.StartNew();
    while (measureSw.Elapsed.TotalSeconds < RateMeasureSeconds && _capturing && !_disposed)
    {
        yield return null;
        int p = Microphone.GetPosition(_deviceName);
        advance += Wrap(p - prev);
        prev = p;
    }
    if (!_capturing || _disposed) yield break;

    // Fall back to clip.frequency when nothing could be measured. The fallback goes through
    // ClampRate as well, so a zero/bogus reported frequency can never reach the resampler.
    double measuredSecs = measureSw.Elapsed.TotalSeconds;
    double realRate = (measuredSecs > 0 && advance > 0)
        ? ClampRate(advance / measuredSecs)
        : ClampRate(clip.frequency);
    Utils.Info($"MicrophoneSource: measured capture rate {realRate:F0}Hz (clip.frequency={clip.frequency}Hz), resampling to {TargetSampleRate}Hz");

    ResetResampler();
    _lastReadPos = Wrap(Microphone.GetPosition(_deviceName));

    // Refine the rate estimate as we go so slow clock drift can't make the native buffer
    // creep toward over/underrun.
    long readSinceRefine = 0;
    var refineSw = System.Diagnostics.Stopwatch.StartNew();

    while (_capturing && !_disposed)
    {
        // Defensive: normalise the write head into the ring. The drain loop below only
        // terminates when _lastReadPos (always in [0, clipFrames)) can reach micPos, so an
        // out-of-range position would otherwise hang the main thread.
        int micPos = Wrap(Microphone.GetPosition(_deviceName));

        // Drain everything between the last read and the write head, splitting at the ring
        // wrap so each GetData read is contiguous.
        while (_lastReadPos != micPos)
        {
            int end = micPos > _lastReadPos ? micPos : clipFrames;
            int count = end - _lastReadPos;
            EmitResampled(clip, channels, _lastReadPos, count, realRate);
            readSinceRefine += count;
            _lastReadPos = end % clipFrames;
        }

        double secs = refineSw.Elapsed.TotalSeconds;
        if (secs >= 1.0 && readSinceRefine > 0)
        {
            // Equal-weight blend of the previous estimate and the rate seen in this window.
            realRate = 0.5 * realRate + 0.5 * ClampRate(readSinceRefine / secs); // EMA
            readSinceRefine = 0;
            refineSw.Restart();
        }

        yield return null;
    }
}

// Limits a sample-rate estimate (Hz) to the range [8000, 192000]. Values inside the range are
// returned unchanged.
private static double ClampRate(double r)
{
    if (r < 8000)
        return 8000;
    if (r > 192000)
        return 192000;
    return r;
}

// Clears the streaming resampler's carried state: the previous chunk's last sample and the
// fractional read position. Both go back to their initial zero values.
private void ResetResampler()
{
    _resamplePrev = 0f;
    _resamplePos = 0.0;
}

// Reads `count` contiguous frames at `startFrame`, downmixes to mono, resamples from
// `inputRate` to TargetSampleRate (streaming linear interpolation that carries state across
// calls), and pushes the result through AudioRead. `startFrame + count` never exceeds the clip
// length (callers split at the wrap), so the GetData read is always contiguous.
//
// Parameters:
//   clip       - the looping microphone clip to read from.
//   channels   - number of interleaved channels in the clip; must be positive.
//   startFrame - first frame (per-channel index) to read.
//   count      - number of frames to read; nothing happens when it is <= 0.
//   inputRate  - capture rate of the clip data in Hz; must be a positive, finite number.
//
// Carried state: _resamplePrev (last mono sample of the previous chunk) and _resamplePos
// (read position relative to the start of the next chunk, >= -1).
private void EmitResampled(AudioClip clip, int channels, int startFrame, int count, double inputRate)
{
    if (count <= 0) return;

    // A zero/negative/NaN/infinite rate would give a resampling step that either never
    // advances (endless loop), walks backwards (out-of-range index), or poisons or stalls the
    // carried position (NaN/Infinity). Zero channels would divide by zero in the downmix.
    // Drop the chunk instead.
    if (channels <= 0 || !(inputRate > 0) || double.IsInfinity(inputRate))
    {
        Utils.Error($"MicrophoneSource: cannot resample (channels={channels}, inputRate={inputRate}), dropping {count} frames");
        return;
    }

    var interleaved = new float[count * channels];
    clip.GetData(interleaved, startFrame);

    // Downmix by averaging the channels of each frame; mono input is used as-is.
    float[] inMono;
    if (channels == 1)
    {
        inMono = interleaved;
    }
    else
    {
        inMono = new float[count];
        for (int f = 0; f < count; f++)
        {
            float sum = 0f;
            for (int c = 0; c < channels; c++)
                sum += interleaved[f * channels + c];
            inMono[f] = sum / channels;
        }
    }

    double step = inputRate / TargetSampleRate; // input samples advanced per output sample
    var output = new List<float>((int)(count / step) + 2);

    // Index -1 maps to the carried last sample of the previous chunk so interpolation is
    // continuous across chunk/tick boundaries. pos stays >= -1.
    double pos = _resamplePos;
    while (pos < count - 1)
    {
        int i0 = (int)Math.Floor(pos);
        float a = i0 < 0 ? _resamplePrev : inMono[i0];
        float b = inMono[i0 + 1];
        float frac = (float)(pos - i0);
        output.Add(a * (1f - frac) + b * frac);
        pos += step;
    }

    // Remember where we stopped, re-based so that index 0 is the first frame of the next chunk.
    _resamplePrev = inMono[count - 1];
    _resamplePos = pos - count;

    if (output.Count > 0)
        AudioRead?.Invoke(output.ToArray(), 1, (int)TargetSampleRate);
}

/// <summary>
Expand All @@ -152,33 +269,15 @@ public override void Stop()

private IEnumerator StopMicrophone()
{
_capturing = false;

if (Microphone.IsRecording(_deviceName))
Microphone.End(_deviceName);

// Check if GameObject is still valid before trying to access components
if (_sourceObject != null)
{
var probe = _sourceObject.GetComponent<AudioProbe>();
if (probe != null)
{
probe.AudioRead -= OnAudioRead;
UnityEngine.Object.Destroy(probe);
}

var source = _sourceObject.GetComponent<AudioSource>();
if (source != null)
UnityEngine.Object.Destroy(source);
}

Utils.Debug($"MicrophoneSource device='{_deviceName}' stopped");
yield return null;
}

private void OnAudioRead(float[] data, int channels, int sampleRate)
{
AudioRead?.Invoke(data, channels, sampleRate);
}

private void OnApplicationPause(bool pause)
{
if (!_started)
Expand Down
20 changes: 13 additions & 7 deletions Runtime/Scripts/RtcAudioSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,25 @@ private sealed class PendingAudioFrame
private int _audioReadCount = 0;

protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom)
: this(audioSourceType,
audioSourceType == RtcAudioSourceType.AudioSourceMicrophone ? DefaultMicrophoneSampleRate : DefaultSampleRate,
(uint)channels)
{ }

// Creates the native source with an explicit format. Sources that know their exact
// capture rate/channels up front (e.g. a microphone resolved from device caps) use this so
// the pushed frames match the native source and never trip a rate/channel mismatch.
protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, uint channels)
{
_sourceType = audioSourceType;
_expectedChannels = (uint)channels;
_expectedChannels = channels;
_expectedSampleRate = sampleRate;

using var request = FFIBridge.Instance.NewRequest<NewAudioSourceRequest>();
var newAudioSource = request.request;
newAudioSource.Type = AudioSourceType.AudioSourceNative;
newAudioSource.NumChannels = (uint)channels;
newAudioSource.SampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone ?
DefaultMicrophoneSampleRate : DefaultSampleRate;
_expectedSampleRate = newAudioSource.SampleRate;

Utils.Debug($"NewAudioSource: {newAudioSource.NumChannels} {newAudioSource.SampleRate}");
newAudioSource.NumChannels = channels;
newAudioSource.SampleRate = sampleRate;

newAudioSource.Options = request.TempResource<AudioSourceOptions>();
newAudioSource.Options.EchoCancellation = true;
Expand Down
Loading