Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@
__pycache__
*_generated.h
*.ptx
*.egg-info
*.egg-info
.DS_Store
29 changes: 28 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,31 @@
target_compile_definitions(${NOS_PLUGIN_TARGET} PRIVATE NOS_DISABLE_DEPRECATED)

add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/External/AudioFile)
target_link_libraries(${NOS_PLUGIN_TARGET} PRIVATE AudioFile)
target_link_libraries(${NOS_PLUGIN_TARGET} PRIVATE AudioFile)

# SystemAudioCapture has one backend per OS. The Source/ glob picks up every
# .cpp unconditionally, so exclude the platform files that don't match the
# current target to keep the build clean, then add the .mm file on macOS
# explicitly (the glob doesn't match .mm extensions).
if (NOT WIN32)
# HEADER_FILE_ONLY leaves the file listed in the target but tells CMake not
# to compile it, so the Windows backend is never built on other platforms.
# NOTE(review): this branch also covers targets that are neither Windows nor
# Apple; no backend source is added for them here — confirm that
# ISystemAudioCapture::Create() is defined somewhere for those platforms.
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/Source/SystemAudioCaptureWindows.cpp
PROPERTIES HEADER_FILE_ONLY ON)
endif()

if (APPLE)
# The Objective-C++ backend has to be listed by hand (see note above).
target_sources(${NOS_PLUGIN_TARGET} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/Source/SystemAudioCaptureMac.mm)
# ARC keeps retain/release of SCStream/SCContentFilter/etc. out of our
# hair; the other .mm files in the engine deliberately avoid ARC, but
# here we hold Obj-C objects across method boundaries so it's worth it.
# The flag is applied to this one source file only, not to the whole target.
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/Source/SystemAudioCaptureMac.mm
PROPERTIES COMPILE_FLAGS "-fobjc-arc")
# Apple frameworks linked for the macOS backend.
target_link_libraries(${NOS_PLUGIN_TARGET} PRIVATE
"-framework Foundation"
"-framework CoreMedia"
"-framework CoreAudio"
"-framework AudioToolbox"
"-framework ScreenCaptureKit")
endif()
15 changes: 12 additions & 3 deletions Source/AudioOscilloscope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,23 @@ struct AudioOscilloscopeNode : NodeContext
{
uint32_t startSample = bin * samplesPerBin;
uint32_t endSample = std::min(startSample + samplesPerBin, numSamples);
// Guard the averaging: if numSamples is smaller than
// scopeTexSize * samplesPerBin, trailing bins have
// startSample >= numSamples, the inner loop runs zero times,
// and `binValue / 0` produces NaN. That NaN then flows into
// FrameHistory and poisons the bin's moving average for the
// lifetime of this node context.
if (endSample <= startSample)
{
currentFrameData[bin] = 0.0f;
continue;
}
float binValue = 0.0f;
for (uint32_t i = startSample; i < endSample; ++i)
{
binValue += monoAudio[i];
}
binValue /= (endSample - startSample); // Average the samples in this bin

currentFrameData[bin] = binValue;
currentFrameData[bin] = binValue / static_cast<float>(endSample - startSample);
}

// Store current frame data and calculate moving average
Expand Down
132 changes: 132 additions & 0 deletions Source/SystemAudioCapture.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright MediaZ Teknoloji A.S. All Rights Reserved.

#include "SystemAudioCapture.h"

#include "nosAudio/AudioConversions.hpp"

#include <algorithm>

namespace nos::audio
{
namespace
{
// Keep at most this many seconds of buffered source samples to stop a stalled
// consumer (or a hung execution graph) from growing memory unbounded. The
// existing WASAPI backend used 5 seconds; we keep the same budget for parity.
// The value is in whole seconds; PushInterleavedSamples turns it into a sample
// count using the current source sample rate and channel count, and trims the
// oldest samples once that count is exceeded.
constexpr uint32_t MAX_BUFFERED_SECONDS = 5;
} // namespace

// Throws away every sample currently held in CapturedSamples. The buffer
// mutex is held for the duration so a concurrent push or read never sees a
// half-cleared buffer. The remembered source format is left untouched.
void SystemAudioCaptureBase::ResetBuffer()
{
    const std::lock_guard<std::mutex> guard(BufferMutex);
    CapturedSamples.clear();
}

// Appends a batch of interleaved float frames to the shared capture buffer.
//
//   samples            - pointer to frameCount * sourceChannelCount floats
//   frameCount         - number of frames (one sample per channel each)
//   sourceSampleRate   - sample rate of the incoming data, in Hz
//   sourceChannelCount - number of interleaved channels in the incoming data
//
// Calls with a null pointer, no frames, or a zero rate/channel count are
// ignored. When the incoming format differs from the one seen on the previous
// call the buffer is emptied first. After appending, the buffer is trimmed
// from the front so it never holds more than MAX_BUFFERED_SECONDS of audio.
void SystemAudioCaptureBase::PushInterleavedSamples(const float* samples,
                                                    uint32_t frameCount,
                                                    uint32_t sourceSampleRate,
                                                    uint8_t sourceChannelCount)
{
    const bool hasPayload = (samples != nullptr) && (frameCount != 0);
    const bool hasFormat = (sourceChannelCount != 0) && (sourceSampleRate != 0);
    if (!(hasPayload && hasFormat))
        return;

    const std::lock_guard<std::mutex> guard(BufferMutex);

    // A format change invalidates whatever is queued: mixing two layouts in
    // one read would hand the resampler torn audio.
    const bool sameFormat =
        (sourceSampleRate == SourceSampleRate) && (sourceChannelCount == SourceChannelCount);
    if (!sameFormat)
    {
        CapturedSamples.clear();
        SourceSampleRate = sourceSampleRate;
        SourceChannelCount = sourceChannelCount;
    }

    const size_t incoming = static_cast<size_t>(frameCount) * sourceChannelCount;
    CapturedSamples.insert(CapturedSamples.end(), samples, samples + incoming);

    // Enforce the memory budget by discarding the oldest samples.
    const size_t budget = static_cast<size_t>(SourceSampleRate) * SourceChannelCount * MAX_BUFFERED_SECONDS;
    const size_t buffered = CapturedSamples.size();
    if (buffered > budget)
        CapturedSamples.erase(CapturedSamples.begin(), CapturedSamples.begin() + (buffered - budget));
}

// Pulls numSamples output frames from the capture buffer into outBuffer.
//
//   outBuffer      - destination for numSamples * targetChannels interleaved
//                    values; a null pointer is rejected (returns false)
//   numSamples     - number of output frames to produce
//   targetChannels - channels per output frame; target channels beyond the
//                    source channel count are fed from source channel 0
//   gain           - linear multiplier applied before packing
//
// Returns true when audio was delivered. Returns false, with outBuffer filled
// with zeros, when no source format is known yet, when the target rate has not
// been set, or when fewer source frames are buffered than this read needs.
//
// The source is resampled by linear interpolation from SourceSampleRate to
// TargetSampleRate and each value is packed with FloatToShiftedInt24. The
// consumed source frames are removed from the front of the buffer afterwards.
bool SystemAudioCaptureBase::ReadSamples(int32_t* outBuffer, uint32_t numSamples, uint8_t targetChannels, float gain)
{
    if (!outBuffer)
        return false;

    // Computed in size_t so a large numSamples cannot wrap the fill length.
    const size_t outputCount = static_cast<size_t>(numSamples) * targetChannels;
    const auto emitSilence = [&]() {
        std::fill_n(outBuffer, outputCount, int32_t{0});
        return false;
    };

    std::lock_guard lock(BufferMutex);

    // Nothing has been pushed yet (or the stream is idle): emit silence.
    // TargetSampleRate defaults to 0 and is not assigned in this file
    // (presumably a backend's Initialize() sets it); without this check the
    // ratio below would be a division by zero followed by an undefined
    // float-to-integer conversion.
    if (SourceSampleRate == 0 || SourceChannelCount == 0 || TargetSampleRate == 0)
        return emitSilence();

    const float sampleRateRatio = static_cast<float>(SourceSampleRate) / static_cast<float>(TargetSampleRate);
    const uint32_t sourceSamplesNeeded = static_cast<uint32_t>(numSamples * sampleRateRatio);
    const size_t samplesToRemove = static_cast<size_t>(sourceSamplesNeeded) * SourceChannelCount;

    if (CapturedSamples.size() < samplesToRemove)
        return emitSilence();

    const size_t bufferedCount = CapturedSamples.size();

    // Linear interpolation resample + channel map + gain + int24 pack.
    for (uint32_t i = 0; i < numSamples; ++i)
    {
        const float sourceIndex = i * sampleRateRatio;
        const uint32_t sourceIndexInt = static_cast<uint32_t>(sourceIndex);
        const float frac = sourceIndex - sourceIndexInt;

        const size_t frameBase = static_cast<size_t>(sourceIndexInt) * SourceChannelCount;
        int32_t* const outFrame = outBuffer + static_cast<size_t>(i) * targetChannels;

        for (uint8_t ch = 0; ch < targetChannels; ++ch)
        {
            // Fold extra target channels onto source channel 0 (mono fallback).
            const uint8_t sourceChannel = (ch < SourceChannelCount) ? ch : 0;

            const size_t idx1 = frameBase + sourceChannel;
            if (idx1 >= bufferedCount)
            {
                // Read position is past the buffered data: emit a zero.
                outFrame[ch] = 0;
                continue;
            }

            // The interpolation partner is the same channel in the next frame.
            // At the tail of the buffer there is no next frame, so hold the
            // current sample; clamping to the last element of the buffer would
            // pick up a different channel's value.
            size_t idx2 = idx1 + SourceChannelCount;
            if (idx2 >= bufferedCount)
                idx2 = idx1;

            const float sample1 = CapturedSamples[idx1];
            const float sample2 = CapturedSamples[idx2];
            const float interpolated = (sample1 + (sample2 - sample1) * frac) * gain;
            outFrame[ch] = FloatToShiftedInt24(interpolated);
        }
    }

    // Remove the source frames this read consumed.
    if (samplesToRemove < CapturedSamples.size())
        CapturedSamples.erase(CapturedSamples.begin(), CapturedSamples.begin() + samplesToRemove);
    else
        CapturedSamples.clear();

    // Post-read drift correction: ReadSamples consumes at exactly real-time
    // rate, so any historical producer/consumer skew (startup gap, frame-drop
    // stall, path-restart burst) would otherwise persist as permanent latency
    // — we just pull from the head forever, staying N ms behind live. Cap the
    // residual buffer at a small smoothing window; anything older gets dropped
    // so the next read snaps back toward live. The discontinuity this causes
    // is audibly a one-shot click, preferable to sustained lag.
    constexpr float MAX_POST_READ_SECONDS = 0.1f;
    const size_t maxKeep = static_cast<size_t>(static_cast<float>(SourceSampleRate) * MAX_POST_READ_SECONDS) *
                           SourceChannelCount;
    if (maxKeep > 0 && CapturedSamples.size() > maxKeep)
    {
        const size_t drop = CapturedSamples.size() - maxKeep;
        CapturedSamples.erase(CapturedSamples.begin(), CapturedSamples.begin() + drop);
    }

    return true;
}
} // namespace nos::audio
85 changes: 85 additions & 0 deletions Source/SystemAudioCapture.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright MediaZ Teknoloji A.S. All Rights Reserved.

#pragma once

#include <cstdint>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

namespace nos::audio
{
// Platform-agnostic contract for capturing the host's system audio output
// (loopback). One concrete backend is linked per target OS; the node code
// only ever sees this interface. See the *Windows.cpp / *Mac.mm files for
// the actual WASAPI / ScreenCaptureKit implementations.
class ISystemAudioCapture
{
public:
// Virtual so a backend can be destroyed through the base-class pointer that
// Create() hands out.
virtual ~ISystemAudioCapture() = default;

// Prepare the backend for the node's requested target format. On failure
// returns false and leaves a user-readable reason in GetLastError().
//   sampleRate   - requested output sample rate
//   channelCount - requested number of output channels
virtual bool Initialize(uint32_t sampleRate, uint8_t channelCount) = 0;

// Begin producing samples into the internal ring buffer. Idempotent.
virtual bool Start() = 0;

// Stop the backend and drain its worker. Safe to call before Start() or
// more than once.
virtual void Stop() = 0;

// Pull numSamples interleaved shifted-int24 frames into outBuffer at the
// requested channel layout and gain. Returns true when real audio was
// delivered, false when the buffer was filled with silence because the
// backend has not produced enough data yet.
//   outBuffer      - receives numSamples * targetChannels values
//   numSamples     - number of frames to produce
//   targetChannels - channels per output frame
//   gain           - linear multiplier applied to every sample
virtual bool ReadSamples(int32_t* outBuffer, uint32_t numSamples, uint8_t targetChannels, float gain) = 0;

// Drop any audio that has accumulated in the internal ring buffer. Called
// on path start so the consumer doesn't have to pay for latency that built
// up between Start() and the first ReadSamples.
virtual void DiscardBufferedSamples() = 0;

// Name of the device being captured. NOTE(review): presumably filled in by
// the backend during Initialize(); confirm against the platform files.
virtual const std::string& GetDeviceName() const = 0;
// Reason for the most recent failure, as set by Initialize() when it returns
// false. Returned by reference, so the text is only valid while the object
// lives.
virtual const std::string& GetLastError() const = 0;

// Platform factory — defined once per OS in its own TU.
static std::unique_ptr<ISystemAudioCapture> Create();
};

// Buffering and format-conversion layer common to every platform backend.
// A backend's capture callback hands interleaved float frames to
// PushInterleavedSamples; this class takes care of sample-rate conversion,
// gain and int24 packing on the read side, which keeps the WASAPI /
// ScreenCaptureKit files limited to their native API handling.
class SystemAudioCaptureBase : public ISystemAudioCapture
{
public:
    // Resamples buffered audio into outBuffer; see SystemAudioCapture.cpp.
    bool ReadSamples(int32_t* outBuffer, uint32_t numSamples, uint8_t targetChannels, float gain) override;

    // Forwards to ResetBuffer(), emptying the capture buffer.
    void DiscardBufferedSamples() override
    {
        ResetBuffer();
    }

    // Accessors for the strings stored below; both return references to the
    // members themselves.
    const std::string& GetDeviceName() const override
    {
        return DeviceName;
    }

    const std::string& GetLastError() const override
    {
        return LastError;
    }

protected:
    // Entry point for the platform capture callback: appends interleaved
    // Float32 frames to the buffer. A change in sourceSampleRate or
    // sourceChannelCount relative to the previous call empties the buffer
    // first, so a mid-stream format renegotiation cannot yield torn audio.
    void PushInterleavedSamples(const float* samples,
                                uint32_t frameCount,
                                uint32_t sourceSampleRate,
                                uint8_t sourceChannelCount);

    // Empties the buffer — used when the node reinitializes.
    void ResetBuffer();

    std::mutex BufferMutex;             // taken by the push, read and reset paths
    std::vector<float> CapturedSamples; // interleaved source samples, oldest first
    uint32_t SourceSampleRate{0};       // format of the last push; 0 until one arrives
    uint8_t SourceChannelCount{0};
    uint32_t TargetSampleRate{0};       // output rate ReadSamples converts to
    uint8_t TargetChannelCount{0};
    std::string DeviceName;
    std::string LastError;
};
} // namespace nos::audio
Loading