nodos-dev · shamilatesoglu · Apr 19, 2026 · Apr 21, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/.gitignore b/.gitignore
@@ -8,4 +8,5 @@
 __pycache__
 *_generated.h
 *.ptx
-*.egg-info
+*.egg-info
+.DS_Store
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -3,4 +3,31 @@
 target_compile_definitions(${NOS_PLUGIN_TARGET} PRIVATE NOS_DISABLE_DEPRECATED)
 
 add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/External/AudioFile)
-target_link_libraries(${NOS_PLUGIN_TARGET} PRIVATE AudioFile)
+target_link_libraries(${NOS_PLUGIN_TARGET} PRIVATE AudioFile)
+
+# SystemAudioCapture has one backend per OS. The Source/ glob picks up every
+# .cpp unconditionally, so on every non-Windows target the Windows backend is
+# marked HEADER_FILE_ONLY: it stays listed as a source but is not compiled.
+# The macOS backend is a .mm file, which the glob doesn't match, so it is
+# added explicitly in the APPLE branch further down.
+# NOTE(review): nothing here selects a backend for targets that are neither
+# Windows nor Apple — confirm ISystemAudioCapture::Create() is defined there.
+if (NOT WIN32)
+	set_source_files_properties(
+		${CMAKE_CURRENT_SOURCE_DIR}/Source/SystemAudioCaptureWindows.cpp
+		PROPERTIES HEADER_FILE_ONLY ON)
+endif()
+
+# macOS backend: compile the Objective-C++ source and link the system
+# frameworks it is built against.
+if (APPLE)
+	target_sources(${NOS_PLUGIN_TARGET} PRIVATE
+		${CMAKE_CURRENT_SOURCE_DIR}/Source/SystemAudioCaptureMac.mm)
+	# Enable ARC for this one file so retain/release of SCStream,
+	# SCContentFilter, etc. is handled by the compiler. The other .mm files in
+	# the engine deliberately avoid ARC, but here we hold Obj-C objects across
+	# method boundaries so it's worth it. The flag is set per source file so
+	# no other translation unit is affected.
+	set_source_files_properties(
+		${CMAKE_CURRENT_SOURCE_DIR}/Source/SystemAudioCaptureMac.mm
+		PROPERTIES COMPILE_FLAGS "-fobjc-arc")
+	target_link_libraries(${NOS_PLUGIN_TARGET} PRIVATE
+		"-framework Foundation"
+		"-framework CoreMedia"
+		"-framework CoreAudio"
+		"-framework AudioToolbox"
+		"-framework ScreenCaptureKit")
+endif()
diff --git a/Source/AudioOscilloscope.cpp b/Source/AudioOscilloscope.cpp
@@ -112,14 +112,23 @@ struct AudioOscilloscopeNode : NodeContext
 		{
 			uint32_t startSample = bin * samplesPerBin;
 			uint32_t endSample = std::min(startSample + samplesPerBin, numSamples);
+			// Guard the averaging: if numSamples is smaller than
+			// scopeTexSize * samplesPerBin, trailing bins have
+			// startSample >= numSamples, the inner loop runs zero times,
+			// and `binValue / 0` produces NaN. That NaN then flows into
+			// FrameHistory and poisons the bin's moving average for the
+			// lifetime of this node context.
+			if (endSample <= startSample)
+			{
+				currentFrameData[bin] = 0.0f;
+				continue;
+			}
 			float binValue = 0.0f;
 			for (uint32_t i = startSample; i < endSample; ++i)
 			{
 				binValue += monoAudio[i];
 			}
-			binValue /= (endSample - startSample); // Average the samples in this bin
-
-			currentFrameData[bin] = binValue;
+			currentFrameData[bin] = binValue / static_cast<float>(endSample - startSample);
 		}
 
 		// Store current frame data and calculate moving average

diff --git a/Source/SystemAudioCapture.cpp b/Source/SystemAudioCapture.cpp
@@ -0,0 +1,132 @@
+// Copyright MediaZ Teknoloji A.S. All Rights Reserved.
+
+#include "SystemAudioCapture.h"
+
+#include "nosAudio/AudioConversions.hpp"
+
+#include <algorithm>
+
+namespace nos::audio
+{
+namespace
+{
+// Upper bound, in seconds of source audio, on what PushInterleavedSamples
+// keeps buffered; once a push exceeds it, the oldest samples are discarded.
+// This stops a stalled consumer (or a hung execution graph) from growing
+// memory unbounded. The existing WASAPI backend used 5 seconds; we keep the
+// same budget for parity.
+constexpr uint32_t MAX_BUFFERED_SECONDS = 5;
+} // namespace
+
+// Empties the captured-sample buffer. BufferMutex is held for the duration so
+// the clear cannot interleave with a push or a read that takes the same lock.
+void SystemAudioCaptureBase::ResetBuffer()
+{
+	const std::lock_guard<std::mutex> guard(BufferMutex);
+	CapturedSamples.clear();
+}
+
+// Appends one block of interleaved float frames to the capture buffer.
+//
+// Calls with a null pointer, zero frames, zero channels or a zero sample rate
+// are ignored. When the incoming format differs from the stored one, the
+// buffer is emptied and the new format recorded before appending. After the
+// append, the buffer is trimmed from the front so it never holds more than
+// MAX_BUFFERED_SECONDS of audio at the current source format.
+void SystemAudioCaptureBase::PushInterleavedSamples(const float* samples,
+													uint32_t frameCount,
+													uint32_t sourceSampleRate,
+													uint8_t sourceChannelCount)
+{
+	const bool hasPayload = samples != nullptr && frameCount != 0;
+	const bool hasFormat = sourceChannelCount != 0 && sourceSampleRate != 0;
+	if (!(hasPayload && hasFormat))
+		return;
+
+	const std::lock_guard<std::mutex> guard(BufferMutex);
+
+	// A mid-stream format change invalidates everything buffered so far:
+	// keeping it would let a single read mix two layouts.
+	const bool formatChanged = SourceSampleRate != sourceSampleRate || SourceChannelCount != sourceChannelCount;
+	if (formatChanged)
+	{
+		CapturedSamples.clear();
+		SourceSampleRate = sourceSampleRate;
+		SourceChannelCount = sourceChannelCount;
+	}
+
+	const size_t incoming = static_cast<size_t>(frameCount) * sourceChannelCount;
+	CapturedSamples.insert(CapturedSamples.end(), samples, samples + incoming);
+
+	// Enforce the buffering budget by discarding the oldest samples.
+	const size_t budget = static_cast<size_t>(SourceSampleRate) * SourceChannelCount * MAX_BUFFERED_SECONDS;
+	const size_t buffered = CapturedSamples.size();
+	if (buffered > budget)
+	{
+		const auto oldest = CapturedSamples.begin();
+		CapturedSamples.erase(oldest, oldest + (buffered - budget));
+	}
+}
+
+// Fills outBuffer with numSamples frames of targetChannels interleaved
+// shifted-int24 samples taken from the head of CapturedSamples.
+//
+// The source is resampled to TargetSampleRate by linear interpolation, target
+// channels beyond the source layout fall back to source channel 0, and every
+// sample is scaled by gain before packing. Consumed source frames are removed
+// and the leftover backlog is capped (see MAX_POST_READ_SECONDS in the body).
+//
+// Returns true when audio was written. Returns false after writing silence
+// when no usable format is known yet or not enough source frames are
+// buffered, and false without writing anything when outBuffer is null.
+bool SystemAudioCaptureBase::ReadSamples(int32_t* outBuffer, uint32_t numSamples, uint8_t targetChannels, float gain)
+{
+	if (!outBuffer)
+		return false;
+
+	// Widen before multiplying so the output length cannot wrap in 32 bits.
+	const size_t outputCount = static_cast<size_t>(numSamples) * targetChannels;
+
+	std::lock_guard lock(BufferMutex);
+
+	// Nothing has been pushed yet (or the stream is idle), or the target rate
+	// was never set: emit silence. A zero TargetSampleRate would make the ratio
+	// below infinite, and converting that to an integer is undefined behaviour.
+	if (SourceSampleRate == 0 || SourceChannelCount == 0 || TargetSampleRate == 0)
+	{
+		std::fill_n(outBuffer, outputCount, 0);
+		return false;
+	}
+
+	const float sampleRateRatio = static_cast<float>(SourceSampleRate) / static_cast<float>(TargetSampleRate);
+	const uint32_t sourceSamplesNeeded = static_cast<uint32_t>(numSamples * sampleRateRatio);
+	const size_t channels = SourceChannelCount;
+	const size_t availableFrames = CapturedSamples.size() / channels;
+
+	// Not enough source audio for a full read: emit silence and keep what is
+	// buffered for a later call.
+	if (availableFrames < sourceSamplesNeeded)
+	{
+		std::fill_n(outBuffer, outputCount, 0);
+		return false;
+	}
+
+	// Linear interpolation resample + channel map + gain + int24 pack.
+	for (uint32_t i = 0; i < numSamples; ++i)
+	{
+		const float sourceIndex = i * sampleRateRatio;
+		const size_t frame = static_cast<size_t>(sourceIndex);
+		const float frac = sourceIndex - static_cast<float>(frame);
+		int32_t* const outFrame = outBuffer + static_cast<size_t>(i) * targetChannels;
+
+		// sourceSamplesNeeded is truncated, so the last output frames can map
+		// one source frame past the buffered data; those are written as silence.
+		if (frame >= availableFrames)
+		{
+			std::fill_n(outFrame, static_cast<size_t>(targetChannels), 0);
+			continue;
+		}
+
+		// Interpolate towards the same channel of the next frame. On the last
+		// buffered frame there is no next frame, so hold the current one
+		// instead of clamping a flat index onto a different channel's sample.
+		const size_t nextFrame = std::min(frame + 1, availableFrames - 1);
+
+		for (uint8_t ch = 0; ch < targetChannels; ++ch)
+		{
+			// Fold extra target channels onto source channel 0 (mono fallback).
+			const size_t sourceChannel = (ch < SourceChannelCount) ? ch : 0;
+			const float sample1 = CapturedSamples[frame * channels + sourceChannel];
+			const float sample2 = CapturedSamples[nextFrame * channels + sourceChannel];
+			outFrame[ch] = FloatToShiftedInt24((sample1 + (sample2 - sample1) * frac) * gain);
+		}
+	}
+
+	// Remove the frames just consumed and, in the same erase, any backlog
+	// beyond the post-read window. ReadSamples consumes at exactly real-time
+	// rate, so producer/consumer skew (startup gap, frame-drop stall,
+	// path-restart burst) would otherwise persist as permanent latency.
+	// Dropping the oldest excess snaps the next read back toward live; the
+	// resulting discontinuity is a one-shot click, preferable to sustained lag.
+	// Doing both removals at once shifts the remaining samples only one time.
+	constexpr float MAX_POST_READ_SECONDS = 0.1f;
+	const size_t maxKeep = static_cast<size_t>(static_cast<float>(SourceSampleRate) * MAX_POST_READ_SECONDS) * channels;
+	size_t samplesToRemove = std::min(static_cast<size_t>(sourceSamplesNeeded) * channels, CapturedSamples.size());
+	const size_t remaining = CapturedSamples.size() - samplesToRemove;
+	if (maxKeep > 0 && remaining > maxKeep)
+		samplesToRemove += remaining - maxKeep;
+	CapturedSamples.erase(CapturedSamples.begin(), CapturedSamples.begin() + samplesToRemove);
+
+	return true;
+}
+} // namespace nos::audio
diff --git a/Source/SystemAudioCapture.h b/Source/SystemAudioCapture.h
@@ -0,0 +1,85 @@
+// Copyright MediaZ Teknoloji A.S. All Rights Reserved.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+
+namespace nos::audio
+{
+// Platform-agnostic contract for capturing the host's system audio output
+// (loopback). One concrete backend is linked per target OS; the node code
+// only ever sees this interface. See the *Windows.cpp / *Mac.mm files for
+// the actual WASAPI / ScreenCaptureKit implementations.
+class ISystemAudioCapture
+{
+public:
+	// Virtual so a backend can be destroyed through the interface pointer
+	// handed out by Create().
+	virtual ~ISystemAudioCapture() = default;
+
+	// Prepare the backend for the node's requested target format. On failure
+	// returns false and leaves a user-readable reason in GetLastError().
+	virtual bool Initialize(uint32_t sampleRate, uint8_t channelCount) = 0;
+
+	// Begin producing samples into the internal buffer. Idempotent.
+	// NOTE(review): the return value presumably signals success — confirm
+	// against the backends.
+	virtual bool Start() = 0;
+
+	// Stop the backend and drain its worker. Safe to call before Start() or
+	// more than once.
+	virtual void Stop() = 0;
+
+	// Pull numSamples interleaved shifted-int24 frames into outBuffer at the
+	// requested channel layout and gain. outBuffer therefore has to hold
+	// numSamples * targetChannels values. Returns true when real audio was
+	// delivered, false when the buffer was filled with silence because the
+	// backend has not produced enough data yet.
+	virtual bool ReadSamples(int32_t* outBuffer, uint32_t numSamples, uint8_t targetChannels, float gain) = 0;
+
+	// Drop any audio that has accumulated in the internal buffer. Called
+	// on path start so the consumer doesn't have to pay for latency that built
+	// up between Start() and the first ReadSamples.
+	virtual void DiscardBufferedSamples() = 0;
+
+	// Name of the capture device as reported by the backend.
+	// NOTE(review): what exactly is reported is backend-specific — confirm.
+	virtual const std::string& GetDeviceName() const = 0;
+
+	// Reason for the most recent failure (see Initialize). Both accessors
+	// return references, so the text lives in the capture object itself.
+	virtual const std::string& GetLastError() const = 0;
+
+	// Platform factory — defined once per OS in its own TU. Only declared
+	// here; the caller owns the returned backend.
+	static std::unique_ptr<ISystemAudioCapture> Create();
+};
+
+// Shared sample-buffer + resampler scaffolding. Platform backends only have to
+// push interleaved float frames via PushInterleavedSamples; the base handles
+// rate conversion, gain, and int24 packing so the WASAPI / ScreenCaptureKit
+// files can stay focused on their respective native API dances. The buffer is
+// a std::vector used as a FIFO: pushes append at the back, reads and trimming
+// erase from the front.
+class SystemAudioCaptureBase : public ISystemAudioCapture
+{
+public:
+	// Resamples buffered source audio into outBuffer; see the interface for
+	// the contract.
+	bool ReadSamples(int32_t* outBuffer, uint32_t numSamples, uint8_t targetChannels, float gain) override;
+	// Forwards to ResetBuffer().
+	void DiscardBufferedSamples() override { ResetBuffer(); }
+	// Both accessors return the member strings by reference without taking
+	// BufferMutex.
+	// NOTE(review): confirm backends don't rewrite these strings while a
+	// caller may be reading them.
+	const std::string& GetDeviceName() const override { return DeviceName; }
+	const std::string& GetLastError() const override { return LastError; }
+
+protected:
+	// Feed interleaved Float32 samples from the platform capture callback.
+	// If sourceSampleRate or sourceChannelCount differ from the previous call
+	// the internal buffer is reset, so format renegotiation mid-stream can't
+	// produce torn audio.
+	void PushInterleavedSamples(const float* samples,
+								uint32_t frameCount,
+								uint32_t sourceSampleRate,
+								uint8_t sourceChannelCount);
+
+	// Drop any buffered samples — used when the node reinitializes.
+	void ResetBuffer();
+
+	// Taken by ResetBuffer, PushInterleavedSamples and ReadSamples before they
+	// touch CapturedSamples or the source format fields.
+	std::mutex BufferMutex;
+	// Interleaved float samples in the source format, oldest first.
+	std::vector<float> CapturedSamples;
+	// Format of the most recent push; both stay 0 until the first push, which
+	// ReadSamples treats as "no data yet".
+	uint32_t SourceSampleRate = 0;
+	uint8_t SourceChannelCount = 0;
+	// Output rate ReadSamples resamples to. Defaults to 0.
+	// NOTE(review): presumably assigned by the backend's Initialize() —
+	// confirm it is always set before the first ReadSamples.
+	uint32_t TargetSampleRate = 0;
+	// NOTE(review): presumably the channel count requested in Initialize();
+	// confirm how backends use it.
+	uint8_t TargetChannelCount = 0;
+	// Backing storage for GetDeviceName() / GetLastError().
+	std::string DeviceName;
+	std::string LastError;
+};
+} // namespace nos::audio