diff --git a/.gitignore b/.gitignore index a9f8d80..a9e6c80 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ __pycache__ *_generated.h *.ptx -*.egg-info \ No newline at end of file +*.egg-info +.DS_Store diff --git a/CMakeLists.txt b/CMakeLists.txt index f214161..a978fc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,4 +3,31 @@ target_compile_definitions(${NOS_PLUGIN_TARGET} PRIVATE NOS_DISABLE_DEPRECATED) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/External/AudioFile) -target_link_libraries(${NOS_PLUGIN_TARGET} PRIVATE AudioFile) \ No newline at end of file +target_link_libraries(${NOS_PLUGIN_TARGET} PRIVATE AudioFile) + +# SystemAudioCapture has one backend per OS. The Source/ glob picks up every +# .cpp unconditionally, so exclude the platform files that don't match the +# current target to keep the build clean, then add the .mm file back on +# macOS (the glob doesn't match .mm extensions). +if (NOT WIN32) + set_source_files_properties( + ${CMAKE_CURRENT_SOURCE_DIR}/Source/SystemAudioCaptureWindows.cpp + PROPERTIES HEADER_FILE_ONLY ON) +endif() + +if (APPLE) + target_sources(${NOS_PLUGIN_TARGET} PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/Source/SystemAudioCaptureMac.mm) + # ARC keeps retain/release of SCStream/SCContentFilter/etc. out of our + # hair; the other .mm files in the engine deliberately avoid ARC, but + # here we hold Obj-C objects across method boundaries so it's worth it. + set_source_files_properties( + ${CMAKE_CURRENT_SOURCE_DIR}/Source/SystemAudioCaptureMac.mm + PROPERTIES COMPILE_FLAGS "-fobjc-arc") + target_link_libraries(${NOS_PLUGIN_TARGET} PRIVATE + "-framework Foundation" + "-framework CoreMedia" + "-framework CoreAudio" + "-framework AudioToolbox" + "-framework ScreenCaptureKit") +endif() diff --git a/Source/AudioOscilloscope.cpp b/Source/AudioOscilloscope.cpp index 9a06a61..77e59a8 100644 --- a/Source/AudioOscilloscope.cpp +++ b/Source/AudioOscilloscope.cpp @@ -112,14 +112,23 @@ struct AudioOscilloscopeNode : NodeContext { uint32_t startSample = bin * samplesPerBin; uint32_t endSample = std::min(startSample + samplesPerBin, numSamples); + // Guard the averaging: if numSamples is smaller than + // scopeTexSize * samplesPerBin, trailing bins have + // startSample >= numSamples, the inner loop runs zero times, + // and `binValue / 0` produces NaN. That NaN then flows into + // FrameHistory and poisons the bin's moving average for the + // lifetime of this node context. + if (endSample <= startSample) + { + currentFrameData[bin] = 0.0f; + continue; + } float binValue = 0.0f; for (uint32_t i = startSample; i < endSample; ++i) { binValue += monoAudio[i]; } - binValue /= (endSample - startSample); // Average the samples in this bin - - currentFrameData[bin] = binValue; + currentFrameData[bin] = binValue / static_cast(endSample - startSample); } // Store current frame data and calculate moving average diff --git a/Source/SystemAudioCapture.cpp b/Source/SystemAudioCapture.cpp new file mode 100644 index 0000000..3ffceb1 --- /dev/null +++ b/Source/SystemAudioCapture.cpp @@ -0,0 +1,132 @@ +// Copyright MediaZ Teknoloji A.S. All Rights Reserved. + +#include "SystemAudioCapture.h" + +#include "nosAudio/AudioConversions.hpp" + +#include + +namespace nos::audio +{ +namespace +{ +// Keep at most this many seconds of buffered source samples to stop a stalled +// consumer (or a hung execution graph) from growing memory unbounded. The +// existing WASAPI backend used 5 seconds; we keep the same budget for parity. +constexpr uint32_t MAX_BUFFERED_SECONDS = 5; +} // namespace + +void SystemAudioCaptureBase::ResetBuffer() +{ + std::lock_guard lock(BufferMutex); + CapturedSamples.clear(); +} + +void SystemAudioCaptureBase::PushInterleavedSamples(const float* samples, + uint32_t frameCount, + uint32_t sourceSampleRate, + uint8_t sourceChannelCount) +{ + if (!samples || frameCount == 0 || sourceChannelCount == 0 || sourceSampleRate == 0) + return; + + std::lock_guard lock(BufferMutex); + + // Format renegotiated mid-stream — drop stale samples so the resampler + // doesn't mix two layouts into a single read. + if (sourceSampleRate != SourceSampleRate || sourceChannelCount != SourceChannelCount) + { + CapturedSamples.clear(); + SourceSampleRate = sourceSampleRate; + SourceChannelCount = sourceChannelCount; + } + + const size_t sampleCount = static_cast(frameCount) * sourceChannelCount; + CapturedSamples.insert(CapturedSamples.end(), samples, samples + sampleCount); + + const size_t maxSamples = static_cast(SourceSampleRate) * SourceChannelCount * MAX_BUFFERED_SECONDS; + if (CapturedSamples.size() > maxSamples) + { + const size_t overflow = CapturedSamples.size() - maxSamples; + CapturedSamples.erase(CapturedSamples.begin(), CapturedSamples.begin() + overflow); + } +} + +bool SystemAudioCaptureBase::ReadSamples(int32_t* outBuffer, uint32_t numSamples, uint8_t targetChannels, float gain) +{ + std::unique_lock lock(BufferMutex); + + // Nothing has been pushed yet (or the stream is idle): emit silence. + if (SourceSampleRate == 0 || SourceChannelCount == 0) + { + for (uint32_t i = 0; i < numSamples * targetChannels; ++i) + outBuffer[i] = 0; + return false; + } + + const float sampleRateRatio = static_cast(SourceSampleRate) / static_cast(TargetSampleRate); + const uint32_t sourceSamplesNeeded = static_cast(numSamples * sampleRateRatio); + + if (CapturedSamples.size() < static_cast(sourceSamplesNeeded) * SourceChannelCount) + { + for (uint32_t i = 0; i < numSamples * targetChannels; ++i) + outBuffer[i] = 0; + return false; + } + + // Linear interpolation resample + channel map + gain + int24 pack. + for (uint32_t i = 0; i < numSamples; ++i) + { + const float sourceIndex = i * sampleRateRatio; + const uint32_t sourceIndexInt = static_cast(sourceIndex); + const float frac = sourceIndex - sourceIndexInt; + + for (uint8_t ch = 0; ch < targetChannels; ++ch) + { + // Fold extra target channels onto source channel 0 (mono fallback). + const uint8_t sourceChannel = (ch < SourceChannelCount) ? ch : 0; + + const uint32_t idx1 = sourceIndexInt * SourceChannelCount + sourceChannel; + const uint32_t idx2 = std::min(idx1 + SourceChannelCount, + static_cast(CapturedSamples.size() - 1)); + + if (idx1 < CapturedSamples.size() && idx2 < CapturedSamples.size()) + { + const float sample1 = CapturedSamples[idx1]; + const float sample2 = CapturedSamples[idx2]; + float interpolated = sample1 + (sample2 - sample1) * frac; + interpolated *= gain; + outBuffer[i * targetChannels + ch] = FloatToShiftedInt24(interpolated); + } + else + { + outBuffer[i * targetChannels + ch] = 0; + } + } + } + + const size_t samplesToRemove = static_cast(sourceSamplesNeeded) * SourceChannelCount; + if (samplesToRemove < CapturedSamples.size()) + CapturedSamples.erase(CapturedSamples.begin(), CapturedSamples.begin() + samplesToRemove); + else + CapturedSamples.clear(); + + // Post-read drift correction: ReadSamples consumes at exactly real-time + // rate, so any historical producer/consumer skew (startup gap, frame-drop + // stall, path-restart burst) would otherwise persist as permanent latency + // — we just pull from the head forever, staying N ms behind live. Cap the + // residual buffer at a small smoothing window; anything older gets dropped + // so the next read snaps back toward live. The discontinuity this causes + // is audibly a one-shot click, preferable to sustained lag. + constexpr float MAX_POST_READ_SECONDS = 0.1f; + const size_t maxKeep = static_cast(static_cast(SourceSampleRate) * MAX_POST_READ_SECONDS) * + SourceChannelCount; + if (maxKeep > 0 && CapturedSamples.size() > maxKeep) + { + const size_t drop = CapturedSamples.size() - maxKeep; + CapturedSamples.erase(CapturedSamples.begin(), CapturedSamples.begin() + drop); + } + + return true; +} +} // namespace nos::audio diff --git a/Source/SystemAudioCapture.h b/Source/SystemAudioCapture.h new file mode 100644 index 0000000..dc1be8a --- /dev/null +++ b/Source/SystemAudioCapture.h @@ -0,0 +1,85 @@ +// Copyright MediaZ Teknoloji A.S. All Rights Reserved. + +#pragma once + +#include +#include +#include +#include +#include + +namespace nos::audio +{ +// Platform-agnostic contract for capturing the host's system audio output +// (loopback). One concrete backend is linked per target OS; the node code +// only ever sees this interface. See the *Windows.cpp / *Mac.mm files for +// the actual WASAPI / ScreenCaptureKit implementations. +class ISystemAudioCapture +{ +public: + virtual ~ISystemAudioCapture() = default; + + // Prepare the backend for the node's requested target format. On failure + // returns false and leaves a user-readable reason in GetLastError(). + virtual bool Initialize(uint32_t sampleRate, uint8_t channelCount) = 0; + + // Begin producing samples into the internal ring buffer. Idempotent. + virtual bool Start() = 0; + + // Stop the backend and drain its worker. Safe to call before Start() or + // more than once. + virtual void Stop() = 0; + + // Pull numSamples interleaved shifted-int24 frames into outBuffer at the + // requested channel layout and gain. Returns true when real audio was + // delivered, false when the buffer was filled with silence because the + // backend has not produced enough data yet. + virtual bool ReadSamples(int32_t* outBuffer, uint32_t numSamples, uint8_t targetChannels, float gain) = 0; + + // Drop any audio that has accumulated in the internal ring buffer. Called + // on path start so the consumer doesn't have to pay for latency that built + // up between Start() and the first ReadSamples. + virtual void DiscardBufferedSamples() = 0; + + virtual const std::string& GetDeviceName() const = 0; + virtual const std::string& GetLastError() const = 0; + + // Platform factory — defined once per OS in its own TU. + static std::unique_ptr Create(); +}; + +// Shared ring-buffer + resampler scaffolding. Platform backends only have to +// push interleaved float frames via PushInterleavedSamples; the base handles +// rate conversion, gain, and int24 packing so the WASAPI / ScreenCaptureKit +// files can stay focused on their respective native API dances. +class SystemAudioCaptureBase : public ISystemAudioCapture +{ +public: + bool ReadSamples(int32_t* outBuffer, uint32_t numSamples, uint8_t targetChannels, float gain) override; + void DiscardBufferedSamples() override { ResetBuffer(); } + const std::string& GetDeviceName() const override { return DeviceName; } + const std::string& GetLastError() const override { return LastError; } + +protected: + // Feed interleaved Float32 samples from the platform capture callback. + // If sourceSampleRate or sourceChannelCount differ from the previous call + // the internal buffer is reset, so format renegotiation mid-stream can't + // produce torn audio. + void PushInterleavedSamples(const float* samples, + uint32_t frameCount, + uint32_t sourceSampleRate, + uint8_t sourceChannelCount); + + // Drop any buffered samples — used when the node reinitializes. + void ResetBuffer(); + + std::mutex BufferMutex; + std::vector CapturedSamples; + uint32_t SourceSampleRate = 0; + uint8_t SourceChannelCount = 0; + uint32_t TargetSampleRate = 0; + uint8_t TargetChannelCount = 0; + std::string DeviceName; + std::string LastError; +}; +} // namespace nos::audio diff --git a/Source/SystemAudioCaptureMac.mm b/Source/SystemAudioCaptureMac.mm new file mode 100644 index 0000000..e7ae79f --- /dev/null +++ b/Source/SystemAudioCaptureMac.mm @@ -0,0 +1,438 @@ +// Copyright MediaZ Teknoloji A.S. All Rights Reserved. + +#ifdef __APPLE__ + +#include "SystemAudioCapture.h" + +#import +#import +#import +#import +#import + +#include + +#include + +#include +#include +#include + +// ScreenCaptureKit is the only first-party macOS API for capturing the system +// audio output without installing a virtual device. It requires macOS 13+ and +// the Screen Recording TCC permission — macOS prompts the user automatically +// on the first SCShareableContent request. On denial or pre-13 hosts we fill +// LastError with a human-readable reason so the node surfaces it in the +// editor status area. +// +// Thread model: every public entry point (Initialize/Start/Stop) is invoked +// on an engine runner thread. Apple's ScreenCaptureKit docs DON'T formally +// require main thread, but in practice calling SCShareableContent / SCStream +// cold from a worker is known to hang or crash (FB12114396, FB15779754, +// and community report nonstrict-hq/SCShareableContent-hangs-sample). The +// root cause is that libxpc delivers replies from tccd / replayd / the +// WindowServer to dispatch_get_main_queue() by default, and CFRunLoop on +// main is the canonical place those Mach-port sources get serviced — plus +// Obj-C +initialize for these frameworks assumes the main runloop is live. +// Apple DTS has confirmed on-record that their sample code "just happened +// to be called on main" (developer.apple.com/forums/thread/735651). We +// funnel the Obj-C work through nosEngine.RunOnMainThread so we don't +// depend on that accident — the same pattern nos.display uses for AppKit. + +namespace nos::audio +{ +// Forward-declared here so the Obj-C delegate below can hold a raw pointer +// back to it. The anonymous-namespace pattern doesn't work — the Obj-C +// @property needs a named, externally-addressable type. +class ScreenCaptureKitCapture; +} // namespace nos::audio + +// Obj-C adapter for SCStream callbacks. The SCStream delegate is a separate +// protocol from the SCStreamOutput sample handler, but ScreenCaptureKit lets +// us implement both on the same object, which keeps ownership simple. +API_AVAILABLE(macos(13.0)) +@interface NosAudioStreamOutput : NSObject +@property (nonatomic, assign) nos::audio::ScreenCaptureKitCapture* backend; +@end + +namespace nos::audio +{ +class ScreenCaptureKitCapture : public SystemAudioCaptureBase +{ +public: + ~ScreenCaptureKitCapture() override { Stop(); } + + bool Initialize(uint32_t sampleRate, uint8_t channelCount) override; + bool Start() override; + void Stop() override; + + // Invoked from the SCStreamOutput delegate on the capture queue. + void OnAudioSampleBuffer(CMSampleBufferRef sampleBuffer); + +private: + // Wait this long for the async ScreenCaptureKit handshake / teardown + // before giving up. Initialize / Start / Stop are called from the editor + // execution thread, so we cap the wait to keep a hung system-service + // from stalling the whole graph. + static constexpr uint64_t ASYNC_TIMEOUT_SECONDS = 5; + + SCStream* Stream API_AVAILABLE(macos(13.0)) = nil; + NosAudioStreamOutput* Delegate API_AVAILABLE(macos(13.0)) = nil; + dispatch_queue_t AudioQueue = nullptr; + std::atomic Running{false}; + + // Reused each callback so we don't allocate in the capture hot path when + // ScreenCaptureKit delivers planar (non-interleaved) float samples. + std::vector InterleaveScratch; +}; + +namespace +{ +// Synchronously bounce fn onto the main thread via the engine's dispatcher. +// If the host engine predates plugin API 41.1 (no RunOnMainThread exposed), +// we log and fall through — running on the worker anyway is the best we can +// offer, and users on old engines will see the same intermittent crash they +// would have seen before this plugin existed. +void RunOnMainThreadSync(std::function fn) +{ + if (!fn) + return; + if ([NSThread isMainThread]) + { + fn(); + return; + } + if (::nosEngine.RunOnMainThread) + { + ::nosEngine.RunOnMainThread( + [](void* p) { (*static_cast*>(p))(); }, + &fn, + NOS_TRUE); + return; + } + static bool warned = false; + if (!warned) + { + ::nosEngine.LogE("nos.audio: host engine has no RunOnMainThread; ScreenCaptureKit calls may crash."); + warned = true; + } + fn(); +} + +// Pump the main runloop up to `timeoutSeconds` while `*done` is still false. +// Two reasons this is preferred over dispatch_semaphore_wait on main: +// 1. ScreenCaptureKit completion handlers are delivered through libxpc, and +// libxpc replies default to dispatch_get_main_queue(). Blocking main +// with a semaphore deadlocks: the main queue can't service the reply +// because we're sitting on it. +// 2. We stay on a runloop pass cadence, so any plugin that queued work on +// the main thread via MainThreadDispatcher (e.g. the display plugin) +// doesn't starve while we wait. +// Returns true if `done` flipped before the deadline. +bool PumpMainRunLoopUntil(const bool& done, double timeoutSeconds) +{ + const CFAbsoluteTime deadline = CFAbsoluteTimeGetCurrent() + timeoutSeconds; + while (!done) + { + const CFAbsoluteTime remaining = deadline - CFAbsoluteTimeGetCurrent(); + if (remaining <= 0.0) + return false; + const CFTimeInterval step = std::min(0.05, remaining); + CFRunLoopRunInMode(kCFRunLoopDefaultMode, step, /*returnAfterSourceHandled*/ true); + } + return true; +} +} // namespace + +bool ScreenCaptureKitCapture::Initialize(uint32_t sampleRate, uint8_t channelCount) +{ + if (@available(macOS 13.0, *)) + { + TargetSampleRate = sampleRate; + TargetChannelCount = channelCount; + + bool ok = false; + RunOnMainThreadSync([&] { + @autoreleasepool + { + // Everything that touches SCShareableContent / SCDisplay lives + // inside the completion handler, where the framework guarantees + // those objects are alive. Propagating them out through ARC + + // __block + dispatch_semaphore_wait crashes intermittently on + // macOS 26 (objc_msgSend on an apparently-valid SCShareableContent + // in the caller's frame). Building the SCStream here avoids that. + __block bool done = false; + __block bool fetchOk = false; + __block std::string fetchError; + [SCShareableContent getShareableContentWithCompletionHandler:^(SCShareableContent* content, NSError* error) { + @autoreleasepool + { + if (error) + { + NSString* desc = error.localizedDescription; + const char* utf8 = desc.UTF8String; + fetchError = utf8 ? utf8 : "ScreenCaptureKit returned an unspecified error"; + done = true; + return; + } + if (!content || content.displays.count == 0) + { + fetchError = "Screen Recording permission required. Enable Nodos in System Settings → " + "Privacy & Security → Screen & System Audio Recording, then restart the editor."; + done = true; + return; + } + + SCDisplay* display = content.displays.firstObject; + DeviceName = std::string("System Audio (Display ") + std::to_string(display.displayID) + ")"; + + SCContentFilter* filter = [[SCContentFilter alloc] initWithDisplay:display excludingWindows:@[]]; + SCStreamConfiguration* config = [[SCStreamConfiguration alloc] init]; + config.capturesAudio = YES; + config.excludesCurrentProcessAudio = NO; + config.sampleRate = (NSInteger)sampleRate; + config.channelCount = (NSInteger)channelCount; + // ScreenCaptureKit on macOS 13–14 still requires a video track + // to be configured even for audio-only capture. A 2×2, 1 fps + // track is the cheapest legal configuration and we never attach + // a video output, so the frames are dropped by the framework. + config.width = 2; + config.height = 2; + config.minimumFrameInterval = CMTimeMake(1, 1); + config.queueDepth = 6; + + Delegate = [[NosAudioStreamOutput alloc] init]; + Delegate.backend = this; + + Stream = [[SCStream alloc] initWithFilter:filter configuration:config delegate:Delegate]; + + AudioQueue = dispatch_queue_create("dev.nodos.audio.SystemAudioCapture", DISPATCH_QUEUE_SERIAL); + + NSError* attachError = nil; + const BOOL attached = [Stream addStreamOutput:Delegate + type:SCStreamOutputTypeAudio + sampleHandlerQueue:AudioQueue + error:&attachError]; + if (!attached) + { + const char* utf8 = attachError.localizedDescription.UTF8String; + fetchError = utf8 ? utf8 : "Failed to attach audio stream output"; + Stream = nil; + Delegate = nil; + AudioQueue = nullptr; + done = true; + return; + } + + fetchOk = true; + done = true; + } + }]; + + if (!PumpMainRunLoopUntil(done, 5.0)) + { + LastError = "Timed out waiting for shareable content"; + return; + } + if (!fetchOk) + { + LastError = std::move(fetchError); + return; + } + LastError.clear(); + ok = true; + } + }); + return ok; + } + + LastError = "System audio capture requires macOS 13 (Ventura) or later"; + return false; +} + +bool ScreenCaptureKitCapture::Start() +{ + if (Running) + return true; + if (@available(macOS 13.0, *)) + { + if (!Stream) + return false; + + bool ok = false; + RunOnMainThreadSync([&] { + @autoreleasepool + { + __block bool done = false; + __block std::string startErrorMessage; + __block bool hasStartError = false; + [Stream startCaptureWithCompletionHandler:^(NSError* error) { + if (error) + { + hasStartError = true; + NSString* desc = error.localizedDescription; + const char* utf8 = desc.UTF8String; + startErrorMessage = utf8 ? utf8 : "ScreenCaptureKit returned an unspecified error"; + } + done = true; + }]; + if (!PumpMainRunLoopUntil(done, ASYNC_TIMEOUT_SECONDS)) + { + LastError = "Timed out starting ScreenCaptureKit stream"; + return; + } + if (hasStartError) + { + LastError = std::move(startErrorMessage); + return; + } + Running = true; + ok = true; + } + }); + return ok; + } + return false; +} + +void ScreenCaptureKitCapture::Stop() +{ + if (!Running && !Stream) + return; + + if (@available(macOS 13.0, *)) + { + RunOnMainThreadSync([&] { + @autoreleasepool + { + if (Stream && Running) + { + __block bool done = false; + [Stream stopCaptureWithCompletionHandler:^(NSError* /*error*/) { + done = true; + }]; + PumpMainRunLoopUntil(done, ASYNC_TIMEOUT_SECONDS); + } + if (Delegate) + Delegate.backend = nullptr; + Stream = nil; + Delegate = nil; + } + }); + } + + AudioQueue = nullptr; + Running = false; + ResetBuffer(); +} + +void ScreenCaptureKitCapture::OnAudioSampleBuffer(CMSampleBufferRef sampleBuffer) +{ + if (!sampleBuffer || !CMSampleBufferIsValid(sampleBuffer) || !CMSampleBufferDataIsReady(sampleBuffer)) + return; + + CMFormatDescriptionRef desc = CMSampleBufferGetFormatDescription(sampleBuffer); + if (!desc) + return; + const AudioStreamBasicDescription* asbd = CMAudioFormatDescriptionGetStreamBasicDescription(desc); + if (!asbd || asbd->mBitsPerChannel != 32 || !(asbd->mFormatFlags & kAudioFormatFlagIsFloat)) + { + // ScreenCaptureKit always delivers Float32 PCM per the docs; bail + // safely if a future macOS changes that so we don't interpret the + // bytes as the wrong type. + return; + } + + const uint32_t channels = asbd->mChannelsPerFrame; + const uint32_t sourceRate = static_cast(asbd->mSampleRate); + if (channels == 0 || sourceRate == 0) + return; + + const CMItemCount frameCount = CMSampleBufferGetNumSamples(sampleBuffer); + if (frameCount == 0) + return; + + // Two-phase pull: first call sizes the AudioBufferList, second copies. + size_t bufferListSize = 0; + OSStatus status = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer( + sampleBuffer, + &bufferListSize, + nullptr, + 0, + kCFAllocatorSystemDefault, + kCFAllocatorSystemDefault, + kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment, + nullptr); + if (status != noErr || bufferListSize == 0) + return; + + std::vector storage(bufferListSize); + auto* list = reinterpret_cast(storage.data()); + CMBlockBufferRef blockBuffer = nullptr; + status = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer( + sampleBuffer, + nullptr, + list, + bufferListSize, + kCFAllocatorSystemDefault, + kCFAllocatorSystemDefault, + kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment, + &blockBuffer); + if (status != noErr || !blockBuffer) + return; + + const bool nonInterleaved = (asbd->mFormatFlags & kAudioFormatFlagIsNonInterleaved) != 0; + if (nonInterleaved && list->mNumberBuffers == channels) + { + InterleaveScratch.resize(static_cast(frameCount) * channels); + for (uint32_t ch = 0; ch < channels; ++ch) + { + const float* src = reinterpret_cast(list->mBuffers[ch].mData); + if (!src) + continue; + for (CMItemCount f = 0; f < frameCount; ++f) + InterleaveScratch[static_cast(f) * channels + ch] = src[f]; + } + PushInterleavedSamples(InterleaveScratch.data(), + static_cast(frameCount), + sourceRate, + static_cast(channels)); + } + else if (list->mNumberBuffers >= 1) + { + const float* src = reinterpret_cast(list->mBuffers[0].mData); + if (src) + PushInterleavedSamples(src, + static_cast(frameCount), + sourceRate, + static_cast(channels)); + } + + CFRelease(blockBuffer); +} + +std::unique_ptr ISystemAudioCapture::Create() +{ + return std::make_unique(); +} +} // namespace nos::audio + +@implementation NosAudioStreamOutput +- (void)stream:(SCStream*)stream didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer ofType:(SCStreamOutputType)type +{ + if (type != SCStreamOutputTypeAudio) + return; + auto* backend = self.backend; + if (backend) + backend->OnAudioSampleBuffer(sampleBuffer); +} + +- (void)stream:(SCStream*)stream didStopWithError:(NSError*)error +{ + // Surface nothing here directly; the node already displays a warning when + // ReadSamples returns silence. Recording the error on the backend would + // race with Stop() tearing everything down, so we keep the hook empty. +} +@end + +#endif // __APPLE__ diff --git a/Source/SystemAudioCaptureWindows.cpp b/Source/SystemAudioCaptureWindows.cpp new file mode 100644 index 0000000..b62c151 --- /dev/null +++ b/Source/SystemAudioCaptureWindows.cpp @@ -0,0 +1,194 @@ +// Copyright MediaZ Teknoloji A.S. All Rights Reserved. + +#ifdef _WIN32 + +#include "SystemAudioCapture.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +_COM_SMARTPTR_TYPEDEF(IMMDeviceEnumerator, __uuidof(IMMDeviceEnumerator)); +_COM_SMARTPTR_TYPEDEF(IMMDevice, __uuidof(IMMDevice)); +_COM_SMARTPTR_TYPEDEF(IAudioClient, __uuidof(IAudioClient)); +_COM_SMARTPTR_TYPEDEF(IAudioCaptureClient, __uuidof(IAudioCaptureClient)); + +namespace nos::audio +{ +namespace +{ +class WASAPICapture : public SystemAudioCaptureBase +{ +public: + WASAPICapture() { CoInitializeEx(nullptr, COINIT_MULTITHREADED); } + ~WASAPICapture() override + { + Stop(); + CoUninitialize(); + } + + bool Initialize(uint32_t sampleRate, uint8_t channelCount) override + { + IMMDeviceEnumeratorPtr enumerator; + if (FAILED(enumerator.CreateInstance(__uuidof(MMDeviceEnumerator)))) + { + LastError = "Failed to create MMDeviceEnumerator"; + return false; + } + + IMMDevicePtr device; + if (FAILED(enumerator->GetDefaultAudioEndpoint(eRender, eConsole, &device))) + { + LastError = "No default render endpoint"; + return false; + } + + IPropertyStore* props = nullptr; + if (SUCCEEDED(device->OpenPropertyStore(STGM_READ, &props))) + { + PROPVARIANT varName; + PropVariantInit(&varName); + if (SUCCEEDED(props->GetValue(PKEY_Device_FriendlyName, &varName))) + { + DeviceName = _com_util::ConvertBSTRToString(varName.bstrVal); + PropVariantClear(&varName); + } + props->Release(); + } + + if (FAILED(device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, (void**)&AudioClient))) + { + LastError = "Failed to activate audio client"; + return false; + } + + WAVEFORMATEX* mixFormat = nullptr; + if (FAILED(AudioClient->GetMixFormat(&mixFormat))) + { + LastError = "Failed to get mix format"; + return false; + } + + const HRESULT initHr = AudioClient->Initialize( + AUDCLNT_SHAREMODE_SHARED, + AUDCLNT_STREAMFLAGS_LOOPBACK, + 10'000'000, // 1 second buffer in 100-ns units + 0, + mixFormat, + nullptr); + + // Snapshot the negotiated format before freeing it. + const uint32_t negotiatedRate = mixFormat->nSamplesPerSec; + const uint8_t negotiatedChannels = static_cast(mixFormat->nChannels); + CoTaskMemFree(mixFormat); + + if (FAILED(initHr)) + { + LastError = "Failed to initialize loopback client"; + return false; + } + + SourceSampleRate = negotiatedRate; + SourceChannelCount = negotiatedChannels; + TargetSampleRate = sampleRate; + TargetChannelCount = channelCount; + + if (FAILED(AudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&CaptureClient))) + { + LastError = "Failed to get capture client"; + return false; + } + + LastError.clear(); + return true; + } + + bool Start() override + { + if (IsCapturing) + return true; + if (!AudioClient) + return false; + if (FAILED(AudioClient->Start())) + { + LastError = "Failed to start audio client"; + return false; + } + IsCapturing = true; + ShouldStop = false; + CaptureThread = std::thread(&WASAPICapture::CaptureThreadFunc, this); + return true; + } + + void Stop() override + { + if (!IsCapturing) + return; + ShouldStop = true; + if (CaptureThread.joinable()) + CaptureThread.join(); + if (AudioClient) + AudioClient->Stop(); + IsCapturing = false; + } + +private: + void CaptureThreadFunc() + { + while (!ShouldStop) + { + if (!CaptureClient) + break; + + UINT32 packetLength = 0; + if (FAILED(CaptureClient->GetNextPacketSize(&packetLength))) + break; + + while (packetLength > 0) + { + BYTE* data = nullptr; + UINT32 numFramesAvailable = 0; + DWORD flags = 0; + if (FAILED(CaptureClient->GetBuffer(&data, &numFramesAvailable, &flags, nullptr, nullptr))) + break; + + if (!(flags & AUDCLNT_BUFFERFLAGS_SILENT) && numFramesAvailable > 0) + { + PushInterleavedSamples( + reinterpret_cast(data), + numFramesAvailable, + SourceSampleRate, + SourceChannelCount); + } + + CaptureClient->ReleaseBuffer(numFramesAvailable); + + if (FAILED(CaptureClient->GetNextPacketSize(&packetLength))) + break; + } + + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + } + + IAudioClientPtr AudioClient; + IAudioCaptureClientPtr CaptureClient; + std::thread CaptureThread; + std::atomic IsCapturing{false}; + std::atomic ShouldStop{false}; +}; +} // namespace + +std::unique_ptr ISystemAudioCapture::Create() +{ + return std::make_unique(); +} +} // namespace nos::audio + +#endif // _WIN32 diff --git a/Source/SystemAudioInput.cpp b/Source/SystemAudioInput.cpp index dd3e1ed..1bb52b9 100644 --- a/Source/SystemAudioInput.cpp +++ b/Source/SystemAudioInput.cpp @@ -3,283 +3,19 @@ #include #include -#include -#include -#include -#include -#include + +#include +#include +#include #include "nosAudio/Audio_generated.h" #include "nosAudio/AudioConversions.hpp" -#ifdef _WIN32 -#include -#include -#include -#include -#include - -// COM smart pointer helpers -_COM_SMARTPTR_TYPEDEF(IMMDeviceEnumerator, __uuidof(IMMDeviceEnumerator)); -_COM_SMARTPTR_TYPEDEF(IMMDevice, __uuidof(IMMDevice)); -_COM_SMARTPTR_TYPEDEF(IAudioClient, __uuidof(IAudioClient)); -_COM_SMARTPTR_TYPEDEF(IAudioCaptureClient, __uuidof(IAudioCaptureClient)); -#endif +#include "SystemAudioCapture.h" namespace nos::audio { -#ifdef _WIN32 -class WASAPICapture -{ -public: - WASAPICapture() : IsCapturing(false), ShouldStop(false) - { - CoInitializeEx(nullptr, COINIT_MULTITHREADED); - } - - ~WASAPICapture() - { - Stop(); - CoUninitialize(); - } - - bool Initialize(uint32_t sampleRate, uint8_t channelCount) - { - HRESULT hr; - - // Create device enumerator - IMMDeviceEnumeratorPtr enumerator; - hr = enumerator.CreateInstance(__uuidof(MMDeviceEnumerator)); - if (FAILED(hr)) - return false; - - // Get default audio endpoint (for loopback capture) - IMMDevicePtr device; - hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole, &device); - if (FAILED(hr)) - return false; - - // Get device name - IPropertyStore* props = nullptr; - hr = device->OpenPropertyStore(STGM_READ, &props); - if (SUCCEEDED(hr)) - { - PROPVARIANT varName; - PropVariantInit(&varName); - hr = props->GetValue(PKEY_Device_FriendlyName, &varName); - if (SUCCEEDED(hr)) - { - DeviceName = _com_util::ConvertBSTRToString(varName.bstrVal); - PropVariantClear(&varName); - } - props->Release(); - } - - // Activate audio client - hr = device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, (void**)&AudioClient); - if (FAILED(hr)) - return false; - - // Get the mix format - WAVEFORMATEX* mixFormat = nullptr; - hr = AudioClient->GetMixFormat(&mixFormat); - if (FAILED(hr)) - return false; - - // Initialize audio client for loopback capture - hr = AudioClient->Initialize( - AUDCLNT_SHAREMODE_SHARED, - AUDCLNT_STREAMFLAGS_LOOPBACK, - 10000000, // 1 second buffer - 0, - mixFormat, - nullptr); - - if (FAILED(hr)) - { - CoTaskMemFree(mixFormat); - return false; - } - - // Store format info - SourceSampleRate = mixFormat->nSamplesPerSec; - SourceChannelCount = mixFormat->nChannels; - TargetSampleRate = sampleRate; - TargetChannelCount = channelCount; - - CoTaskMemFree(mixFormat); - - // Get capture client - hr = AudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&CaptureClient); - if (FAILED(hr)) - return false; - - return true; - } - - bool Start() - { - if (IsCapturing) - return true; - - if (!AudioClient) - return false; - - HRESULT hr = AudioClient->Start(); - if (FAILED(hr)) - return false; - - IsCapturing = true; - ShouldStop = false; - CaptureThread = std::thread(&WASAPICapture::CaptureThreadFunc, this); - - return true; - } - - void Stop() - { - if (!IsCapturing) - return; - - ShouldStop = true; - if (CaptureThread.joinable()) - CaptureThread.join(); - - if (AudioClient) - AudioClient->Stop(); - - IsCapturing = false; - } - - bool ReadSamples(int32_t* outBuffer, uint32_t numSamples, uint8_t targetChannels, float gain) - { - std::unique_lock lock(BufferMutex); - - // Calculate how many source samples we need - float sampleRateRatio = static_cast(SourceSampleRate) / static_cast(TargetSampleRate); - uint32_t sourceSamplesNeeded = static_cast(numSamples * sampleRateRatio); - - // If we don't have enough samples, fill with silence - if (CapturedSamples.size() < sourceSamplesNeeded * SourceChannelCount) - { - for (uint32_t i = 0; i < numSamples * targetChannels; ++i) - outBuffer[i] = 0; - return false; // No audio available - } - - // Resample and convert - for (uint32_t i = 0; i < numSamples; ++i) - { - float sourceIndex = i * sampleRateRatio; - uint32_t sourceIndexInt = static_cast(sourceIndex); - float frac = sourceIndex - sourceIndexInt; - - for (uint8_t ch = 0; ch < targetChannels; ++ch) - { - // Map target channel to source channel (handle mono/stereo conversions) - uint8_t sourceChannel = (ch < SourceChannelCount) ? ch : 0; - - // Get samples for interpolation - uint32_t idx1 = sourceIndexInt * SourceChannelCount + sourceChannel; - uint32_t idx2 = std::min(idx1 + SourceChannelCount, static_cast(CapturedSamples.size() - 1)); - - if (idx1 < CapturedSamples.size() && idx2 < CapturedSamples.size()) - { - float sample1 = CapturedSamples[idx1]; - float sample2 = CapturedSamples[idx2]; - float interpolated = sample1 + (sample2 - sample1) * frac; - - // Apply gain and convert to shifted int24 - interpolated *= gain; - outBuffer[i * targetChannels + ch] = FloatToShiftedInt24(interpolated); - } - else - { - outBuffer[i * targetChannels + ch] = 0; - } - } - } - - // Remove consumed samples - uint32_t samplesToRemove = sourceSamplesNeeded * SourceChannelCount; - if (samplesToRemove < CapturedSamples.size()) - CapturedSamples.erase(CapturedSamples.begin(), CapturedSamples.begin() + samplesToRemove); - - return true; // Audio successfully read - } - - const std::string& GetDeviceName() const { return DeviceName; } - -private: - void CaptureThreadFunc() - { - while (!ShouldStop) - { - if (!CaptureClient) - break; - - UINT32 packetLength = 0; - HRESULT hr = CaptureClient->GetNextPacketSize(&packetLength); - if (FAILED(hr)) - break; - - while (packetLength > 0) - { - BYTE* data = nullptr; - UINT32 numFramesAvailable = 0; - DWORD flags = 0; - - hr = CaptureClient->GetBuffer(&data, &numFramesAvailable, &flags, nullptr, nullptr); - if (FAILED(hr)) - break; - - // Convert samples to float and store - if (!(flags & AUDCLNT_BUFFERFLAGS_SILENT)) - { - float* floatData = reinterpret_cast(data); - std::lock_guard lock(BufferMutex); - - for (UINT32 i = 0; i < numFramesAvailable * SourceChannelCount; ++i) - { - CapturedSamples.push_back(floatData[i]); - } - - // Limit buffer size to prevent unbounded growth (keep max 5 seconds) - size_t maxSamples = SourceSampleRate * SourceChannelCount * 5; - if (CapturedSamples.size() > maxSamples) - { - CapturedSamples.erase(CapturedSamples.begin(), - CapturedSamples.begin() + (CapturedSamples.size() - maxSamples)); - } - } - - CaptureClient->ReleaseBuffer(numFramesAvailable); - - hr = CaptureClient->GetNextPacketSize(&packetLength); - if (FAILED(hr)) - break; - } - - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - } - } - - IAudioClientPtr AudioClient; - IAudioCaptureClientPtr CaptureClient; - std::thread CaptureThread; - std::atomic IsCapturing; - std::atomic ShouldStop; - std::vector CapturedSamples; - std::mutex BufferMutex; - uint32_t SourceSampleRate = 0; - uint8_t SourceChannelCount = 0; - uint32_t TargetSampleRate = 0; - uint8_t TargetChannelCount = 0; - std::string DeviceName; -}; -#endif - struct SystemAudioInputNode : NodeContext { void SetNodeStatusMessageIfChanged(const std::string& message, fb::NodeStatusMessageType type) @@ -293,88 +29,76 @@ struct SystemAudioInputNode : NodeContext nosResult OnCreate(nosFbNodePtr) override { - AddPinValueWatcher(NOS_NAME("Active"), - [this](const bool* newVal, std::optional oldVal) { - Active = *newVal; - if (Active) - SetNodeStatusMessageIfChanged("System audio input active", fb::NodeStatusMessageType::INFO); - else - SetNodeStatusMessageIfChanged("System audio input inactive", fb::NodeStatusMessageType::WARNING); - }); - AddPinValueWatcher(NOS_NAME("SampleRate"), - [this](const uint32_t* newVal, std::optional oldVal) { - if (!oldVal || *newVal != **oldVal) - NeedsReinitialize = true; - }); + [this](const uint32_t* newVal, std::optional oldVal) { + if (!oldVal || *newVal != **oldVal) + NeedsReinitialize = true; + }); AddPinValueWatcher(NOS_NAME("ChannelCount"), - [this](const uint8_t* newVal, std::optional oldVal) { - if (!oldVal || *newVal != **oldVal) - NeedsReinitialize = true; - }); + [this](const uint8_t* newVal, std::optional oldVal) { + if (!oldVal || *newVal != **oldVal) + NeedsReinitialize = true; + }); return NOS_RESULT_SUCCESS; } ~SystemAudioInputNode() override { -#ifdef _WIN32 if (Capture) - { Capture->Stop(); - Capture.reset(); - } -#endif } void OnPathStart() override { - ClearNodeStatusMessages(); AccumulatedSampleNumerator = 0; CurrentSampleIndex = 0; - NeedsReinitialize = true; - - if (Active) - SetNodeStatusMessageIfChanged("System audio input active", fb::NodeStatusMessageType::INFO); - else - SetNodeStatusMessageIfChanged("System audio input inactive", fb::NodeStatusMessageType::WARNING); - } - - void OnPathStop() override - { -#ifdef _WIN32 + // Drop any audio that queued up between Capture->Start() and this + // first consumer tick. Without this, the consumer would forever play + // from the back of a full ring buffer, running the apparent latency + // ceiling (~100ms after the in-read trim) instead of the floor. if (Capture) - { - Capture->Stop(); - } -#endif - ClearNodeStatusMessages(); + Capture->DiscardBufferedSamples(); } + // Deliberately no OnPathStop override: ScreenCaptureKit's Stop() is a full + // stream teardown (not a pause), so any Stop here would leave the stream + // dead across the routine OnPathStop → OnPathStart cycles that happen on + // graph load and downstream reconfiguration. The backend stays running + // until Active flips off or the node is destroyed, and the status message + // is kept in sync by ExecuteNode below rather than being cleared here — + // clearing with ClearNodeStatusMessages without also resetting the + // LastStatusMessage mirror used to leave the node with no visible status + // after a path restart. + nosResult ExecuteNode(NodeExecuteParams const& pins) override { + // Read Active straight from the pin rather than relying on a watcher- + // backed mirror: on graph load the first ExecuteNode can fire before + // the watcher has propagated the saved `true`, which left the node + // inert until the user toggled the pin. + const bool active = *pins.GetPinValue(NOS_NAME("Active")); auto& sampleRate = *pins.GetPinValue(NOS_NAME("SampleRate")); auto& channelCount = *pins.GetPinValue(NOS_NAME("ChannelCount")); auto& gain = *pins.GetPinValue(NOS_NAME("Gain")); - // Only support fixed step timing if (pins.TimingMode != NOS_EXECUTION_TIMING_MODE_FIXED_STEP) { SetNodeStatusMessageIfChanged("Unsupported timing mode", fb::NodeStatusMessageType::FAILURE); return NOS_RESULT_FAILED; } - // Check for invalid timing values if (pins.FixedStepTiming.DeltaSeconds.y == 0) { SetNodeStatusMessageIfChanged("Invalid timing values", fb::NodeStatusMessageType::FAILURE); return NOS_RESULT_FAILED; } -#ifdef _WIN32 - // Initialize or reinitialize capture if needed - if (Active && (NeedsReinitialize || !Capture)) + // (Re)create the backend whenever Active flips on or the requested + // format changes. A null Capture after this branch means the platform + // has no backend compiled in — we emit silence + a status message. + if (active && (NeedsReinitialize || !Capture)) { if (Capture) { @@ -382,51 +106,72 @@ struct SystemAudioInputNode : NodeContext Capture.reset(); } - Capture = std::make_unique(); + Capture = ISystemAudioCapture::Create(); + if (!Capture) + { + SetNodeStatusMessageIfChanged("System audio input is not supported on this platform", + fb::NodeStatusMessageType::FAILURE); + SetPinValue(NOS_NAME("Active"), false); + return NOS_RESULT_FAILED; + } + if (!Capture->Initialize(sampleRate, channelCount)) { - SetNodeStatusMessageIfChanged("Failed to initialize system audio capture", fb::NodeStatusMessageType::FAILURE); + const auto& err = Capture->GetLastError(); + SetNodeStatusMessageIfChanged( + err.empty() ? std::string("Failed to initialize system audio capture") + : "Failed to initialize system audio capture: " + err, + fb::NodeStatusMessageType::FAILURE); Capture.reset(); - Active = false; + SetPinValue(NOS_NAME("Active"), false); return NOS_RESULT_FAILED; } if (!Capture->Start()) { - SetNodeStatusMessageIfChanged("Failed to start system audio capture", fb::NodeStatusMessageType::FAILURE); + const auto& err = Capture->GetLastError(); + SetNodeStatusMessageIfChanged( + err.empty() ? std::string("Failed to start system audio capture") + : "Failed to start system audio capture: " + err, + fb::NodeStatusMessageType::FAILURE); Capture.reset(); - Active = false; + SetPinValue(NOS_NAME("Active"), false); return NOS_RESULT_FAILED; } NeedsReinitialize = false; - std::string deviceMsg = "Audio capture is ready"; - if (!Capture->GetDeviceName().empty()) - deviceMsg += " (" + Capture->GetDeviceName() + ")"; - SetNodeStatusMessageIfChanged(deviceMsg, fb::NodeStatusMessageType::INFO); } - else if (!Active && Capture) + else if (!active && Capture) { Capture->Stop(); Capture.reset(); SetNodeStatusMessageIfChanged("System audio input inactive", fb::NodeStatusMessageType::WARNING); } -#else - // System audio input is not supported on non-Windows platforms yet - SetNodeStatusMessageIfChanged("System audio input is not supported on this platform yet", fb::NodeStatusMessageType::FAILURE); - return NOS_RESULT_FAILED; -#endif - uint64_t deltaNumerator = pins.FixedStepTiming.DeltaSeconds.x; - uint64_t deltaDenominator = pins.FixedStepTiming.DeltaSeconds.y; + // Steady-state status, re-posted every frame while capture is live. + // Posting here (instead of once inside the init branch) means the + // message survives path restarts: if OnPathStop or an external clear + // wipes the node status, the very next ExecuteNode repaints it, and + // the SetNodeStatusMessageIfChanged guard suppresses spam in the + // common case where the string hasn't changed. + if (active && Capture) + { + std::string deviceMsg = "Capturing system audio"; + if (!Capture->GetDeviceName().empty()) + deviceMsg += " (" + Capture->GetDeviceName() + ")"; + SetNodeStatusMessageIfChanged(deviceMsg, fb::NodeStatusMessageType::INFO); + } - AccumulatedSampleNumerator += deltaNumerator * static_cast(sampleRate); + const uint64_t deltaNumerator = pins.FixedStepTiming.DeltaSeconds.x; + const uint64_t deltaDenominator = pins.FixedStepTiming.DeltaSeconds.y; - uint32_t numSamples = static_cast(AccumulatedSampleNumerator / deltaDenominator); - AccumulatedSampleNumerator %= deltaDenominator; // Keep remainder for next frame + AccumulatedSampleNumerator += deltaNumerator * static_cast(sampleRate); + const uint32_t numSamples = static_cast(AccumulatedSampleNumerator / deltaDenominator); + AccumulatedSampleNumerator %= deltaDenominator; - // Create or resize audio buffer only if needed (with 1.1x headroom to avoid frequent reallocations) - size_t requiredBufferSize = numSamples * sizeof(uint32_t) * channelCount; + // Create or grow the audio buffer only when strictly necessary; 1.1x + // headroom amortises reallocations across small timing fluctuations. + const size_t requiredBufferSize = static_cast(numSamples) * sizeof(uint32_t) * channelCount; size_t allocatedBufferSize = 0; if (AudioPacketBuffer) { @@ -439,16 +184,27 @@ struct SystemAudioInputNode : NodeContext if (!AudioPacketBuffer || requiredBufferSize > allocatedBufferSize) { AudioPacketBuffer = {}; - - // Allocate 1.1x the required size to reduce frequency of reallocations - size_t newBufferSize = requiredBufferSize * 1.1f; + const size_t newBufferSize = static_cast(requiredBufferSize * 1.1f); nosBufferInfo audioBufferDesc = {}; audioBufferDesc.Size = static_cast(newBufferSize); audioBufferDesc.Usage = nosBufferUsage(NOS_BUFFER_USAGE_STORAGE_BUFFER | NOS_BUFFER_USAGE_TRANSFER_DST | NOS_BUFFER_USAGE_TRANSFER_SRC); - audioBufferDesc.MemoryFlags = - nosMemoryFlags(NOS_MEMORY_FLAGS_HOST_VISIBLE | NOS_MEMORY_FLAGS_FORCE_HOST_MEMORY); + // DOWNLOAD flips VMA from HOST_ACCESS_SEQUENTIAL_WRITE (which lets + // it pick write-combined memory) to HOST_ACCESS_RANDOM (cached + // memory). The engine already requests VK_MEMORY_PROPERTY_HOST_- + // COHERENT_BIT in either case, so host↔device coherence is fine + // without this flag — but the buffer is ALSO read by a consumer + // node (AudioOscilloscope) running on a different engine runner + // thread. Write-combined memory doesn't participate in normal + // CPU cache coherence between cores, so the consumer's reads + // could miss the producer's writes until some unrelated sync + // event flushed things. Cached memory fixes this, at the cost + // of slightly slower sequential writes (unmeasurable at audio + // sample volumes). + audioBufferDesc.MemoryFlags = nosMemoryFlags(NOS_MEMORY_FLAGS_HOST_VISIBLE | + NOS_MEMORY_FLAGS_DOWNLOAD | + NOS_MEMORY_FLAGS_FORCE_HOST_MEMORY); audioBufferDesc.ElementType = NOS_BUFFER_ELEMENT_TYPE_INT32; AudioPacketBuffer = sys::vulkan::CreateBuffer(audioBufferDesc, "SystemAudioInput AudioBuffer"); @@ -459,57 +215,38 @@ struct SystemAudioInputNode : NodeContext } } - int32_t* audioSamples = reinterpret_cast(nosVulkan->Map(AudioPacketBuffer)); + auto* audioSamples = reinterpret_cast(nosVulkan->Map(AudioPacketBuffer)); if (!audioSamples) { SetNodeStatusMessageIfChanged("Failed to map audio buffer", fb::NodeStatusMessageType::FAILURE); return NOS_RESULT_FAILED; } - if (Active) + if (active && Capture) { -#ifdef _WIN32 - // Read captured system audio - if (Capture) - { - bool hasAudio = Capture->ReadSamples(audioSamples, numSamples, channelCount, gain); - std::string deviceName = Capture->GetDeviceName(); - std::string deviceSuffix = deviceName.empty() ? "" : " - " + deviceName; - - if (hasAudio) - SetNodeStatusMessageIfChanged("Capturing audio" + deviceSuffix, fb::NodeStatusMessageType::INFO); - else - SetNodeStatusMessageIfChanged("Audio capture is ready" + deviceSuffix, fb::NodeStatusMessageType::INFO); - } - else -#endif - { - // Fill with silence if capture failed - for (uint32_t i = 0; i < numSamples * channelCount; ++i) - { - audioSamples[i] = 0; - } - } + // Don't update the status message every frame based on whether + // this single frame delivered audio — ReadSamples flips true/false + // at the rate of buffer fills, which causes the editor's node + // status area to spam updates. The "ready" message posted after + // Initialize/Start stays put; transitions (inactive, failure) are + // the only things that republish. + Capture->ReadSamples(audioSamples, numSamples, channelCount, gain); } else { - // Fill with silence when inactive for (uint32_t i = 0; i < numSamples * channelCount; ++i) - { audioSamples[i] = 0; - } } - // Update current sample index CurrentSampleIndex += numSamples; AudioPacketDescriptor audioPacketDesc( sampleRate, numSamples, BitDepth::AUDIO_BIT_DEPTH_24_BIT, sizeof(int32_t), channelCount); ObjectRef outDesc{}; - nosEngine.ObjectAPI->CreatePrimitiveObject(NOS_NAME(AudioPacketDescriptor::GetFullyQualifiedName()), - nos::Buffer::From(audioPacketDesc), - &outDesc.GetStorage()); + nosEngine.ObjectAPI->CreatePrimitiveObject(NOS_NAME(AudioPacketDescriptor::GetFullyQualifiedName()), + nos::Buffer::From(audioPacketDesc), + &outDesc.GetStorage()); ObjectRef out{}; std::vector fields; @@ -521,14 +258,12 @@ struct SystemAudioInputNode : NodeContext .FieldName = NOS_NAME("buffer"), .FieldObjectId = AudioPacketBuffer, }); - nosEngine.ObjectAPI->CreateCompositeObject(NOS_NAME(AudioPacket::GetFullyQualifiedName()), - fields.data(), - fields.size(), - &out.GetStorage()); + nosEngine.ObjectAPI->CreateCompositeObject(NOS_NAME(AudioPacket::GetFullyQualifiedName()), + fields.data(), + fields.size(), + &out.GetStorage()); NOS_SOFT_CHECK(out, "Failed to create output AudioPacket object"); - - // Set output pin values SetPinObject(NOS_NAME("AudioPacket"), out); return NOS_RESULT_SUCCESS; @@ -537,13 +272,9 @@ struct SystemAudioInputNode : NodeContext TypedObjectRef AudioPacketBuffer; uint64_t AccumulatedSampleNumerator = 0; uint64_t CurrentSampleIndex = 0; - bool Active = false; bool NeedsReinitialize = false; std::string LastStatusMessage; - -#ifdef _WIN32 - std::unique_ptr Capture; -#endif + std::unique_ptr Capture; }; nosResult RegisterSystemAudioInputNode(nosNodeFunctions* fn)