From 71a5eab8644280ed05ee22bf15268b5f187a49e9 Mon Sep 17 00:00:00 2001 From: asuessenbach Date: Thu, 9 Apr 2026 12:37:22 +0200 Subject: [PATCH] Unify vkb::Stats and vkb::stats::HPPStats into vkb::stats::Stats --- framework/CMakeLists.txt | 2 - framework/gui.h | 20 +- framework/stats/hpp_stats.h | 65 ---- framework/stats/stats.cpp | 476 ------------------------ framework/stats/stats.h | 661 +++++++++++++++++++++++++++------ framework/stats/stats_common.h | 15 +- framework/vulkan_sample.h | 25 +- 7 files changed, 574 insertions(+), 690 deletions(-) delete mode 100644 framework/stats/hpp_stats.h delete mode 100644 framework/stats/stats.cpp diff --git a/framework/CMakeLists.txt b/framework/CMakeLists.txt index c54a592354..d7074d861a 100644 --- a/framework/CMakeLists.txt +++ b/framework/CMakeLists.txt @@ -193,10 +193,8 @@ set(STATS_FILES stats/stats_provider.h stats/frame_time_stats_provider.h stats/vulkan_stats_provider.h - stats/hpp_stats.h # Source Files - stats/stats.cpp stats/stats_provider.cpp stats/frame_time_stats_provider.cpp stats/vulkan_stats_provider.cpp) diff --git a/framework/gui.h b/framework/gui.h index d7971a4496..d7651b6afd 100644 --- a/framework/gui.h +++ b/framework/gui.h @@ -29,7 +29,6 @@ #include "platform/input_events.h" #include "platform/window.h" #include "rendering/hpp_pipeline_state.h" -#include "stats/hpp_stats.h" #include "stats/stats.h" #include #include @@ -106,7 +105,6 @@ class Gui using PipelineLayoutType = typename std::conditional::type; using PipelineShaderStageCreateInfoType = typename std::conditional::type; using RenderPassType = typename std::conditional::type; - using StatsType = typename std::conditional::type; public: /** @@ -128,9 +126,9 @@ class Gui public: /** * @brief Constructs a StatsView - * @param stats Const pointer to the Stats data object; may be null + * @param stats Const pointer to the vkb::stats::Stats data object; may be null */ - StatsView(const StatsType *stats); + StatsView(const vkb::stats::Stats *stats); 
float get_graph_height() const; @@ -165,7 +163,7 @@ class Gui */ Gui(vkb::rendering::RenderContext &render_context, Window const &window, - StatsType const *stats = nullptr, + vkb::stats::Stats const *stats = nullptr, float font_size = 21.0f, bool explicit_update = false); @@ -259,7 +257,7 @@ class Gui * @param stats Statistics to show (can be null) * @param debug_info Debug info to show (can be null) */ - void show_top_window(const std::string &app_name, const StatsType *stats = nullptr, DebugInfo *debug_info = nullptr); + void show_top_window(const std::string &app_name, const vkb::stats::Stats *stats = nullptr, DebugInfo *debug_info = nullptr); /** * @brief Updates the Gui @@ -273,7 +271,7 @@ class Gui * @brief Shows a child with statistics * @param stats Statistics to show */ - void show_stats(const StatsType &stats); + void show_stats(const vkb::stats::Stats &stats); private: static constexpr char const *default_font = "Roboto-Regular"; // The name of the default font file to use @@ -350,7 +348,7 @@ using GuiCpp = Gui; template inline Gui::Gui( - vkb::rendering::RenderContext &render_context_, Window const &window, StatsType const *stats, float font_size, bool explicit_update) : + vkb::rendering::RenderContext &render_context_, Window const &window, vkb::stats::Stats const *stats, float font_size, bool explicit_update) : render_context{render_context_}, content_scale_factor{window.get_content_scale_factor()}, dpi_factor{window.get_dpi_factor() * content_scale_factor}, @@ -1285,7 +1283,7 @@ inline void Gui::show_simple_window(const std::string &name, uint32 } template -inline void Gui::show_stats(const StatsType &stats) +inline void Gui::show_stats(const vkb::stats::Stats &stats) { for (const auto &stat_index : stats.get_requested_stats()) { @@ -1327,7 +1325,7 @@ inline void Gui::show_stats(const StatsType &stats) } template -inline void Gui::show_top_window(const std::string &app_name, const StatsType *stats, DebugInfo *debug_info) +inline void 
Gui::show_top_window(const std::string &app_name, const vkb::stats::Stats *stats, DebugInfo *debug_info) { // Transparent background ImGui::SetNextWindowBgAlpha(overlay_alpha); @@ -1501,7 +1499,7 @@ inline void Gui::upload_draw_data(const ImDrawData *draw_data, uint } template -inline Gui::StatsView::StatsView(const StatsType *stats) +inline Gui::StatsView::StatsView(const vkb::stats::Stats *stats) { if (stats == nullptr) { diff --git a/framework/stats/hpp_stats.h b/framework/stats/hpp_stats.h deleted file mode 100644 index c46050991a..0000000000 --- a/framework/stats/hpp_stats.h +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2021-2025, NVIDIA CORPORATION. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 the "License"; - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "stats/stats.h" - -namespace vkb -{ -namespace rendering -{ -template -class RenderContext; -using RenderContextCpp = RenderContext; -} // namespace rendering - -namespace stats -{ -/** - * @brief facade class around vkb::Stats, providing a vulkan.hpp-based interface - * - * See vkb::Stats for documentation - */ -class HPPStats : private vkb::Stats -{ - public: - using vkb::Stats::get_data; - using vkb::Stats::get_graph_data; - using vkb::Stats::get_requested_stats; - using vkb::Stats::is_available; - using vkb::Stats::request_stats; - using vkb::Stats::resize; - using vkb::Stats::update; - - explicit HPPStats(vkb::rendering::RenderContextCpp &render_context, size_t buffer_size = 16) : - vkb::Stats(reinterpret_cast(render_context), buffer_size) - {} - - void begin_sampling(vkb::core::CommandBufferCpp &cb) - { - vkb::Stats::begin_sampling(reinterpret_cast(cb)); - } - - void end_sampling(vkb::core::CommandBufferCpp &cb) - { - vkb::Stats::end_sampling(reinterpret_cast(cb)); - } -}; - -} // namespace stats -} // namespace vkb diff --git a/framework/stats/stats.cpp b/framework/stats/stats.cpp deleted file mode 100644 index 088e9f5973..0000000000 --- a/framework/stats/stats.cpp +++ /dev/null @@ -1,476 +0,0 @@ -/* Copyright (c) 2018-2025, Arm Limited and Contributors - * Copyright (c) 2020-2025, Broadcom Inc. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 the "License"; - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "stats/stats.h" - -#include -#include -#include - -#include "core/device.h" -#include "frame_time_stats_provider.h" -#ifdef VK_USE_PLATFORM_ANDROID_KHR -# include "hwcpipe_stats_provider.h" -#endif -#include "core/allocated.h" -#include "rendering/render_context.h" -#include "vulkan_stats_provider.h" - -namespace vkb -{ -Stats::Stats(vkb::rendering::RenderContextC &render_context, size_t buffer_size) : - render_context(render_context), - buffer_size(buffer_size) -{ - assert(buffer_size >= 2 && "Buffers size should be greater than 2"); -} - -Stats::~Stats() -{ - if (stop_worker) - { - stop_worker->set_value(); - } - - if (worker_thread.joinable()) - { - worker_thread.join(); - } -} - -void Stats::request_stats(const std::set &wanted_stats, - CounterSamplingConfig config) -{ - if (providers.size() != 0) - { - throw std::runtime_error("Stats must only be requested once"); - } - - requested_stats = wanted_stats; - sampling_config = config; - - // Copy the requested stats, so they can be changed by the providers below - std::set stats = requested_stats; - - // Initialize our list of providers (in priority order) - // All supported stats will be removed from the given 'stats' set by the provider's constructor - // so subsequent providers only see requests for stats that aren't already supported. - providers.emplace_back(std::make_unique(stats)); -#ifdef VK_USE_PLATFORM_ANDROID_KHR - providers.emplace_back(std::make_unique(stats)); -#endif - providers.emplace_back(std::make_unique(stats, sampling_config, render_context)); - - // In continuous sampling mode we still need to update the frame times as if we are polling - // Store the frame time provider here so we can easily access it later. 
- frame_time_provider = providers[0].get(); - - for (const auto &stat : requested_stats) - { - counters[stat] = std::vector(buffer_size, 0); - } - - if (sampling_config.mode == CounterSamplingMode::Continuous) - { - // Start a thread for continuous sample capture - stop_worker = std::make_unique>(); - - worker_thread = std::thread([this] { - continuous_sampling_worker(stop_worker->get_future()); - }); - - // Reduce smoothing for continuous sampling - alpha_smoothing = 0.6f; - } - - for (const auto &stat_index : requested_stats) - { - if (!is_available(stat_index)) - { - LOGW(vkb::StatsProvider::default_graph_data(stat_index).name + " : not available"); - } - } -} - -void Stats::resize(const size_t width) -{ - // The circular buffer size will be 1/16th of the width of the screen - // which means every sixteen pixels represent one graph value - buffer_size = width >> 4; - - for (auto &counter : counters) - { - counter.second.resize(buffer_size); - counter.second.shrink_to_fit(); - } -} - -bool Stats::is_available(const StatIndex index) const -{ - for (const auto &p : providers) - { - if (p->is_available(index)) - { - return true; - } - } - return false; -} - -static void add_smoothed_value(std::vector &values, float value, float alpha) -{ - assert(values.size() >= 2 && "Buffers size should be greater than 2"); - - if (values.size() == values.capacity()) - { - // Shift values to the left to make space at the end and update counters - std::rotate(values.begin(), values.begin() + 1, values.end()); - } - - // Use an exponential moving average to smooth values - values.back() = value * alpha + *(values.end() - 2) * (1.0f - alpha); -} - -void Stats::update(float delta_time) -{ - switch (sampling_config.mode) - { - case CounterSamplingMode::Polling: - { - StatsProvider::Counters sample; - - for (auto &p : providers) - { - auto s = p->sample(delta_time); - sample.insert(s.begin(), s.end()); - } - push_sample(sample); - break; - } - case CounterSamplingMode::Continuous: - { - 
// Check that we have no pending samples to be shown - if (pending_samples.size() == 0) - { - std::unique_lock lock(continuous_sampling_mutex); - if (!should_add_to_continuous_samples) - { - // If we have no pending samples, we let the worker thread - // capture samples for the next frame - should_add_to_continuous_samples = true; - } - else - { - // The worker thread has captured a frame, so we stop it - // and read the samples - should_add_to_continuous_samples = false; - pending_samples.clear(); - std::swap(pending_samples, continuous_samples); - } - } - - if (pending_samples.size() == 0) - { - return; - } - - // Ensure the number of pending samples is capped at a reasonable value - if (pending_samples.size() > 100) - { - // Prefer later samples over new samples. - std::move(pending_samples.end() - 100, pending_samples.end(), pending_samples.begin()); - pending_samples.erase(pending_samples.begin() + 100, pending_samples.end()); - - // If we get to this point, we're not reading samples fast enough, nudge a little ahead. - fractional_pending_samples += 1.0f; - } - - // Compute the number of samples to show this frame - float floating_sample_count = sampling_config.speed * delta_time * static_cast(buffer_size) + fractional_pending_samples; - - // Keep track of the fractional value to avoid speeding up or slowing down too much due to rounding errors. - // Generally we push very few samples per frame, so this matters. 
- fractional_pending_samples = floating_sample_count - std::floor(floating_sample_count); - - auto sample_count = static_cast(floating_sample_count); - - // Clamp the number of samples - sample_count = std::max(1, std::min(sample_count, pending_samples.size())); - - // Get the frame time stats (not a continuous stat) - StatsProvider::Counters frame_time_sample = frame_time_provider->sample(delta_time); - - // Push the samples to circular buffers - std::for_each(pending_samples.begin(), pending_samples.begin() + sample_count, [this, frame_time_sample](auto &s) { - // Write the correct frame time into the continuous stats - s.insert(frame_time_sample.begin(), frame_time_sample.end()); - // Then push the sample to the counters list - this->push_sample(s); - }); - pending_samples.erase(pending_samples.begin(), pending_samples.begin() + sample_count); - - break; - } - } - - profile_counters(); -} - -void Stats::continuous_sampling_worker(std::future should_terminate) -{ - worker_timer.tick(); - - for (auto &p : providers) - { - p->continuous_sample(0.0f); - } - - while (should_terminate.wait_for(std::chrono::seconds(0)) != std::future_status::ready) - { - auto delta_time = static_cast(worker_timer.tick()); - auto interval = std::chrono::duration_cast>(sampling_config.interval).count(); - - // Ensure we wait for the interval specified in config - if (delta_time < interval) - { - std::this_thread::sleep_for(std::chrono::duration(interval - delta_time)); - delta_time += static_cast(worker_timer.tick()); - } - - // Sample counters - StatsProvider::Counters sample; - for (auto &p : providers) - { - StatsProvider::Counters s = p->continuous_sample(delta_time); - sample.insert(s.begin(), s.end()); - } - - // Add the new sample to the vector of continuous samples - { - std::unique_lock lock(continuous_sampling_mutex); - if (should_add_to_continuous_samples) - { - continuous_samples.push_back(sample); - } - } - } -} - -void Stats::push_sample(const StatsProvider::Counters 
&sample) -{ - for (auto &c : counters) - { - StatIndex idx = c.first; - std::vector &values = c.second; - - // Find the counter matching this StatIndex in the Sample - const auto &smp = sample.find(idx); - if (smp == sample.end()) - { - continue; - } - - float measurement = static_cast(smp->second.result); - - add_smoothed_value(values, measurement, alpha_smoothing); - } -} - -namespace -{ -// For now names are taken from the stats_provider.cpp file -const char *to_string(StatIndex index) -{ - switch (index) - { - case StatIndex::frame_times: - return "Frame Times (ms)"; - case StatIndex::cpu_cycles: - return "CPU Cycles (M/s)"; - case StatIndex::cpu_instructions: - return "CPU Instructions (M/s)"; - case StatIndex::cpu_cache_miss_ratio: - return "Cache Miss Ratio (%)"; - case StatIndex::cpu_branch_miss_ratio: - return "Branch Miss Ratio (%)"; - case StatIndex::cpu_l1_accesses: - return "CPU L1 Accesses (M/s)"; - case StatIndex::cpu_instr_retired: - return "CPU Instructions Retired (M/s)"; - case StatIndex::cpu_l2_accesses: - return "CPU L2 Accesses (M/s)"; - case StatIndex::cpu_l3_accesses: - return "CPU L3 Accesses (M/s)"; - case StatIndex::cpu_bus_reads: - return "CPU Bus Read Beats (M/s)"; - case StatIndex::cpu_bus_writes: - return "CPU Bus Write Beats (M/s)"; - case StatIndex::cpu_mem_reads: - return "CPU Memory Read Instructions (M/s)"; - case StatIndex::cpu_mem_writes: - return "CPU Memory Write Instructions (M/s)"; - case StatIndex::cpu_ase_spec: - return "CPU Speculatively Exec. SIMD Instructions (M/s)"; - case StatIndex::cpu_vfp_spec: - return "CPU Speculatively Exec. FP Instructions (M/s)"; - case StatIndex::cpu_crypto_spec: - return "CPU Speculatively Exec. 
Crypto Instructions (M/s)"; - case StatIndex::gpu_cycles: - return "GPU Cycles (M/s)"; - case StatIndex::gpu_vertex_cycles: - return "Vertex Cycles (M/s)"; - case StatIndex::gpu_load_store_cycles: - return "Load Store Cycles (k/s)"; - case StatIndex::gpu_tiles: - return "Tiles (k/s)"; - case StatIndex::gpu_killed_tiles: - return "Tiles killed by CRC match (k/s)"; - case StatIndex::gpu_fragment_jobs: - return "Fragment Jobs (s)"; - case StatIndex::gpu_fragment_cycles: - return "Fragment Cycles (M/s)"; - case StatIndex::gpu_tex_cycles: - return "Shader Texture Cycles (k/s)"; - case StatIndex::gpu_ext_reads: - return "External Reads (M/s)"; - case StatIndex::gpu_ext_writes: - return "External Writes (M/s)"; - case StatIndex::gpu_ext_read_stalls: - return "External Read Stalls (M/s)"; - case StatIndex::gpu_ext_write_stalls: - return "External Write Stalls (M/s)"; - case StatIndex::gpu_ext_read_bytes: - return "External Read Bytes (MiB/s)"; - case StatIndex::gpu_ext_write_bytes: - return "External Write Bytes (MiB/s)"; - default: - return nullptr; - } -} -} // namespace - -void Stats::profile_counters() const -{ -#if VKB_PROFILING - static std::chrono::high_resolution_clock::time_point last_time = std::chrono::high_resolution_clock::now(); - std::chrono::high_resolution_clock::time_point now = std::chrono::high_resolution_clock::now(); - - if (now - last_time < std::chrono::milliseconds(100)) - { - return; - } - - last_time = now; - - for (auto &c : counters) - { - StatIndex idx = c.first; - auto &graph_data = get_graph_data(idx); - - if (c.second.empty()) - { - continue; - } - - float average = 0.0f; - for (auto &v : c.second) - { - average += v; - } - average /= c.second.size(); - - if (auto *index_name = to_string(idx)) - { - Plot::plot(index_name, average * graph_data.scale_factor); - } - } - - static std::vector labels; - - auto &device = render_context.get_device(); - VmaAllocator allocator = allocated::get_memory_allocator(); - - VmaBudget 
heap_budgets[VK_MAX_MEMORY_HEAPS]; - vmaGetHeapBudgets(allocator, heap_budgets); - - // We know that we will only ever have one device in the system, so we can cache the labels - if (labels.size() == 0) - { - VkPhysicalDeviceMemoryProperties memory_properties; - vkGetPhysicalDeviceMemoryProperties(device.get_gpu().get_handle(), &memory_properties); - - labels.reserve(memory_properties.memoryHeapCount); - - for (size_t heap = 0; heap < memory_properties.memoryHeapCount; heap++) - { - VkMemoryPropertyFlags flags = memory_properties.memoryHeaps[heap].flags; - labels.push_back("Heap " + std::to_string(heap) + " " + vk::to_string(vk::MemoryPropertyFlags{flags})); - } - } - - for (size_t heap = 0; heap < labels.size(); heap++) - { - Plot::plot(labels[heap].c_str(), heap_budgets[heap].usage / (1024.0f * 1024.0f)); - } -#endif -} - -void Stats::begin_sampling(vkb::core::CommandBufferC &cb) -{ - // Inform the providers - for (auto &p : providers) - { - p->begin_sampling(cb); - } -} - -void Stats::end_sampling(vkb::core::CommandBufferC &cb) -{ - // Inform the providers - for (auto &p : providers) - { - p->end_sampling(cb); - } -} - -const StatGraphData &Stats::get_graph_data(StatIndex index) const -{ - for (auto &p : providers) - { - if (p->is_available(index)) - { - return p->get_graph_data(index); - } - } - return StatsProvider::default_graph_data(index); -} - -StatGraphData::StatGraphData(const std::string &name, - const std::string &graph_label_format, - float scale_factor, - bool has_fixed_max, - float max_value) : - name(name), - format{graph_label_format}, - scale_factor{scale_factor}, - has_fixed_max{has_fixed_max}, - max_value{max_value} -{ -} - -} // namespace vkb diff --git a/framework/stats/stats.h b/framework/stats/stats.h index 5ecb75ae8f..7b820db0d1 100644 --- a/framework/stats/stats.h +++ b/framework/stats/stats.h @@ -1,5 +1,6 @@ -/* Copyright (c) 2018-2025, Arm Limited and Contributors - * Copyright (c) 2020-2025, Broadcom Inc. 
+/* Copyright (c) 2018-2026, Arm Limited and Contributors + * Copyright (c) 2020-2026, Broadcom Inc. + * Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -18,37 +19,26 @@ #pragma once -#include -#include #include -#include -#include -#include -#include "stats_common.h" -#include "stats_provider.h" +#include "core/util/profiling.hpp" +#include "stats/frame_time_stats_provider.h" +#include "stats/stats_common.h" +#include "stats/stats_provider.h" +#include "stats/vulkan_stats_provider.h" #include "timer.h" +#ifdef VK_USE_PLATFORM_ANDROID_KHR +# include "stats/hwcpipe_stats_provider.h" +#endif namespace vkb { - -namespace core -{ -template -class CommandBuffer; -using CommandBufferC = CommandBuffer; -} // namespace core - -namespace rendering +namespace stats { -template -class RenderContext; -using RenderContextC = RenderContext; -} // namespace rendering - /* * @brief Helper class for querying statistics about the CPU and the GPU */ +template class Stats { public: @@ -57,7 +47,7 @@ class Stats * @param render_context The RenderContext for this sample * @param buffer_size Size of the circular buffers */ - explicit Stats(vkb::rendering::RenderContextC &render_context, size_t buffer_size = 16); + explicit Stats(vkb::rendering::RenderContext &render_context, size_t buffer_size = 16); /** * @brief Destroys the Stats object @@ -65,146 +55,585 @@ class Stats ~Stats(); /** - * @brief Request specific set of stats to be collected - * @param requested_stats Set of stats to be collected if available - * @param sampling_config Sampling mode configuration (polling or continuous) + * @brief A command buffer that we want to collect stats about has just begun + * + * Some stats providers (like the Vulkan extension one) can only collect stats + * about the execution of a specific command buffer. 
In those cases we need to + * know when a command buffer has begun and when it's about to end so that we + * can inject some extra commands into the command buffer to control the stats + * collection. This method tells the stats provider that a command buffer has + * begun so that can happen. The command buffer must be in a recording state + * when this method is called. + * @param cb The command buffer */ - void request_stats(const std::set &requested_stats, - CounterSamplingConfig sampling_config = {CounterSamplingMode::Polling}); + void begin_sampling(vkb::core::CommandBuffer &cb); /** - * @brief Resizes the stats buffers according to the width of the screen - * @param width The width of the screen + * @brief A command buffer that we want to collect stats about is about to be ended + * + * Some stats providers (like the Vulkan extension one) can only collect stats + * about the execution of a specific command buffer. In those cases we need to + * know when a command buffer has begun and when it's about to end so that we + * can inject some extra commands into the command buffer to control the stats + * collection. This method tells the stats provider that a command buffer is + * about to be ended so that can happen. The command buffer must be in a recording + * state when this method is called. 
+ * @param cb The command buffer */ - void resize(size_t width); + void end_sampling(vkb::core::CommandBuffer &cb); /** - * @brief Checks if an enabled stat is available in the current platform - * @param index The stat index - * @return True if the stat is available, false otherwise + * @brief Returns the collected data for a specific statistic + * @param index The stat index of the data requested + * @return The data of the specified stat */ - bool is_available(StatIndex index) const; + std::vector const &get_data(StatIndex index) const; /** * @brief Returns data relevant for graphing a specific statistic * @param index The stat index of the data requested * @return The data of the specified stat */ - const StatGraphData &get_graph_data(StatIndex index) const; + StatGraphData const &get_graph_data(StatIndex index) const; /** - * @brief Returns the collected data for a specific statistic - * @param index The stat index of the data requested - * @return The data of the specified stat + * @return The requested stats */ - const std::vector &get_data(StatIndex index) const - { - return counters.at(index); - }; + std::set const &get_requested_stats() const; /** - * @return The requested stats + * @brief Checks if an enabled stat is available in the current platform + * @param index The stat index + * @return True if the stat is available, false otherwise */ - const std::set &get_requested_stats() const - { - return requested_stats; - } + bool is_available(StatIndex index) const; /** - * @brief Update statistics, must be called after every frame - * @param delta_time Time since last update + * @brief Request specific set of stats to be collected + * @param requested_stats Set of stats to be collected if available + * @param sampling_config Sampling mode configuration (polling or continuous) */ - void update(float delta_time); + void request_stats(const std::set &requested_stats, + CounterSamplingConfig sampling_config = {CounterSamplingMode::Polling}); /** - * @brief A 
command buffer that we want to collect stats about has just begun - * - * Some stats providers (like the Vulkan extension one) can only collect stats - * about the execution of a specific command buffer. In those cases we need to - * know when a command buffer has begun and when it's about to end so that we - * can inject some extra commands into the command buffer to control the stats - * collection. This method tells the stats provider that a command buffer has - * begun so that can happen. The command buffer must be in a recording state - * when this method is called. - * @param cb The command buffer + * @brief Resizes the stats buffers according to the width of the screen + * @param width The width of the screen */ - void begin_sampling(vkb::core::CommandBufferC &cb); + void resize(size_t width); /** - * @brief A command buffer that we want to collect stats about is about to be ended - * - * Some stats providers (like the Vulkan extension one) can only collect stats - * about the execution of a specific command buffer. In those cases we need to - * know when a command buffer has begun and when it's about to end so that we - * can inject some extra commands into the command buffer to control the stats - * collection. This method tells the stats provider that a command buffer is - * about to be ended so that can happen. The command buffer must be in a recording - * state when this method is called. 
- * @param cb The command buffer + * @brief Update statistics, must be called after every frame + * @param delta_time Time since last update */ - void end_sampling(vkb::core::CommandBufferC &cb); + void update(float delta_time); private: - /// The render context - vkb::rendering::RenderContextC &render_context; + /// The worker thread function for continuous sampling; + /// it adds a new entry to continuous_samples at every interval + void continuous_sampling_worker(std::future should_terminate); - /// Stats that were requested - they may not all be available - std::set requested_stats; + // Push counters to external profilers + void profile_counters() const; - /// Provider that tracks frame times - StatsProvider *frame_time_provider; + /// Updates circular buffers for CPU and GPU counters + void push_sample(const vkb::StatsProvider::Counters &sample); - /// A list of stats providers to use in priority order - std::vector> providers; + private: + float alpha_smoothing = 0.2f; // Alpha smoothing for running average + size_t buffer_size; // Size of the circular buffers + std::vector continuous_samples; // The samples read during continuous sampling + std::mutex continuous_sampling_mutex; // A mutex for accessing measurements during continuous sampling + std::map> counters; // Circular buffers for counter data + float fractional_pending_samples = 0.0f; // A value which helps keep a steady pace of continuous samples output. 
+ vkb::StatsProvider *frame_time_provider; // Provider that tracks frame times + vkb::Timer main_timer; // vkb::Timer used in the main thread to compute delta time + std::vector pending_samples; // The samples waiting to be displayed + std::vector> providers; // A list of stats providers to use in priority order + vkb::rendering::RenderContextCpp &render_context; // The render context + std::set requested_stats; // Stats that were requested - they may not all be available + CounterSamplingConfig sampling_config; // Counter sampling configuration + bool should_add_to_continuous_samples = false; // A flag specifying if the worker thread should add entries to continuous_samples + std::unique_ptr> stop_worker; // Promise to stop the worker thread + std::thread worker_thread; // Worker thread for continuous sampling + vkb::Timer worker_timer; // vkb::Timer used by the worker thread to throttle counter sampling +}; - /// Counter sampling configuration - CounterSamplingConfig sampling_config; +using StatsC = Stats; +using StatsCpp = Stats; - /// Size of the circular buffers - size_t buffer_size; +namespace +{ +static inline void add_smoothed_value(std::vector &values, float value, float alpha) +{ + assert(values.size() >= 2 && "Buffers size should be greater than 2"); - /// Timer used in the main thread to compute delta time - Timer main_timer; + if (values.size() == values.capacity()) + { + // Shift values to the left to make space at the end and update counters + std::rotate(values.begin(), values.begin() + 1, values.end()); + } - /// Timer used by the worker thread to throttle counter sampling - Timer worker_timer; + // Use an exponential moving average to smooth values + values.back() = value * alpha + *(values.end() - 2) * (1.0f - alpha); +} - /// Alpha smoothing for running average - float alpha_smoothing{0.2f}; +// For now names are taken from the stats_provider.cpp file +static inline char const *to_string(StatIndex index) +{ + switch (index) + { + case 
StatIndex::frame_times: + return "Frame Times (ms)"; + case StatIndex::cpu_cycles: + return "CPU Cycles (M/s)"; + case StatIndex::cpu_instructions: + return "CPU Instructions (M/s)"; + case StatIndex::cpu_cache_miss_ratio: + return "Cache Miss Ratio (%)"; + case StatIndex::cpu_branch_miss_ratio: + return "Branch Miss Ratio (%)"; + case StatIndex::cpu_l1_accesses: + return "CPU L1 Accesses (M/s)"; + case StatIndex::cpu_instr_retired: + return "CPU Instructions Retired (M/s)"; + case StatIndex::cpu_l2_accesses: + return "CPU L2 Accesses (M/s)"; + case StatIndex::cpu_l3_accesses: + return "CPU L3 Accesses (M/s)"; + case StatIndex::cpu_bus_reads: + return "CPU Bus Read Beats (M/s)"; + case StatIndex::cpu_bus_writes: + return "CPU Bus Write Beats (M/s)"; + case StatIndex::cpu_mem_reads: + return "CPU Memory Read Instructions (M/s)"; + case StatIndex::cpu_mem_writes: + return "CPU Memory Write Instructions (M/s)"; + case StatIndex::cpu_ase_spec: + return "CPU Speculatively Exec. SIMD Instructions (M/s)"; + case StatIndex::cpu_vfp_spec: + return "CPU Speculatively Exec. FP Instructions (M/s)"; + case StatIndex::cpu_crypto_spec: + return "CPU Speculatively Exec. 
Crypto Instructions (M/s)"; + case StatIndex::gpu_cycles: + return "GPU Cycles (M/s)"; + case StatIndex::gpu_vertex_cycles: + return "Vertex Cycles (M/s)"; + case StatIndex::gpu_load_store_cycles: + return "Load Store Cycles (k/s)"; + case StatIndex::gpu_tiles: + return "Tiles (k/s)"; + case StatIndex::gpu_killed_tiles: + return "Tiles killed by CRC match (k/s)"; + case StatIndex::gpu_fragment_jobs: + return "Fragment Jobs (s)"; + case StatIndex::gpu_fragment_cycles: + return "Fragment Cycles (M/s)"; + case StatIndex::gpu_tex_cycles: + return "Shader Texture Cycles (k/s)"; + case StatIndex::gpu_ext_reads: + return "External Reads (M/s)"; + case StatIndex::gpu_ext_writes: + return "External Writes (M/s)"; + case StatIndex::gpu_ext_read_stalls: + return "External Read Stalls (M/s)"; + case StatIndex::gpu_ext_write_stalls: + return "External Write Stalls (M/s)"; + case StatIndex::gpu_ext_read_bytes: + return "External Read Bytes (MiB/s)"; + case StatIndex::gpu_ext_write_bytes: + return "External Write Bytes (MiB/s)"; + default: + return nullptr; + } +} +} // namespace - /// Circular buffers for counter data - std::map> counters{}; +// Member function definitions - /// Worker thread for continuous sampling - std::thread worker_thread; +template <> +inline Stats::Stats(vkb::rendering::RenderContextCpp &render_context, size_t buffer_size) : + render_context(render_context), + buffer_size(buffer_size) +{ + assert(buffer_size >= 2 && "Buffers size should be greater than 2"); +} - /// Promise to stop the worker thread - std::unique_ptr> stop_worker; +template <> +inline Stats::Stats(vkb::rendering::RenderContextC &render_context, size_t buffer_size) : + render_context(reinterpret_cast(render_context)), + buffer_size(buffer_size) +{ + assert(buffer_size >= 2 && "Buffers size should be greater than 2"); +} - /// A mutex for accessing measurements during continuous sampling - std::mutex continuous_sampling_mutex; +template +inline Stats::~Stats() +{ + if (stop_worker) + { + 
stop_worker->set_value(); + } - /// The samples read during continuous sampling - std::vector continuous_samples; + if (worker_thread.joinable()) + { + worker_thread.join(); + } +} - /// A flag specifying if the worker thread should add entries to continuous_samples - bool should_add_to_continuous_samples{false}; +template +inline void Stats::begin_sampling(vkb::core::CommandBuffer &cb) +{ + // Inform the providers + for (auto &p : providers) + { + if constexpr (bindingType == BindingType::Cpp) + { + p->begin_sampling(reinterpret_cast(cb)); + } + else + { + p->begin_sampling(cb); + } + } +} - /// The samples waiting to be displayed - std::vector pending_samples; +template +inline void Stats::continuous_sampling_worker(std::future should_terminate) +{ + worker_timer.tick(); - /// A value which helps keep a steady pace of continuous samples output. - float fractional_pending_samples{0.0f}; + for (auto &p : providers) + { + p->continuous_sample(0.0f); + } - /// The worker thread function for continuous sampling; - /// it adds a new entry to continuous_samples at every interval - void continuous_sampling_worker(std::future should_terminate); + while (should_terminate.wait_for(std::chrono::seconds(0)) != std::future_status::ready) + { + auto delta_time = static_cast(worker_timer.tick()); + auto interval = std::chrono::duration_cast>(sampling_config.interval).count(); + + // Ensure we wait for the interval specified in config + if (delta_time < interval) + { + std::this_thread::sleep_for(std::chrono::duration(interval - delta_time)); + delta_time += static_cast(worker_timer.tick()); + } + + // Sample counters + vkb::StatsProvider::Counters sample; + for (auto &p : providers) + { + vkb::StatsProvider::Counters s = p->continuous_sample(delta_time); + sample.insert(s.begin(), s.end()); + } + + // Add the new sample to the vector of continuous samples + { + std::unique_lock lock(continuous_sampling_mutex); + if (should_add_to_continuous_samples) + { + 
continuous_samples.push_back(sample); + } + } + } +} - /// Updates circular buffers for CPU and GPU counters - void push_sample(const StatsProvider::Counters &sample); +template +inline void Stats::end_sampling(vkb::core::CommandBuffer &cb) +{ + // Inform the providers + for (auto &p : providers) + { + if constexpr (bindingType == BindingType::Cpp) + { + p->end_sampling(reinterpret_cast(cb)); + } + else + { + p->end_sampling(cb); + } + } +} - // Push counters to external profilers - void profile_counters() const; +template +inline std::vector const &Stats::get_data(StatIndex index) const +{ + return counters.at(index); }; -} // namespace vkb +template +inline StatGraphData const &Stats::get_graph_data(StatIndex index) const +{ + for (auto &p : providers) + { + if (p->is_available(index)) + { + return p->get_graph_data(index); + } + } + return vkb::StatsProvider::default_graph_data(index); +} + +template +inline std::set const &Stats::get_requested_stats() const +{ + return requested_stats; +} + +template +inline bool Stats::is_available(const StatIndex index) const +{ + for (const auto &p : providers) + { + if (p->is_available(index)) + { + return true; + } + } + return false; +} + +template +inline void Stats::profile_counters() const +{ +#if VKB_PROFILING + static std::chrono::high_resolution_clock::time_point last_time = std::chrono::high_resolution_clock::now(); + std::chrono::high_resolution_clock::time_point now = std::chrono::high_resolution_clock::now(); + + if (now - last_time < std::chrono::milliseconds(100)) + { + return; + } + + last_time = now; + + for (auto &c : counters) + { + StatIndex idx = c.first; + auto &graph_data = get_graph_data(idx); + + if (c.second.empty()) + { + continue; + } + + float average = 0.0f; + for (auto &v : c.second) + { + average += v; + } + average /= c.second.size(); + + if (auto *index_name = to_string(idx)) + { + Plot::plot(index_name, average * graph_data.scale_factor); + } + } + + static std::vector labels; + + auto 
&device = render_context.get_device(); + VmaAllocator allocator = allocated::get_memory_allocator(); + + VmaBudget heap_budgets[VK_MAX_MEMORY_HEAPS]; + vmaGetHeapBudgets(allocator, heap_budgets); + + // We know that we will only ever have one device in the system, so we can cache the labels + if (labels.size() == 0) + { + vk::PhysicalDeviceMemoryProperties memory_properties = device.get_gpu().get_handle().getMemoryProperties(); + + labels.reserve(memory_properties.memoryHeapCount); + + for (size_t heap = 0; heap < memory_properties.memoryHeapCount; heap++) + { + vk::MemoryHeapFlags flags = memory_properties.memoryHeaps[heap].flags; + labels.push_back("Heap " + std::to_string(heap) + " " + vk::to_string(flags)); + } + } + + for (size_t heap = 0; heap < labels.size(); heap++) + { + Plot::plot(labels[heap].c_str(), heap_budgets[heap].usage / (1024.0f * 1024.0f)); + } +#endif +} + +template +inline void Stats::push_sample(const vkb::StatsProvider::Counters &sample) +{ + for (auto &c : counters) + { + StatIndex idx = c.first; + std::vector &values = c.second; + + // Find the counter matching this StatIndex in the Sample + const auto &smp = sample.find(idx); + if (smp == sample.end()) + { + continue; + } + + float measurement = static_cast(smp->second.result); + + add_smoothed_value(values, measurement, alpha_smoothing); + } +} + +template +inline void Stats::request_stats(const std::set &wanted_stats, CounterSamplingConfig config) +{ + if (providers.size() != 0) + { + throw std::runtime_error("Stats must only be requested once"); + } + + requested_stats = wanted_stats; + sampling_config = config; + + // Copy the requested stats, so they can be changed by the providers below + std::set stats = requested_stats; + + // Initialize our list of providers (in priority order) + // All supported stats will be removed from the given 'stats' set by the provider's constructor + // so subsequent providers only see requests for stats that aren't already supported. 
+ providers.emplace_back(std::make_unique(stats)); +#ifdef VK_USE_PLATFORM_ANDROID_KHR + providers.emplace_back(std::make_unique(stats)); +#endif + providers.emplace_back(std::make_unique(stats, sampling_config, reinterpret_cast(render_context))); + + // In continuous sampling mode we still need to update the frame times as if we are polling + // Store the frame time provider here so we can easily access it later. + frame_time_provider = providers[0].get(); + + for (const auto &stat : requested_stats) + { + counters[stat] = std::vector(buffer_size, 0); + } + + if (sampling_config.mode == CounterSamplingMode::Continuous) + { + // Start a thread for continuous sample capture + stop_worker = std::make_unique>(); + + worker_thread = std::thread([this] { + continuous_sampling_worker(stop_worker->get_future()); + }); + + // Reduce smoothing for continuous sampling + alpha_smoothing = 0.6f; + } + + for (const auto &stat_index : requested_stats) + { + if (!is_available(stat_index)) + { + LOGW(vkb::StatsProvider::default_graph_data(stat_index).name + " : not available"); + } + } +} + +template +inline void Stats::resize(const size_t width) +{ + // The circular buffer holds one graph value per sixteen pixels of screen width (width / 16), + // but never fewer than the two entries the constructor asserts, so a tiny or zero width cannot empty it + buffer_size = std::max(width >> 4, size_t{2}); + + for (auto &counter : counters) + { + counter.second.resize(buffer_size); + counter.second.shrink_to_fit(); + } +} + +template +inline void Stats::update(float delta_time) +{ + switch (sampling_config.mode) + { + case CounterSamplingMode::Polling: + { + vkb::StatsProvider::Counters sample; + + for (auto &p : providers) + { + auto s = p->sample(delta_time); + sample.insert(s.begin(), s.end()); + } + push_sample(sample); + break; + } + case CounterSamplingMode::Continuous: + { + // Check that we have no pending samples to be shown + if (pending_samples.size() == 0) + { + std::unique_lock lock(continuous_sampling_mutex); + if (!should_add_to_continuous_samples) + { + // If 
we have no pending samples, we let the worker thread + // capture samples for the next frame + should_add_to_continuous_samples = true; + } + else + { + // The worker thread has captured a frame, so we stop it + // and read the samples + should_add_to_continuous_samples = false; + pending_samples.clear(); + std::swap(pending_samples, continuous_samples); + } + } + + if (pending_samples.size() == 0) + { + return; + } + + // Ensure the number of pending samples is capped at a reasonable value + if (pending_samples.size() > 100) + { + // Keep only the 100 most recent samples; the older ones are dropped. + std::move(pending_samples.end() - 100, pending_samples.end(), pending_samples.begin()); + pending_samples.erase(pending_samples.begin() + 100, pending_samples.end()); + + // If we get to this point, we're not reading samples fast enough, nudge a little ahead. + fractional_pending_samples += 1.0f; + } + + // Compute the number of samples to show this frame + float floating_sample_count = sampling_config.speed * delta_time * static_cast(buffer_size) + fractional_pending_samples; + + // Keep track of the fractional value to avoid speeding up or slowing down too much due to rounding errors. + // Generally we push very few samples per frame, so this matters. 
+ fractional_pending_samples = floating_sample_count - std::floor(floating_sample_count); + + auto sample_count = static_cast(floating_sample_count); + + // Clamp the number of samples + sample_count = std::max(1, std::min(sample_count, pending_samples.size())); + + // Get the frame time stats (not a continuous stat) + vkb::StatsProvider::Counters frame_time_sample = frame_time_provider->sample(delta_time); + + // Push the samples to circular buffers + std::for_each(pending_samples.begin(), pending_samples.begin() + sample_count, [this, frame_time_sample](auto &s) { + // Write the correct frame time into the continuous stats + s.insert(frame_time_sample.begin(), frame_time_sample.end()); + // Then push the sample to the counters list + this->push_sample(s); + }); + pending_samples.erase(pending_samples.begin(), pending_samples.begin() + sample_count); + + break; + } + } + + profile_counters(); +} + +} // namespace stats +} // namespace vkb \ No newline at end of file diff --git a/framework/stats/stats_common.h b/framework/stats/stats_common.h index 60751f7699..90cc150651 100644 --- a/framework/stats/stats_common.h +++ b/framework/stats/stats_common.h @@ -1,5 +1,5 @@ -/* Copyright (c) 2018-2022, Arm Limited and Contributors - * Copyright (c) 2020-2022, Broadcom Inc. +/* Copyright (c) 2018-2026, Arm Limited and Contributors + * Copyright (c) 2020-2026, Broadcom Inc. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -28,7 +28,7 @@ namespace vkb { /** - * @brief Handles of stats to be optionally enabled in @ref vkb::Stats + * @brief Handles of stats to be optionally enabled in @ref vkb::stats::Stats */ enum class StatIndex { @@ -123,7 +123,14 @@ class StatGraphData const std::string &format, float scale_factor = 1.0f, bool has_fixed_max = false, - float max_value = 0.0f); + float max_value = 0.0f) : + name(name), + format{format}, + scale_factor{scale_factor}, + has_fixed_max{has_fixed_max}, + max_value{max_value} + { + } StatGraphData() = default; diff --git a/framework/vulkan_sample.h b/framework/vulkan_sample.h index 288d98ae91..ac4e455624 100644 --- a/framework/vulkan_sample.h +++ b/framework/vulkan_sample.h @@ -29,7 +29,7 @@ #include "scene_graph/components/camera.h" #include "scene_graph/script.h" #include "scene_graph/scripts/animation.h" -#include "stats/hpp_stats.h" +#include "stats/stats.h" #if defined(PLATFORM__MACOS) # include @@ -119,11 +119,6 @@ template class RenderContext; } // namespace rendering -namespace stats -{ -class HPPStats; -} - template class VulkanSample : public vkb::Application { @@ -136,8 +131,6 @@ class VulkanSample : public vkb::Application VulkanSample() = default; ~VulkanSample() override; - using StatsType = typename std::conditional::type; - using Extent2DType = typename std::conditional::type; using DebugReportCallbackCreateInfoType = typename std::conditional::type; using DebugUtilsMessengerCreateInfoType = typename std::conditional::type; @@ -249,7 +242,7 @@ class VulkanSample : public vkb::Application */ void add_device_extension(const char *extension, bool optional = false); - void create_gui(const Window &window, StatsType const *stats = nullptr, const float font_size = 21.0f, bool explicit_update = false); + void create_gui(const Window &window, vkb::stats::Stats const *stats = nullptr, const float font_size = 21.0f, bool explicit_update = false); /** * @brief A helper to create a render 
context @@ -265,7 +258,7 @@ class VulkanSample : public vkb::Application vkb::rendering::RenderPipeline &get_render_pipeline(); vkb::rendering::RenderPipeline const &get_render_pipeline() const; vkb::scene_graph::Scene &get_scene(); - StatsType &get_stats(); + vkb::stats::Stats &get_stats(); SurfaceType get_surface() const; std::vector &get_surface_priority_list(); std::vector const &get_surface_priority_list() const; @@ -388,7 +381,7 @@ class VulkanSample : public vkb::Application std::unique_ptr gui; - std::unique_ptr stats; + std::unique_ptr stats; static constexpr float STATS_VIEW_RESET_TIME{10.0f}; // 10 seconds @@ -1065,7 +1058,7 @@ inline vkb::scene_graph::Scene &VulkanSample::get_scen } template -inline typename VulkanSample::StatsType &VulkanSample::get_stats() +inline vkb::stats::Stats &VulkanSample::get_stats() { if constexpr (bindingType == BindingType::Cpp) { @@ -1073,7 +1066,7 @@ inline typename VulkanSample::StatsType &VulkanSample: } else { - return reinterpret_cast(*stats); + return reinterpret_cast(*stats); } } @@ -1343,7 +1336,7 @@ inline bool VulkanSample::prepare(const ApplicationOptions &options create_render_context(); prepare_render_context(); - stats = std::make_unique(*render_context); + stats = std::make_unique(*render_context); // Start the sample in the first GUI configuration configuration.reset(); @@ -1352,7 +1345,7 @@ inline bool VulkanSample::prepare(const ApplicationOptions &options } template -inline void VulkanSample::create_gui(const Window &window, StatsType const *stats, const float font_size, bool explicit_update) +inline void VulkanSample::create_gui(const Window &window, vkb::stats::Stats const *stats, const float font_size, bool explicit_update) { if constexpr (bindingType == BindingType::Cpp) { @@ -1362,7 +1355,7 @@ inline void VulkanSample::create_gui(const Window &window, StatsTyp { gui = std::make_unique(reinterpret_cast(get_render_context()), window, - reinterpret_cast(stats), + reinterpret_cast(stats), font_size, 
explicit_update); }