From e2174a7c6304e0c4c5043693d065d763f77bbde8 Mon Sep 17 00:00:00 2001 From: Gary Sweet Date: Wed, 15 Apr 2020 12:56:23 +0100 Subject: [PATCH 01/11] Adding Vulkan-style statistics counters where available Previously, the 'stats' interface only used HWCPipe to read performance counters from the device. HWCPipe is not widely supported, so in practice this only worked on Arm devices. This change allows multiple 'StatsProviders' to be used. Each provider can supply all, some, or none of the requested counters, depending on its capabilities. Any counter not supplied by one provider is requested from the next provider in the list, until one supplies it or the list is exhausted. For example, CPU counters can come from HWCPipe while GPU counters come from the Vulkan performance query extension (VK_KHR_performance_query), which has been added as another provider. StatsProvider defines an abstract interface that is implemented by the concrete providers. The parameters used for graphing the statistics have also moved from the GUI classes into StatsProvider. This allows providers to modify the names, units and scaling of counters where these differ from the default values. 
--- bldsys/cmake/template/sample/sample.cpp.in | 8 +- framework/CMakeLists.txt | 24 +- framework/api_vulkan_sample.cpp | 2 +- framework/core/command_buffer.cpp | 21 + framework/core/command_buffer.h | 9 + framework/core/device.cpp | 63 ++- framework/core/instance.cpp | 11 + framework/core/query_pool.cpp | 67 +++ framework/core/query_pool.h | 80 +++ framework/gui.cpp | 36 +- framework/gui.h | 110 +---- framework/pch.h | 2 +- framework/rendering/render_frame.h | 1 + framework/stats.cpp | 356 -------------- framework/stats.h | 276 ----------- framework/stats/frame_time_stats_provider.cpp | 45 ++ framework/stats/frame_time_stats_provider.h | 46 ++ framework/stats/hwcpipe_stats_provider.cpp | 218 +++++++++ framework/stats/hwcpipe_stats_provider.h | 129 +++++ framework/stats/stats.cpp | 273 +++++++++++ framework/stats/stats.h | 169 +++++++ framework/stats/stats_common.h | 120 +++++ framework/stats/stats_provider.cpp | 53 ++ framework/stats/stats_provider.h | 106 ++++ framework/stats/vulkan_stats_provider.cpp | 460 ++++++++++++++++++ framework/stats/vulkan_stats_provider.h | 161 ++++++ framework/vulkan_sample.cpp | 12 +- framework/vulkan_sample.h | 4 +- samples/performance/afbc/afbc.cpp | 9 +- .../command_buffer_usage.cpp | 21 +- .../descriptor_management.cpp | 17 +- .../layout_transitions/layout_transitions.cpp | 11 +- samples/performance/msaa/msaa.cpp | 13 +- .../pipeline_barriers/pipeline_barriers.cpp | 11 +- .../pipeline_cache/pipeline_cache.cpp | 9 +- .../render_passes/render_passes.cpp | 18 +- .../render_subpasses/render_subpasses.cpp | 19 +- .../specialization_constants.cpp | 9 +- .../surface_rotation/surface_rotation.cpp | 20 +- .../swapchain_images/swapchain_images.cpp | 9 +- samples/performance/wait_idle/wait_idle.cpp | 9 +- tests/system_test/sub_tests/bonza/bonza.cpp | 2 +- tests/system_test/sub_tests/sponza/sponza.cpp | 2 +- .../test_framework/gltf_loader_test.cpp | 2 +- .../test_framework/vulkan_test.cpp | 2 +- 45 files changed, 2205 insertions(+), 840 
deletions(-) create mode 100644 framework/core/query_pool.cpp create mode 100644 framework/core/query_pool.h delete mode 100644 framework/stats.cpp delete mode 100644 framework/stats.h create mode 100644 framework/stats/frame_time_stats_provider.cpp create mode 100644 framework/stats/frame_time_stats_provider.h create mode 100644 framework/stats/hwcpipe_stats_provider.cpp create mode 100644 framework/stats/hwcpipe_stats_provider.h create mode 100644 framework/stats/stats.cpp create mode 100644 framework/stats/stats.h create mode 100644 framework/stats/stats_common.h create mode 100644 framework/stats/stats_provider.cpp create mode 100644 framework/stats/stats_provider.h create mode 100644 framework/stats/vulkan_stats_provider.cpp create mode 100644 framework/stats/vulkan_stats_provider.h diff --git a/bldsys/cmake/template/sample/sample.cpp.in b/bldsys/cmake/template/sample/sample.cpp.in index ea64d92aee..332e1dc4e8 100644 --- a/bldsys/cmake/template/sample/sample.cpp.in +++ b/bldsys/cmake/template/sample/sample.cpp.in @@ -23,7 +23,7 @@ #include "platform/filesystem.h" #include "platform/platform.h" #include "rendering/subpasses/forward_subpass.h" -#include "stats.h" +#include "stats/stats.h" @SAMPLE_NAME@::@SAMPLE_NAME@() { @@ -51,9 +51,11 @@ bool @SAMPLE_NAME@::prepare(vkb::Platform &platform) render_pipeline.add_subpass(std::move(scene_subpass)); set_render_pipeline(std::move(render_pipeline)); + size_t num_framebuffers = get_render_context().get_render_frames().size(); + // Add a GUI with the stats you want to monitor - stats = std::make_unique(std::set{vkb::StatIndex::frame_times}); - gui = std::make_unique(*this, platform.get_window()); + stats = std::make_unique(get_device(), num_framebuffers, std::set{vkb::StatIndex::frame_times}); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/framework/CMakeLists.txt b/framework/CMakeLists.txt index 829d084b7c..c39633507c 100644 --- a/framework/CMakeLists.txt +++ 
b/framework/CMakeLists.txt @@ -22,7 +22,6 @@ project(framework LANGUAGES C CXX) set(FRAMEWORK_FILES # Header Files gui.h - stats.h glsl_compiler.h spirv_reflection.h gltf_loader.h @@ -41,7 +40,6 @@ set(FRAMEWORK_FILES camera.h # Source Files gui.cpp - stats.cpp glsl_compiler.cpp spirv_reflection.cpp gltf_loader.cpp @@ -164,6 +162,22 @@ set(SCENE_GRAPH_SCRIPTS_FILES scene_graph/scripts/free_camera.cpp scene_graph/scripts/node_animation.cpp) +set(STATS_FILES + # Header Files + stats/stats.h + stats/stats_common.h + stats/stats_provider.h + stats/frame_time_stats_provider.h + stats/hwcpipe_stats_provider.h + stats/vulkan_stats_provider.h + + # Source Files + stats/stats.cpp + stats/stats_provider.cpp + stats/frame_time_stats_provider.cpp + stats/hwcpipe_stats_provider.cpp + stats/vulkan_stats_provider.cpp) + set(CORE_FILES # Header Files core/instance.h @@ -186,6 +200,7 @@ set(CORE_FILES core/sampler.h core/framebuffer.h core/render_pass.h + core/query_pool.h # Source Files core/instance.cpp core/physical_device.cpp @@ -207,7 +222,8 @@ set(CORE_FILES core/instance.cpp core/sampler.cpp core/framebuffer.cpp - core/render_pass.cpp) + core/render_pass.cpp + core/query_pool.cpp) set(PLATFORM_FILES # Header Files @@ -292,6 +308,7 @@ source_group("rendering\\subpasses" FILES ${RENDERING_SUBPASSES_FILES}) source_group("scene_graph\\" FILES ${SCENE_GRAPH_FILES}) source_group("scene_graph\\components\\" FILES ${SCENE_GRAPH_COMPONENT_FILES}) source_group("scene_graph\\scripts\\" FILES ${SCENE_GRAPH_SCRIPTS_FILES}) +source_group("stats\\" FILES ${STATS_FILES}) source_group("graphing\\" FILES ${GRAPHING_FILES}) set(PROJECT_FILES @@ -305,6 +322,7 @@ set(PROJECT_FILES ${SCENE_GRAPH_FILES} ${SCENE_GRAPH_COMPONENT_FILES} ${SCENE_GRAPH_SCRIPTS_FILES} + ${STATS_FILES} ${GRAPHING_FILES}) # Add files based on platform diff --git a/framework/api_vulkan_sample.cpp b/framework/api_vulkan_sample.cpp index 2e97439d63..d1f4f56436 100644 --- a/framework/api_vulkan_sample.cpp +++ 
b/framework/api_vulkan_sample.cpp @@ -70,7 +70,7 @@ bool ApiVulkanSample::prepare(vkb::Platform &platform) width = get_render_context().get_surface_extent().width; height = get_render_context().get_surface_extent().height; - gui = std::make_unique(*this, platform.get_window(), 15.0f, true); + gui = std::make_unique(*this, platform.get_window(), /*stats=*/nullptr, 15.0f, true); gui->prepare(pipeline_cache, render_pass, {load_shader("uioverlay/uioverlay.vert", VK_SHADER_STAGE_VERTEX_BIT), load_shader("uioverlay/uioverlay.frag", VK_SHADER_STAGE_FRAGMENT_BIT)}); diff --git a/framework/core/command_buffer.cpp b/framework/core/command_buffer.cpp index fbc6dbe967..5e47dc9273 100644 --- a/framework/core/command_buffer.cpp +++ b/framework/core/command_buffer.cpp @@ -755,6 +755,27 @@ const bool CommandBuffer::is_render_size_optimal(const VkExtent2D &framebuffer_e ((render_area.extent.height % render_area_granularity.height == 0) || (render_area.offset.y + render_area.extent.height == framebuffer_extent.height))); } +void CommandBuffer::reset_query_pool(const QueryPool &query_pool, uint32_t first_query, uint32_t query_count) +{ + vkCmdResetQueryPool(get_handle(), query_pool.get_handle(), first_query, query_count); +} + +void CommandBuffer::begin_query(const QueryPool &query_pool, uint32_t query, VkQueryControlFlags flags) +{ + vkCmdBeginQuery(get_handle(), query_pool.get_handle(), query, flags); +} + +void CommandBuffer::end_query(const QueryPool &query_pool, uint32_t query) +{ + vkCmdEndQuery(get_handle(), query_pool.get_handle(), query); +} + +void CommandBuffer::write_timestamp(VkPipelineStageFlagBits pipelineStage, + const QueryPool &query_pool, uint32_t query) +{ + vkCmdWriteTimestamp(get_handle(), pipelineStage, query_pool.get_handle(), query); +} + VkResult CommandBuffer::reset(ResetMode reset_mode) { VkResult result = VK_SUCCESS; diff --git a/framework/core/command_buffer.h b/framework/core/command_buffer.h index f06f2394e5..eb28e7308b 100644 --- 
a/framework/core/command_buffer.h +++ b/framework/core/command_buffer.h @@ -24,6 +24,7 @@ #include "core/buffer.h" #include "core/image.h" #include "core/image_view.h" +#include "core/query_pool.h" #include "core/sampler.h" #include "rendering/pipeline_state.h" #include "rendering/render_target.h" @@ -219,6 +220,14 @@ class CommandBuffer void set_update_after_bind(bool update_after_bind_); + void reset_query_pool(const QueryPool &query_pool, uint32_t first_query, uint32_t query_count); + + void begin_query(const QueryPool &query_pool, uint32_t query, VkQueryControlFlags flags); + + void end_query(const QueryPool &query_pool, uint32_t query); + + void write_timestamp(VkPipelineStageFlagBits pipelineStage, const QueryPool &query_pool, uint32_t query); + /** * @brief Reset the command buffer to a state where it can be recorded to * @param reset_mode How to reset the buffer, should match the one used by the pool to allocate it diff --git a/framework/core/device.cpp b/framework/core/device.cpp index bcb135cc9f..501a80e1af 100644 --- a/framework/core/device.cpp +++ b/framework/core/device.cpp @@ -68,13 +68,48 @@ Device::Device(const PhysicalDevice &gpu, VkSurfaceKHR surface, std::unordered_m bool can_get_memory_requirements = is_extension_supported("VK_KHR_get_memory_requirements2"); bool has_dedicated_allocation = is_extension_supported("VK_KHR_dedicated_allocation"); + // For performance queries, we also use host query reset since queryPool resets cannot + // live in the same command buffer as beginQuery + bool has_performance_query = is_extension_supported("VK_KHR_performance_query") && + is_extension_supported("VK_EXT_host_query_reset"); + if (can_get_memory_requirements && has_dedicated_allocation) { enabled_extensions.push_back("VK_KHR_get_memory_requirements2"); enabled_extensions.push_back("VK_KHR_dedicated_allocation"); + LOGI("Dedicated Allocation enabled"); } + VkPhysicalDeviceHostQueryResetFeatures hqr_features{}; + hqr_features.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES; + + VkPhysicalDevicePerformanceQueryFeaturesKHR perf_features{}; + perf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR; + perf_features.pNext = &hqr_features; + + if (has_performance_query) + { + // Must have VK_KHR_get_physical_device_properties2 as it's a prerequisite of perf query + VkPhysicalDeviceFeatures2KHR supported_features{}; + supported_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; + supported_features.pNext = &perf_features; + + // Check the feature support bits + vkGetPhysicalDeviceFeatures2KHR(gpu.get_handle(), &supported_features); + + if (perf_features.performanceCounterQueryPools && hqr_features.hostQueryReset) + { + enabled_extensions.push_back("VK_KHR_performance_query"); + enabled_extensions.push_back("VK_EXT_host_query_reset"); + LOGI("Performance query enabled"); + } + else + { + has_performance_query = false; + } + } + // Check that extensions are supported before trying to create the device std::vector unsupported_extensions{}; for (auto &extension : requested_extensions) @@ -123,16 +158,34 @@ Device::Device(const PhysicalDevice &gpu, VkSurfaceKHR surface, std::unordered_m VkDeviceCreateInfo create_info{VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO}; - // Latest requested feature will have the pNext's all set up for device creation. 
- create_info.pNext = gpu.get_requested_extension_features(); - create_info.pQueueCreateInfos = queue_create_infos.data(); create_info.queueCreateInfoCount = to_u32(queue_create_infos.size()); - const auto requested_gpu_features = gpu.get_requested_features(); - create_info.pEnabledFeatures = &requested_gpu_features; create_info.enabledExtensionCount = to_u32(enabled_extensions.size()); create_info.ppEnabledExtensionNames = enabled_extensions.data(); + const auto requested_gpu_features = gpu.get_requested_features(); + + if (has_performance_query) + { + // Ensure we turn on the feature bits we want + VkPhysicalDeviceFeatures2KHR requested_features{}; + requested_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; + requested_features.features = requested_gpu_features; + requested_features.pNext = &perf_features; + + // The pNext chain will be perf_features -> hqr_features -> gpu.get_requested_extension_features() + hqr_features.pNext = gpu.get_requested_extension_features(); + + create_info.pNext = &requested_features; + create_info.pEnabledFeatures = nullptr; + } + else + { + // Latest requested feature will have the pNext's all set up for device creation. + create_info.pNext = gpu.get_requested_extension_features(); + create_info.pEnabledFeatures = &requested_gpu_features; + } + VkResult result = vkCreateDevice(gpu.get_handle(), &create_info, nullptr, &handle); if (result != VK_SUCCESS) diff --git a/framework/core/instance.cpp b/framework/core/instance.cpp index 28148c42ba..51326def60 100644 --- a/framework/core/instance.cpp +++ b/framework/core/instance.cpp @@ -187,6 +187,17 @@ Instance::Instance(const std::string & application_nam enabled_extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); } + for (auto &available_extension : available_instance_extensions) + { + // VK_KHR_get_physical_device_properties2 is a prerequisite of VK_KHR_performance_query + // which will be used for stats gathering where available. 
+ if (strcmp(available_extension.extensionName, VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME) == 0) + { + LOGI("{} is available, enabling it", VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + enabled_extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + } + } + auto extension_error = false; for (auto extension : required_extensions) { diff --git a/framework/core/query_pool.cpp b/framework/core/query_pool.cpp new file mode 100644 index 0000000000..8613979afa --- /dev/null +++ b/framework/core/query_pool.cpp @@ -0,0 +1,67 @@ +/* Copyright (c) 2020, Broadcom Inc. and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "query_pool.h" + +#include "device.h" + +namespace vkb +{ +QueryPool::QueryPool(Device &d, const VkQueryPoolCreateInfo &info) : + device{d} +{ + VK_CHECK(vkCreateQueryPool(device.get_handle(), &info, nullptr, &handle)); +} + +QueryPool::QueryPool(QueryPool &&other) : + device{other.device}, + handle{other.handle} +{ + other.handle = VK_NULL_HANDLE; +} + +QueryPool::~QueryPool() +{ + if (handle != VK_NULL_HANDLE) + { + vkDestroyQueryPool(device.get_handle(), handle, nullptr); + } +} + +VkQueryPool QueryPool::get_handle() const +{ + assert(handle != VK_NULL_HANDLE && "QueryPool handle is invalid"); + return handle; +} + +void QueryPool::host_reset(uint32_t firstQuery, uint32_t queryCount) +{ + assert(device.is_enabled("VK_EXT_host_query_reset") && + "VK_EXT_host_query_reset needs to be enabled to call QueryPool::host_reset"); + + vkResetQueryPoolEXT(device.get_handle(), get_handle(), firstQuery, queryCount); +} + +VkResult QueryPool::get_results(uint32_t first_query, uint32_t num_queries, + size_t result_bytes, void *results, VkDeviceSize stride, + VkQueryResultFlags flags) +{ + return vkGetQueryPoolResults(device.get_handle(), get_handle(), first_query, num_queries, + result_bytes, results, stride, flags); +} + +} // namespace vkb diff --git a/framework/core/query_pool.h b/framework/core/query_pool.h new file mode 100644 index 0000000000..b92c6dc40d --- /dev/null +++ b/framework/core/query_pool.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2020, Broadcom Inc. and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "common/helpers.h" +#include "common/vk_common.h" + +namespace vkb +{ +class Device; + +/** + * @brief Represents a Vulkan Query Pool + */ +class QueryPool +{ + public: + /** + * @brief Creates a Vulkan Query Pool + * @param d The device to use + * @param info Creation details + */ + QueryPool(Device &d, const VkQueryPoolCreateInfo &info); + + QueryPool(const QueryPool &) = delete; + + QueryPool(QueryPool &&pool); + + ~QueryPool(); + + QueryPool &operator=(const QueryPool &) = delete; + + QueryPool &operator=(QueryPool &&) = delete; + + /** + * @return The vulkan query pool handle + */ + VkQueryPool get_handle() const; + + /** + * @brief Reset a range of queries in the query pool. Only call if VK_EXT_host_query_reset is enabled. 
+ * @param pool The query pool + * @param firstQuery The first query to reset + * @param queryCount The number of queries to reset + */ + void host_reset(uint32_t firstQuery, uint32_t queryCount); + + /** + * @brief Get query pool results + * @param first_query The initial query index + * @param num_queries The number of queries to read + * @param results Result vector, must be large enough to hold results + * @param stride The stride in bytes between results for individual queries + * @param flags A bitmask of VkQueryResultFlagBits + */ + VkResult get_results(uint32_t first_query, uint32_t num_queries, + size_t result_bytes, void *results, VkDeviceSize stride, + VkQueryResultFlags flags); + + private: + Device &device; + + VkQueryPool handle{VK_NULL_HANDLE}; +}; +} // namespace vkb diff --git a/framework/gui.cpp b/framework/gui.cpp index 5697b11ea3..8d20058d4c 100644 --- a/framework/gui.cpp +++ b/framework/gui.cpp @@ -64,7 +64,7 @@ void upload_draw_data(ImDrawData *draw_data, const uint8_t *vertex_data, const u } } -inline void reset_graph_max_value(Gui::StatsView::GraphData &graph_data) +inline void reset_graph_max_value(StatGraphData &graph_data) { // If it does not have a fixed max if (!graph_data.has_fixed_max) @@ -93,11 +93,13 @@ const ImGuiWindowFlags Gui::options_flags = Gui::common_flags; const ImGuiWindowFlags Gui::info_flags = Gui::common_flags | ImGuiWindowFlags_NoInputs; -Gui::Gui(VulkanSample &sample_, const Window &window, const float font_size, bool explicit_update) : +Gui::Gui(VulkanSample &sample_, const Window &window, const Stats *stats, + const float font_size, bool explicit_update) : sample{sample_}, content_scale_factor{window.get_content_scale_factor()}, dpi_factor{window.get_dpi_factor() * content_scale_factor}, - explicit_update{explicit_update} + explicit_update{explicit_update}, + stats_view(stats) { ImGui::CreateContext(); @@ -731,6 +733,20 @@ bool Gui::is_debug_view_active() const return debug_view.active; } 
+Gui::StatsView::StatsView(const Stats *stats) +{ + if (stats == nullptr) + return; + + // Request graph data information for each stat and record it in graph_map + const std::set &indices = stats->get_requested_stats(); + + for (StatIndex i : indices) + { + graph_map[i] = stats->get_graph_data(i); + } +} + void Gui::StatsView::reset_max_value(const StatIndex index) { auto pr = graph_map.find(index); @@ -884,21 +900,9 @@ void Gui::show_debug_window(DebugInfo &debug_info, const ImVec2 &position) ImGui::End(); } -Gui::StatsView::GraphData::GraphData(const std::string &name_, - const std::string &graph_label_format_, - float scale_factor_, - bool has_fixed_max_, - float max_value_) : - name(name_), - format{graph_label_format_}, - scale_factor{scale_factor_}, - has_fixed_max{has_fixed_max_}, - max_value{max_value_} -{} - void Gui::show_stats(const Stats &stats) { - for (const auto &stat_index : stats.get_enabled_stats()) + for (const auto &stat_index : stats.get_requested_stats()) { // Find the graph data of this stat index auto pr = stats_view.graph_map.find(stat_index); diff --git a/framework/gui.h b/framework/gui.h index d4c0eb292a..b1c4270e57 100644 --- a/framework/gui.h +++ b/framework/gui.h @@ -32,7 +32,7 @@ #include "platform/filesystem.h" #include "platform/input_events.h" #include "rendering/render_context.h" -#include "stats.h" +#include "stats/stats.h" namespace vkb { @@ -187,31 +187,10 @@ class Gui { public: /** - * @brief Per-statistic graph data + * @brief Constructs a StatsView + * @param stats Const pointer to the Stats data object; may be null */ - class GraphData - { - public: - /** - * @brief Constructs data for the graph - * @param name Name of the Stat - * @param format Format of the label - * @param scale_factor Any scaling to apply to the data - * @param has_fixed_max Whether the data should have a fixed max value - * @param max_value The maximum value to use - */ - GraphData(const std::string &name, - const std::string &format, - float 
scale_factor = 1.0f, - bool has_fixed_max = false, - float max_value = 0.0f); - - std::string name; - std::string format; - float scale_factor; - bool has_fixed_max; - float max_value; - }; + StatsView(const Stats *stats); /** * @brief Resets the max values for the stats @@ -225,83 +204,7 @@ class Gui void reset_max_value(const StatIndex index); /// Per-statistic max values - std::map - graph_map{ - {StatIndex::frame_times, - {/* name = */ "Frame Times", - /* format = */ "{:3.1f} ms", - /* scale_factor = */ 1000.0f}}, - {StatIndex::cpu_cycles, - {/* name = */ "CPU Cycles", - /* format = */ "{:4.1f} M/s", - /* scale_factor = */ float(1e-6)}}, - {StatIndex::cpu_instructions, - {/* name = */ "CPU Instructions", - /* format = */ "{:4.1f} M/s", - /* scale_factor = */ float(1e-6)}}, - {StatIndex::cache_miss_ratio, - {/* name = */ "Cache Miss Ratio", - /* format = */ "{:3.1f}%", - /* scale_factor = */ 100.0f, - /* has_fixed_max = */ true, - /* max_value = */ 100.0f}}, - {StatIndex::branch_miss_ratio, - {/* name = */ "Branch Miss Ratio", - /* format = */ "{:3.1f}%", - /* scale_factor = */ 100.0f, - /* has_fixed_max = */ true, - /* max_value = */ 100.0f}}, - {StatIndex::gpu_cycles, - {/* name = */ "GPU Cycles", - /* format = */ "{:4.1f} M/s", - /* scale_factor = */ float(1e-6)}}, - {StatIndex::vertex_compute_cycles, - {/* name = */ "Vertex Compute Cycles", - /* format = */ "{:4.1f} M/s", - /* scale_factor = */ float(1e-6)}}, - {StatIndex::tiles, - {/* name = */ "Tiles", - /* format = */ "{:4.1f} k/s", - /* scale_factor = */ float(1e-3)}}, - {StatIndex::killed_tiles, - {/* name = */ "Tiles killed by CRC match", - /* format = */ "{:4.1f} k/s", - /* scale_factor = */ float(1e-3)}}, - {StatIndex::fragment_cycles, - {/* name = */ "Fragment Cycles", - /* format = */ "{:4.1f} M/s", - /* scale_factor = */ float(1e-6)}}, - {StatIndex::fragment_jobs, - {/* name = */ "Fragment Jobs", - /* format = */ "{:4.0f}/s"}}, - {StatIndex::tex_cycles, - {/* name = */ "Shader Texture Cycles", - 
/* format = */ "{:4.0f} k/s", - /* scale_factor = */ float(1e-3)}}, - {StatIndex::l2_ext_reads, - {/* name = */ "External Reads", - /* format = */ "{:4.1f} M/s", - /* scale_factor = */ float(1e-6)}}, - {StatIndex::l2_ext_writes, - {/* name = */ "External Writes", - /* format = */ "{:4.1f} M/s", - /* scale_factor = */ float(1e-6)}}, - {StatIndex::l2_ext_read_stalls, - {/* name = */ "External Read Stalls", - /* format = */ "{:4.1f} M/s", - /* scale_factor = */ float(1e-6)}}, - {StatIndex::l2_ext_write_stalls, - {/* name = */ "External Write Stalls", - /* format = */ "{:4.1f} M/s", - /* scale_factor = */ float(1e-6)}}, - {StatIndex::l2_ext_read_bytes, - {/* name = */ "External Read Bytes", - /* format = */ "{:4.1f} MiB/s", - /* scale_factor = */ 1.0f / (1024.0f * 1024.0f)}}, - {StatIndex::l2_ext_write_bytes, - {/* name = */ "External Write Bytes", - /* format = */ "{:4.1f} MiB/s", - /* scale_factor = */ 1.0f / (1024.0f * 1024.0f)}}}; + std::map graph_map; float graph_height{50.0f}; @@ -339,7 +242,8 @@ class Gui * @param font_size The font size * @param explicit_update If true, update buffers every frame */ - Gui(VulkanSample &sample, const Window &window, const float font_size = 21.0f, bool explicit_update = false); + Gui(VulkanSample &sample, const Window &window, const Stats *stats = nullptr, + const float font_size = 21.0f, bool explicit_update = false); /** * @brief Destroys the Gui diff --git a/framework/pch.h b/framework/pch.h index fee7a71367..c45199a067 100644 --- a/framework/pch.h +++ b/framework/pch.h @@ -33,6 +33,6 @@ #include "resource_replay.h" #include "semaphore_pool.h" #include "spirv_reflection.h" -#include "stats.h" +#include "stats/stats.h" #include "timer.h" #include "vulkan_sample.h" diff --git a/framework/rendering/render_frame.h b/framework/rendering/render_frame.h index 8f8fcbd763..a7350f5d64 100644 --- a/framework/rendering/render_frame.h +++ b/framework/rendering/render_frame.h @@ -26,6 +26,7 @@ #include "core/command_pool.h" #include 
"core/device.h" #include "core/image.h" +#include "core/query_pool.h" #include "core/queue.h" #include "fence_pool.h" #include "rendering/render_target.h" diff --git a/framework/stats.cpp b/framework/stats.cpp deleted file mode 100644 index 12f77c8e52..0000000000 --- a/framework/stats.cpp +++ /dev/null @@ -1,356 +0,0 @@ -/* Copyright (c) 2018-2020, Arm Limited and Contributors - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 the "License"; - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "stats.h" - -#include "common/error.h" - -namespace vkb -{ -Stats::Stats(const std::set &enabled_stats, CounterSamplingConfig sampling_config, - const size_t buffer_size) : - enabled_stats(enabled_stats), - sampling_config(sampling_config), - stop_worker(std::make_unique>()) -{ - assert(buffer_size >= 2 && "Buffers size should be greater than 2"); - - for (const auto &stat : enabled_stats) - { - counters[stat] = std::vector(buffer_size, 0); - } - - stat_data = { - {StatIndex::frame_times, {StatScaling::None}}, - {StatIndex::cpu_cycles, {hwcpipe::CpuCounter::Cycles}}, - {StatIndex::cpu_instructions, {hwcpipe::CpuCounter::Instructions}}, - {StatIndex::cache_miss_ratio, {hwcpipe::CpuCounter::CacheMisses, StatScaling::ByCounter, hwcpipe::CpuCounter::CacheReferences}}, - {StatIndex::branch_miss_ratio, {hwcpipe::CpuCounter::BranchMisses, StatScaling::ByCounter, hwcpipe::CpuCounter::BranchInstructions}}, - {StatIndex::gpu_cycles, {hwcpipe::GpuCounter::GpuCycles}}, - 
{StatIndex::vertex_compute_cycles, {hwcpipe::GpuCounter::VertexComputeCycles}}, - {StatIndex::tiles, {hwcpipe::GpuCounter::Tiles}}, - {StatIndex::killed_tiles, {hwcpipe::GpuCounter::TransactionEliminations}}, - {StatIndex::fragment_cycles, {hwcpipe::GpuCounter::FragmentCycles}}, - {StatIndex::fragment_jobs, {hwcpipe::GpuCounter::FragmentJobs}}, - {StatIndex::l2_reads_lookups, {hwcpipe::GpuCounter::CacheReadLookups}}, - {StatIndex::l2_ext_reads, {hwcpipe::GpuCounter::ExternalMemoryReadAccesses}}, - {StatIndex::l2_writes_lookups, {hwcpipe::GpuCounter::CacheWriteLookups}}, - {StatIndex::l2_ext_writes, {hwcpipe::GpuCounter::ExternalMemoryWriteAccesses}}, - {StatIndex::l2_ext_read_stalls, {hwcpipe::GpuCounter::ExternalMemoryReadStalls}}, - {StatIndex::l2_ext_write_stalls, {hwcpipe::GpuCounter::ExternalMemoryWriteStalls}}, - {StatIndex::l2_ext_read_bytes, {hwcpipe::GpuCounter::ExternalMemoryReadBytes}}, - {StatIndex::l2_ext_write_bytes, {hwcpipe::GpuCounter::ExternalMemoryWriteBytes}}, - {StatIndex::tex_cycles, {hwcpipe::GpuCounter::ShaderTextureCycles}}, - }; - - hwcpipe::CpuCounterSet enabled_cpu_counters{}; - hwcpipe::GpuCounterSet enabled_gpu_counters{}; - - for (const auto &stat : enabled_stats) - { - auto res = stat_data.find(stat); - if (res != stat_data.end()) - { - switch (res->second.type) - { - case StatType::Cpu: - enabled_cpu_counters.insert(res->second.cpu_counter); - - if (res->second.divisor_cpu_counter != hwcpipe::CpuCounter::MaxValue) - { - enabled_cpu_counters.insert(res->second.divisor_cpu_counter); - } - break; - case StatType::Gpu: - enabled_gpu_counters.insert(res->second.gpu_counter); - - if (res->second.divisor_gpu_counter != hwcpipe::GpuCounter::MaxValue) - { - enabled_gpu_counters.insert(res->second.divisor_gpu_counter); - } - break; - default: - break; - } - } - } - - hwcpipe = std::make_unique(enabled_cpu_counters, enabled_gpu_counters); - hwcpipe->run(); - - if (sampling_config.mode == CounterSamplingMode::Continuous) - { - worker_thread = 
std::thread([this] { - continuous_sampling_worker(stop_worker->get_future()); - }); - - // Reduce smoothing for continuous sampling - alpha_smoothing = 0.6f; - } -} - -Stats::~Stats() -{ - if (stop_worker) - { - stop_worker->set_value(); - } - - if (worker_thread.joinable()) - { - worker_thread.join(); - } -} - -void Stats::resize(const size_t width) -{ - // The circular buffer size will be 1/16th of the width of the screen - // which means every sixteen pixels represent one graph value - size_t buffers_size = width >> 4; - - for (auto &counter : counters) - { - counter.second.resize(buffers_size); - counter.second.shrink_to_fit(); - } -} - -bool Stats::is_available(const StatIndex index) const -{ - const auto &data = stat_data.find(index); - if (data == stat_data.end()) - { - return false; - } - - switch (data->second.type) - { - case StatType::Cpu: - { - if (hwcpipe->cpu_profiler()) - { - const auto &cpu_supp = hwcpipe->cpu_profiler()->supported_counters(); - return cpu_supp.find(data->second.cpu_counter) != cpu_supp.end(); - } - break; - } - case StatType::Gpu: - { - if (hwcpipe->gpu_profiler()) - { - const auto &gpu_supp = hwcpipe->gpu_profiler()->supported_counters(); - return gpu_supp.find(data->second.gpu_counter) != gpu_supp.end(); - } - break; - } - case StatType::Other: - { - return true; - } - } - - return false; -} - -void add_smoothed_value(std::vector &values, float value, float alpha) -{ - assert(values.size() >= 2 && "Buffers size should be greater than 2"); - - if (values.size() == values.capacity()) - { - // Shift values to the left to make space at the end and update counters - std::rotate(values.begin(), values.begin() + 1, values.end()); - } - - // Use an exponential moving average to smooth values - values.back() = value * alpha + *(values.end() - 2) * (1.0f - alpha); -} - -void Stats::update(float delta_time) -{ - switch (sampling_config.mode) - { - case CounterSamplingMode::Polling: - { - auto m = hwcpipe->sample(); - pending_samples = 
{{m.cpu ? *m.cpu : hwcpipe::CpuMeasurements{}, - m.gpu ? *m.gpu : hwcpipe::GpuMeasurements{}, - delta_time}}; - break; - } - case CounterSamplingMode::Continuous: - { - // Check that we have no pending samples to be shown - if (pending_samples.size() == 0) - { - std::unique_lock lock(continuous_sampling_mutex); - if (!should_add_to_continuous_samples) - { - // If we have no pending samples, we let the worker thread - // capture samples for the next frame - should_add_to_continuous_samples = true; - } - else - { - // The worker thread has captured a frame, so we stop it - // and read the samples - should_add_to_continuous_samples = false; - pending_samples = continuous_samples; - continuous_samples.clear(); - } - } - - // Ensure the number of pending samples is capped at a reasonable value - if (pending_samples.size() > 100) - { - pending_samples.resize(100); - } - break; - } - } - - // Handle delta time counter - auto delta_time_counter = counters.find(StatIndex::frame_times); - if (delta_time_counter != counters.end()) - { - add_smoothed_value(delta_time_counter->second, delta_time, alpha_smoothing); - } - - if (pending_samples.size() == 0) - { - return; - } - - // Compute the number of samples to show this frame - size_t sample_count = static_cast(sampling_config.speed * delta_time) * pending_samples.size(); - - // Clamp the number of samples - sample_count = std::max(1, std::min(sample_count, pending_samples.size())); - - // Push the samples to circular buffers - std::for_each(pending_samples.end() - sample_count, pending_samples.end(), [this](const auto &s) { - this->push_sample(s); - }); - pending_samples.erase(pending_samples.end() - sample_count, pending_samples.end()); -} - -void Stats::continuous_sampling_worker(std::future should_terminate) -{ - worker_timer.tick(); - hwcpipe->sample(); - - while (should_terminate.wait_for(std::chrono::seconds(0)) != std::future_status::ready) - { - auto delta_time = static_cast(worker_timer.tick()); - auto interval = 
std::chrono::duration_cast>(sampling_config.interval).count(); - - // Ensure we wait for the interval specified in config - if (delta_time < interval) - { - std::this_thread::sleep_for(std::chrono::duration(interval - delta_time)); - delta_time += static_cast(worker_timer.tick()); - } - - // Sample counters - const auto measurements = hwcpipe->sample(); - - // Add the new sample to the vector of continuous samples - { - std::unique_lock lock(continuous_sampling_mutex); - continuous_samples.push_back({measurements.cpu ? *measurements.cpu : hwcpipe::CpuMeasurements{}, - measurements.gpu ? *measurements.gpu : hwcpipe::GpuMeasurements{}, - delta_time}); - } - } -} - -void Stats::push_sample(const MeasurementSample &sample) -{ - for (auto &c : counters) - { - auto &values = c.second; - - const auto data = stat_data.find(c.first); - if (data == stat_data.end()) - { - continue; - } - - float measurement = 0; - switch (data->second.type) - { - case StatType::Cpu: - { - const auto &cpu_res = sample.cpu.find(data->second.cpu_counter); - if (cpu_res != sample.cpu.end()) - { - measurement = cpu_res->second.get(); - } - - if (data->second.scaling == StatScaling::ByCounter) - { - const auto &divisor_cpu_res = sample.cpu.find(data->second.divisor_cpu_counter); - if (divisor_cpu_res != sample.cpu.end()) - { - measurement /= divisor_cpu_res->second.get(); - } - else - { - measurement = 0; - } - } - break; - } - case StatType::Gpu: - { - const auto &gpu_res = sample.gpu.find(data->second.gpu_counter); - if (gpu_res != sample.gpu.end()) - { - measurement = gpu_res->second.get(); - } - - if (data->second.scaling == StatScaling::ByCounter) - { - const auto &divisor_gpu_res = sample.gpu.find(data->second.divisor_gpu_counter); - if (divisor_gpu_res != sample.gpu.end()) - { - measurement /= divisor_gpu_res->second.get(); - } - else - { - measurement = 0; - } - } - break; - } - default: - { - // Skip to next counter - continue; - } - } - - if (data->second.scaling == 
StatScaling::ByDeltaTime) - { - measurement /= sample.delta_time; - } - - add_smoothed_value(values, measurement, alpha_smoothing); - } -} - -} // namespace vkb diff --git a/framework/stats.h b/framework/stats.h deleted file mode 100644 index 62380fa071..0000000000 --- a/framework/stats.h +++ /dev/null @@ -1,276 +0,0 @@ -/* Copyright (c) 2018-2020, Arm Limited and Contributors - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 the "License"; - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "common/error.h" - -VKBP_DISABLE_WARNINGS() -#include -VKBP_ENABLE_WARNINGS() - -#include "timer.h" - -namespace vkb -{ -/** - * @brief Handles of stats to be optionally enabled in @ref Stats - */ -enum class StatIndex -{ - frame_times, - cpu_cycles, - cpu_instructions, - cache_miss_ratio, - branch_miss_ratio, - gpu_cycles, - vertex_compute_cycles, - tiles, - killed_tiles, - fragment_jobs, - fragment_cycles, - l2_reads_lookups, - l2_ext_reads, - l2_writes_lookups, - l2_ext_writes, - l2_ext_read_stalls, - l2_ext_write_stalls, - l2_ext_read_bytes, - l2_ext_write_bytes, - tex_cycles -}; - -struct StatIndexHash -{ - template - std::size_t operator()(T t) const - { - return static_cast(t); - } -}; - -enum class StatType -{ - Cpu, - Gpu, - Other -}; - -enum class StatScaling -{ - // The stat is not scaled - None, - - // The stat is scaled by delta time, useful for per-second values - ByDeltaTime, - - // The 
stat is scaled by another counter, useful for ratios - ByCounter -}; - -struct StatData -{ - StatType type; - StatScaling scaling; - hwcpipe::CpuCounter cpu_counter; - hwcpipe::CpuCounter divisor_cpu_counter; - hwcpipe::GpuCounter gpu_counter; - hwcpipe::GpuCounter divisor_gpu_counter; - - /** - * @brief Constructor for simple stats that do not use any counter - * @param stat_scaling The scaling to be applied to the stat - */ - StatData(StatScaling stat_scaling = StatScaling::ByDeltaTime) : - type(StatType::Other), - scaling(stat_scaling) - {} - - /** - * @brief Constructor for CPU counters - * @param c The CPU counter to be gathered - * @param stat_scaling The scaling to be applied to the stat - * @param divisor The CPU counter to be used as divisor if scaling is ByCounter - */ - StatData(hwcpipe::CpuCounter c, StatScaling stat_scaling = StatScaling::ByDeltaTime, - hwcpipe::CpuCounter divisor = hwcpipe::CpuCounter::MaxValue) : - type(StatType::Cpu), - scaling(stat_scaling), - cpu_counter(c), - divisor_cpu_counter(divisor) - {} - - /** - * @brief Constructor for GPU counters - * @param c The GPU counter to be gathered - * @param stat_scaling The scaling to be applied to the stat - * @param divisor The GPU counter to be used as divisor if scaling is ByCounter - */ - StatData(hwcpipe::GpuCounter c, StatScaling stat_scaling = StatScaling::ByDeltaTime, - hwcpipe::GpuCounter divisor = hwcpipe::GpuCounter::MaxValue) : - type(StatType::Gpu), - scaling(stat_scaling), - gpu_counter(c), - divisor_gpu_counter(divisor) - {} -}; - -using StatDataMap = std::unordered_map; - -enum class CounterSamplingMode -{ - /// Sample counters only when calling update() - Polling, - /// Sample counters continuously, update circular buffers when calling update() - Continuous -}; - -struct CounterSamplingConfig -{ - /// Sampling mode (polling or continuous) - CounterSamplingMode mode; - - /// Sampling interval in continuous mode - std::chrono::milliseconds interval{1}; - - /// Speed of circular 
buffer updates in continuous mode; - /// at speed = 1.0f a new sample is displayed over 1 second. - float speed{0.5f}; -}; - -/* - * @brief Helper class for querying statistics about the CPU and the GPU - */ -class Stats -{ - public: - /** - * @brief Constructs a Stats object - * @param enabled_stats Set of stats to be collected - * @param sampling_config Sampling mode configuration (polling or continuous) - * @param buffer_size Size of the circular buffers - */ - Stats(const std::set &enabled_stats, - CounterSamplingConfig sampling_config = {CounterSamplingMode::Polling}, - size_t buffer_size = 16); - - /** - * @brief Destroys the Stats object - */ - ~Stats(); - - /** - * @brief Resizes the stats buffers according to the width of the screen - * @param width The width of the screen - */ - void resize(size_t width); - - /** - * @brief Checks if an enabled stat is available in the current platform - * @param index The stat index - * @return True if the stat is available, false otherwise - */ - bool is_available(StatIndex index) const; - - /** - * @param index The stat index of the data requested - * @return The data of the specified stat - */ - const std::vector &get_data(StatIndex index) const - { - return counters.at(index); - }; - - /** - * @return The enabled stats - */ - const std::set &get_enabled_stats() const - { - return enabled_stats; - } - - /** - * @brief Update statistics, must be called after every frame - */ - void update(float delta_time); - - private: - struct MeasurementSample - { - hwcpipe::CpuMeasurements cpu{}; - hwcpipe::GpuMeasurements gpu{}; - float delta_time{0.0f}; - }; - - /// Stats to be enabled - std::set enabled_stats; - - /// Counter sampling configuration - CounterSamplingConfig sampling_config; - - /// Mapping of stats to their availability and value getters - StatDataMap stat_data; - - /// Timer used in the main thread to compute delta time - Timer main_timer; - - /// Timer used by the worker thread to throttle counter sampling - 
Timer worker_timer; - - /// Alpha smoothing for running average - float alpha_smoothing{0.2f}; - - /// Circular buffers for counter data - std::map> counters{}; - - /// Profiler to gather CPU and GPU performance data - std::unique_ptr hwcpipe{}; - - /// Worker thread for continuous sampling - std::thread worker_thread; - - /// Promise to stop the worker thread - std::unique_ptr> stop_worker; - - /// A mutex for accessing measurements during continuous sampling - std::mutex continuous_sampling_mutex; - - /// The samples read during continuous sampling - std::vector continuous_samples; - - /// A flag specifying if the worker thread should add entries to continuous_samples - bool should_add_to_continuous_samples{false}; - - /// The samples waiting to be displayed - std::vector pending_samples; - - /// The worker thread function for continuous sampling; - /// it adds a new entry to continuous_samples at every interval - void continuous_sampling_worker(std::future should_terminate); - - /// Updates circular buffers for CPU and GPU counters - void push_sample(const MeasurementSample &sample); -}; - -} // namespace vkb diff --git a/framework/stats/frame_time_stats_provider.cpp b/framework/stats/frame_time_stats_provider.cpp new file mode 100644 index 0000000000..01e104bc2a --- /dev/null +++ b/framework/stats/frame_time_stats_provider.cpp @@ -0,0 +1,45 @@ +/* Copyright (c) 2020, Broadcom Inc. and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "frame_time_stats_provider.h" + +#include + +namespace vkb +{ +FrameTimeStatsProvider::FrameTimeStatsProvider(std::set &requested_stats) +{ + // We always, and only, support StatIndex::frame_times since it's handled directly by us. + // Remove it from the requested set to stop other providers looking for it. + requested_stats.erase(StatIndex::frame_times); +} + +bool FrameTimeStatsProvider::is_available(StatIndex index) const +{ + // We only support StatIndex::frame_times + return index == StatIndex::frame_times; +} + +StatsProvider::Sample FrameTimeStatsProvider::sample(float delta_time, uint32_t active_frame_idx) +{ + Sample res; + // frame_times comes directly from delta_time; active_frame_idx is not needed for it + res[StatIndex::frame_times].result = delta_time; + return res; +} + +} // namespace vkb diff --git a/framework/stats/frame_time_stats_provider.h b/framework/stats/frame_time_stats_provider.h new file mode 100644 index 0000000000..dec35bfd54 --- /dev/null +++ b/framework/stats/frame_time_stats_provider.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2020, Broadcom Inc. and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include "stats_provider.h" + +namespace vkb +{ +class FrameTimeStatsProvider : public StatsProvider +{ + public: + /** + * @brief Constructs a FrameTimeStatsProvider + * @param requested_stats Set of stats to be collected. StatIndex::frame_times is erased from the set. + */ + FrameTimeStatsProvider(std::set &requested_stats); + /** + * @brief Checks if this provider can supply the given enabled stat + * @param index The stat index + * @return True only for StatIndex::frame_times, false otherwise + */ + bool is_available(StatIndex index) const override; + + /** + * @brief Retrieve a new sample set holding only StatIndex::frame_times + * @param delta_time Time since last sample, reported unchanged as the frame time + * @param active_frame_idx Which of the framebuffers is active (not used by this provider) + */ + Sample sample(float delta_time, uint32_t active_frame_idx) override; +}; +} // namespace vkb diff --git a/framework/stats/hwcpipe_stats_provider.cpp b/framework/stats/hwcpipe_stats_provider.cpp new file mode 100644 index 0000000000..53b20ad5a1 --- /dev/null +++ b/framework/stats/hwcpipe_stats_provider.cpp @@ -0,0 +1,218 @@ +/* Copyright (c) 2018-2020, Arm Limited and Contributors + * Copyright (c) 2020, Broadcom Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "common/error.h" + +#include "hwcpipe_stats_provider.h" + +namespace vkb +{ +HWCPipeStatsProvider::HWCPipeStatsProvider(std::set & requested_stats, + CounterSamplingConfig sampling_config) : + sampling_config(sampling_config) +{ + // Mapping of stats to their hwcpipe availability + // clang-format off + StatDataMap hwcpipe_stats = { + {StatIndex::cpu_cycles, {hwcpipe::CpuCounter::Cycles}}, + {StatIndex::cpu_instructions, {hwcpipe::CpuCounter::Instructions}}, + {StatIndex::cpu_cache_miss_ratio, {hwcpipe::CpuCounter::CacheMisses, StatScaling::ByCounter, hwcpipe::CpuCounter::CacheReferences}}, + {StatIndex::cpu_branch_miss_ratio, {hwcpipe::CpuCounter::BranchMisses, StatScaling::ByCounter, hwcpipe::CpuCounter::BranchInstructions}}, + {StatIndex::gpu_cycles, {hwcpipe::GpuCounter::GpuCycles}}, + {StatIndex::gpu_vertex_cycles, {hwcpipe::GpuCounter::VertexComputeCycles}}, + {StatIndex::gpu_tiles, {hwcpipe::GpuCounter::Tiles}}, + {StatIndex::gpu_killed_tiles, {hwcpipe::GpuCounter::TransactionEliminations}}, + {StatIndex::gpu_fragment_cycles, {hwcpipe::GpuCounter::FragmentCycles}}, + {StatIndex::gpu_fragment_jobs, {hwcpipe::GpuCounter::FragmentJobs}}, + {StatIndex::gpu_ext_reads, {hwcpipe::GpuCounter::ExternalMemoryReadAccesses}}, + {StatIndex::gpu_ext_writes, {hwcpipe::GpuCounter::ExternalMemoryWriteAccesses}}, + {StatIndex::gpu_ext_read_stalls, {hwcpipe::GpuCounter::ExternalMemoryReadStalls}}, + {StatIndex::gpu_ext_write_stalls, {hwcpipe::GpuCounter::ExternalMemoryWriteStalls}}, + {StatIndex::gpu_ext_read_bytes, {hwcpipe::GpuCounter::ExternalMemoryReadBytes}}, + {StatIndex::gpu_ext_write_bytes, {hwcpipe::GpuCounter::ExternalMemoryWriteBytes}}, + {StatIndex::gpu_tex_cycles, {hwcpipe::GpuCounter::ShaderTextureCycles}}}; + // clang-format on + + hwcpipe::CpuCounterSet enabled_cpu_counters{}; + hwcpipe::GpuCounterSet enabled_gpu_counters{}; + + for (const auto &stat : requested_stats) + { + auto res = hwcpipe_stats.find(stat); + if (res != hwcpipe_stats.end())
+ { + stat_data[stat] = res->second; + + switch (res->second.type) + { + case StatType::Cpu: + enabled_cpu_counters.insert(res->second.cpu_counter); + if (res->second.divisor_cpu_counter != hwcpipe::CpuCounter::MaxValue) + enabled_cpu_counters.insert(res->second.divisor_cpu_counter); + break; + case StatType::Gpu: + enabled_gpu_counters.insert(res->second.gpu_counter); + if (res->second.divisor_gpu_counter != hwcpipe::GpuCounter::MaxValue) + enabled_gpu_counters.insert(res->second.divisor_gpu_counter); + break; + } + } + } + + hwcpipe = std::make_unique(enabled_cpu_counters, enabled_gpu_counters); + + // Now that we've made a hwcpipe with the counters we'd like, remove any that + // aren't actually supported + for (auto iter = stat_data.begin(); iter != stat_data.end();) + { + switch (iter->second.type) + { + case StatType::Cpu: + { + if (hwcpipe->cpu_profiler()) + { + const auto &cpu_supp = hwcpipe->cpu_profiler()->supported_counters(); + if (cpu_supp.find(iter->second.cpu_counter) == cpu_supp.end()) + iter = stat_data.erase(iter); + else + ++iter; + } + else + iter = stat_data.erase(iter); + break; + } + case StatType::Gpu: + { + if (hwcpipe->gpu_profiler()) + { + const auto &gpu_supp = hwcpipe->gpu_profiler()->supported_counters(); + if (gpu_supp.find(iter->second.gpu_counter) == gpu_supp.end()) + iter = stat_data.erase(iter); + else + ++iter; + } + else + iter = stat_data.erase(iter); + break; + } + } + } + + // Remove any supported stats from the requested set. + // Subsequent providers will then only look for things that aren't already supported.
+ for (const auto &iter : stat_data) + { + requested_stats.erase(iter.first); + } + + hwcpipe->run(); +} + +bool HWCPipeStatsProvider::is_available(StatIndex index) const +{ + return stat_data.find(index) != stat_data.end(); +} + +const StatGraphData &HWCPipeStatsProvider::get_graph_data(StatIndex index) const +{ + static StatGraphData vertex_compute_cycles{"Vertex Compute Cycles", "{:4.1f} M/s", float(1e-6)}; + + assert(is_available(index) && "HWCPipeStatsProvider::get_graph_data() called with invalid StatIndex"); + + // HWCPipe reports combined vertex/compute cycles (which is Arm specific) + // Ensure we report that graph with the correct name when asked for vertex cycles + if (index == StatIndex::gpu_vertex_cycles) + return vertex_compute_cycles; + + return def_graph_map[index]; +} + +static double get_cpu_counter_value(const hwcpipe::CpuMeasurements *cpu, hwcpipe::CpuCounter counter) +{ + auto hwcpipe_ctr = cpu->find(counter); + if (hwcpipe_ctr != cpu->end()) + return hwcpipe_ctr->second.get(); + return 0.0; +} + +static double get_gpu_counter_value(const hwcpipe::GpuMeasurements *gpu, hwcpipe::GpuCounter counter) +{ + auto hwcpipe_ctr = gpu->find(counter); + if (hwcpipe_ctr != gpu->end()) + return hwcpipe_ctr->second.get(); + return 0.0; +} + +StatsProvider::Sample HWCPipeStatsProvider::sample(float delta_time, uint32_t active_frame_idx) +{ + Sample res; + hwcpipe::Measurements m = hwcpipe->sample(); + + // Map from hwcpipe measurement to our sample result for each counter; only the measurements matching the stat's type are read, because StatData's constructors leave the other type's counter fields unset + for (const auto &iter : stat_data) + { + StatIndex index = iter.first; + const StatData &data = iter.second; + + double d = 0.0; + if (data.type == StatType::Cpu && m.cpu) + { + d = get_cpu_counter_value(m.cpu, data.cpu_counter); + + if (data.scaling == StatScaling::ByDeltaTime && delta_time != 0.0f) + { + d /= delta_time; + } + else if (data.scaling == StatScaling::ByCounter) + { + double divisor = get_cpu_counter_value(m.cpu, data.divisor_cpu_counter); + if (divisor != 0.0) + d /= divisor; + else + d = 0.0; + } + } + if
(data.type == StatType::Gpu && m.gpu) + { + d = get_gpu_counter_value(m.gpu, data.gpu_counter); + + if (data.scaling == StatScaling::ByDeltaTime && delta_time != 0.0f) + { + d /= delta_time; + } + else if (data.scaling == StatScaling::ByCounter) + { + double divisor = get_gpu_counter_value(m.gpu, data.divisor_gpu_counter); + if (divisor != 0.0) + d /= divisor; + else + d = 0.0; + } + } + res[index].result = d; + } + + return res; +} + +StatsProvider::Sample HWCPipeStatsProvider::continuous_sample(float delta_time) +{ + return sample(delta_time, 0); +} + +} // namespace vkb diff --git a/framework/stats/hwcpipe_stats_provider.h b/framework/stats/hwcpipe_stats_provider.h new file mode 100644 index 0000000000..7ee2eb25e0 --- /dev/null +++ b/framework/stats/hwcpipe_stats_provider.h @@ -0,0 +1,129 @@ +/* Copyright (c) 2018-2020, Arm Limited and Contributors + * Copyright (c) 2020, Broadcom Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include "common/error.h" +#include "common/vk_common.h" + +VKBP_DISABLE_WARNINGS() +#include +VKBP_ENABLE_WARNINGS() + +#include "stats_provider.h" + +namespace vkb +{ +class HWCPipeStatsProvider : public StatsProvider +{ + private: + enum class StatType + { + Cpu, + Gpu + }; + + struct StatData + { + StatType type; + StatScaling scaling; + hwcpipe::CpuCounter cpu_counter; + hwcpipe::CpuCounter divisor_cpu_counter; + hwcpipe::GpuCounter gpu_counter; + hwcpipe::GpuCounter divisor_gpu_counter; + + StatData() = default; + + /** + * @brief Constructor for CPU counters; the GPU counter members are not set by this constructor + * @param c The CPU counter to be gathered + * @param stat_scaling The scaling to be applied to the stat + * @param divisor The CPU counter to be used as divisor if scaling is ByCounter + */ + StatData(hwcpipe::CpuCounter c, + StatScaling stat_scaling = StatScaling::ByDeltaTime, + hwcpipe::CpuCounter divisor = hwcpipe::CpuCounter::MaxValue) : + type(StatType::Cpu), + scaling(stat_scaling), + cpu_counter(c), + divisor_cpu_counter(divisor) + {} + + /** + * @brief Constructor for GPU counters; the CPU counter members are not set by this constructor + * @param c The GPU counter to be gathered + * @param stat_scaling The scaling to be applied to the stat + * @param divisor The GPU counter to be used as divisor if scaling is ByCounter + */ + StatData(hwcpipe::GpuCounter c, + StatScaling stat_scaling = StatScaling::ByDeltaTime, + hwcpipe::GpuCounter divisor = hwcpipe::GpuCounter::MaxValue) : + type(StatType::Gpu), + scaling(stat_scaling), + gpu_counter(c), + divisor_gpu_counter(divisor) + {} + }; + + using StatDataMap = std::unordered_map; + + public: + /** + * @brief Constructs a HWCPipeStatsProvider + * @param requested_stats Set of stats to be collected. Supported stats will be removed from the set.
+ */ + HWCPipeStatsProvider(std::set &requested_stats, CounterSamplingConfig sampling_config); + + /** + * @brief Checks if this provider can supply the given enabled stat + * @param index The stat index + * @return True if the stat is available, false otherwise + */ + bool is_available(StatIndex index) const override; + + /** + * @brief Retrieve graphing data for the given enabled stat (asserts that the stat is available) + * @param index The stat index + */ + const StatGraphData &get_graph_data(StatIndex index) const override; + + /** + * @brief Retrieve a new sample set from polled sampling + * @param delta_time Time since last sample + * @param active_frame_idx Which of the framebuffers is active (not used by this provider) + */ + Sample sample(float delta_time, uint32_t active_frame_idx) override; + + /** + * @brief Retrieve a new sample set from continuous sampling; forwards to sample() + * @param delta_time Time since last sample + */ + Sample continuous_sample(float delta_time) override; + + private: + // The hwcpipe instance + std::unique_ptr hwcpipe{}; + + // Only stats which are available and were requested end up in stat_data + StatDataMap stat_data; + + // Counter sampling configuration + CounterSamplingConfig sampling_config; +}; + +} // namespace vkb diff --git a/framework/stats/stats.cpp b/framework/stats/stats.cpp new file mode 100644 index 0000000000..74d3a126e4 --- /dev/null +++ b/framework/stats/stats.cpp @@ -0,0 +1,273 @@ +/* Copyright (c) 2018-2020, Arm Limited and Contributors + * Copyright (c) 2020, Broadcom Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "stats/stats.h" +#include "common/error.h" +#include "core/device.h" + +#include "frame_time_stats_provider.h" +#include "hwcpipe_stats_provider.h" +#include "vulkan_stats_provider.h" +namespace vkb +{ +Stats::Stats(Device &device, size_t num_framebuffers, const std::set &requested_stats, + CounterSamplingConfig sampling_config, size_t buffer_size) : + requested_stats(requested_stats), + sampling_config(sampling_config), + stop_worker(std::make_unique>()) +{ + assert(buffer_size >= 2 && "Buffers size should be greater than 2"); + + // Copy the requested stats, so they can be changed by the providers below + std::set stats = requested_stats; + + // Initialize our list of providers (in priority order) + // All supported stats will be removed from the given 'stats' set by the provider's constructor + // so subsequent providers only see requests for stats that aren't already supported. + providers.emplace_back(std::make_unique(stats)); + providers.emplace_back(std::make_unique(stats, sampling_config)); + providers.emplace_back(std::make_unique(device, stats, sampling_config, num_framebuffers)); + + // In continuous sampling mode we still need to update the frame times as if we are polling + // Store the frame time provider here so we can easily access it later.
+ frame_time_provider = providers[0].get(); + + for (const auto &stat : requested_stats) + { + counters[stat] = std::vector(buffer_size, 0); + } + + if (sampling_config.mode == CounterSamplingMode::Continuous) + { + // Start a thread for continuous sample capture + worker_thread = std::thread([this] { + continuous_sampling_worker(stop_worker->get_future()); + }); + + // Reduce smoothing for continuous sampling + alpha_smoothing = 0.6f; + } +} + +Stats::~Stats() +{ + if (stop_worker) + { + stop_worker->set_value(); + } + + if (worker_thread.joinable()) + { + worker_thread.join(); + } +} + +void Stats::resize(const size_t width) +{ + // The circular buffer size will be 1/16th of the width of the screen + // which means every sixteen pixels represent one graph value (never fewer than the 2 entries add_smoothed_value() needs) + size_t buffers_size = std::max<size_t>(2, width >> 4); + + for (auto &counter : counters) + { + counter.second.resize(buffers_size); + counter.second.shrink_to_fit(); + } +} + +bool Stats::is_available(const StatIndex index) const +{ + for (const auto &p : providers) + if (p->is_available(index)) + return true; + return false; +} + +static void add_smoothed_value(std::vector &values, float value, float alpha) +{ + assert(values.size() >= 2 && "Buffers size should be greater than 2"); + + if (values.size() == values.capacity()) + { + // Shift values to the left to make space at the end and update counters + std::rotate(values.begin(), values.begin() + 1, values.end()); + } + + // Use an exponential moving average to smooth values + values.back() = value * alpha + *(values.end() - 2) * (1.0f - alpha); +} + +void Stats::update(float delta_time, uint32_t active_frame_idx) +{ + switch (sampling_config.mode) + { + case CounterSamplingMode::Polling: + { + StatsProvider::Sample sample; + + for (auto &p : providers) + { + auto s = p->sample(delta_time, active_frame_idx); + sample.insert(s.begin(), s.end()); + } + push_sample(sample); + break; + } + case CounterSamplingMode::Continuous: + { + // Check that we have no pending samples
to be shown + if (pending_samples.size() == 0) + { + std::unique_lock lock(continuous_sampling_mutex); + if (!should_add_to_continuous_samples) + { + // If we have no pending samples, we let the worker thread + // capture samples for the next frame + should_add_to_continuous_samples = true; + } + else + { + // The worker thread has captured a frame, so we stop it + // and read the samples + should_add_to_continuous_samples = false; + pending_samples = continuous_samples; + continuous_samples.clear(); + } + } + + if (pending_samples.size() == 0) + return; + + // Ensure the number of pending samples is capped at a reasonable value + if (pending_samples.size() > 100) + pending_samples.resize(100); + + // Compute the number of samples to show this frame (truncate to an integer only after scaling by the pending count) + size_t sample_count = static_cast<size_t>(sampling_config.speed * delta_time * pending_samples.size()); + + // Clamp the number of samples + sample_count = std::max(1, std::min(sample_count, pending_samples.size())); + + // Get the frame time stats (not a continuous stat) + StatsProvider::Sample frame_time_sample = frame_time_provider->sample(delta_time, active_frame_idx); + + // Push the samples to circular buffers + std::for_each(pending_samples.end() - sample_count, pending_samples.end(), [this, frame_time_sample](auto &s) { + // Write the correct frame time into the continuous stats + s.insert(frame_time_sample.begin(), frame_time_sample.end()); + // Then push the sample to the counters list + this->push_sample(s); + }); + pending_samples.erase(pending_samples.end() - sample_count, pending_samples.end()); + + break; + } + } +} + +void Stats::continuous_sampling_worker(std::future should_terminate) +{ + worker_timer.tick(); + + for (auto &p : providers) + p->continuous_sample(0.0f); + + while (should_terminate.wait_for(std::chrono::seconds(0)) != std::future_status::ready) + { + auto delta_time = static_cast(worker_timer.tick()); + auto interval = std::chrono::duration_cast>(sampling_config.interval).count(); + + // Ensure
we wait for the interval specified in config + if (delta_time < interval) + { + std::this_thread::sleep_for(std::chrono::duration(interval - delta_time)); + delta_time += static_cast(worker_timer.tick()); + } + + // Sample counters + StatsProvider::Sample sample; + for (auto &p : providers) + { + StatsProvider::Sample s = p->continuous_sample(delta_time); + sample.insert(s.begin(), s.end()); + } + + // Add the new sample to the vector of continuous samples, but only while update() has requested a capture + { + std::unique_lock lock(continuous_sampling_mutex); + if (should_add_to_continuous_samples) continuous_samples.push_back(sample); + } + } +} + +void Stats::push_sample(const StatsProvider::Sample &sample) +{ + for (auto &c : counters) + { + StatIndex idx = c.first; + std::vector &values = c.second; + + // Find the counter matching this StatIndex in the Sample + const auto &smp = sample.find(idx); + if (smp == sample.end()) + continue; + + float measurement = static_cast(smp->second.result); + + add_smoothed_value(values, measurement, alpha_smoothing); + } +} + +void Stats::command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx) +{ + // Inform the providers + for (auto &p : providers) + p->command_buffer_begun(cb, active_frame_idx); +} + +void Stats::command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx) +{ + // Inform the providers + for (auto &p : providers) + p->command_buffer_ending(cb, active_frame_idx); +} + +const StatGraphData &Stats::get_graph_data(StatIndex index) const +{ + for (auto &p : providers) + { + if (p->is_available(index)) + return p->get_graph_data(index); + } + return StatsProvider::default_graph_data(index); +} + +StatGraphData::StatGraphData(const std::string &name, + const std::string &graph_label_format, + float scale_factor, + bool has_fixed_max, + float max_value) : + name(name), + format{graph_label_format}, + scale_factor{scale_factor}, + has_fixed_max{has_fixed_max}, + max_value{max_value} +{ +} + +} // namespace vkb diff --git a/framework/stats/stats.h b/framework/stats/stats.h new file mode
100644 index 0000000000..2c99080771 --- /dev/null +++ b/framework/stats/stats.h @@ -0,0 +1,169 @@ +/* Copyright (c) 2018-2020, Arm Limited and Contributors + * Copyright (c) 2020, Broadcom Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/error.h" + +#include "stats_common.h" +#include "stats_provider.h" +#include "timer.h" + +namespace vkb +{ +class Device; +class CommandBuffer; + +/* + * @brief Helper class for querying statistics about the CPU and the GPU + */ +class Stats +{ + public: + /** + * @brief Constructs a Stats object + * @param device Device on which to collect stats + * @param requested_stats Set of stats to be collected if available + * @param sampling_config Sampling mode configuration (polling or continuous) + * @param buffer_size Size of the circular buffers + */ + Stats(Device & device, + size_t num_framebuffers, + const std::set &requested_stats, + CounterSamplingConfig sampling_config = {CounterSamplingMode::Polling}, + size_t buffer_size = 16); + + /** + * @brief Destroys the Stats object + */ + ~Stats(); + + /** + * @brief Resizes the stats buffers according to the width of the screen + * @param width The width of the screen + */ + void resize(size_t width); + + /** + * @brief Checks if an enabled stat is available in the current platform + * @param index The stat index + * @return True 
if the stat is available, false otherwise + */ + bool is_available(StatIndex index) const; + + /** + * @brief Returns data relevant for graphing a specific statistic + * @param index The stat index of the data requested + * @return The data of the specified stat + */ + const StatGraphData &get_graph_data(StatIndex index) const; + + /** + * @brief Returns the collected data for a specific statistic + * @param index The stat index of the data requested + * @return The data of the specified stat + */ + const std::vector &get_data(StatIndex index) const + { + return counters.at(index); + }; + + /** + * @return The requested stats + */ + const std::set &get_requested_stats() const + { + return requested_stats; + } + + /** + * @brief Update statistics, must be called after every frame + */ + void update(float delta_time, uint32_t active_frame_idx); + + /** + * @brief A command buffer that we want stats about has just begun + * @param cb The command buffer + */ + void command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx); + + /** + * @brief A command buffer that we want stats about is about to be ended + * @param cb The command buffer + */ + void command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx); + + private: + /// Stats that were requested - they may not all be available + std::set requested_stats; + + /// Provider that tracks frame times + StatsProvider *frame_time_provider; + + /// A list of stats providers to use in priority order + std::vector> providers; + + /// Counter sampling configuration + CounterSamplingConfig sampling_config; + + /// Timer used in the main thread to compute delta time + Timer main_timer; + + /// Timer used by the worker thread to throttle counter sampling + Timer worker_timer; + + /// Alpha smoothing for running average + float alpha_smoothing{0.2f}; + + /// Circular buffers for counter data + std::map> counters{}; + + /// Worker thread for continuous sampling + std::thread worker_thread; + + /// Promise to stop 
the worker thread + std::unique_ptr> stop_worker; + + /// A mutex for accessing measurements during continuous sampling + std::mutex continuous_sampling_mutex; + + /// The samples read during continuous sampling + std::vector continuous_samples; + + /// A flag specifying if the worker thread should add entries to continuous_samples + bool should_add_to_continuous_samples{false}; + + /// The samples waiting to be displayed + std::vector pending_samples; + + /// The worker thread function for continuous sampling; + /// it adds a new entry to continuous_samples at every interval + void continuous_sampling_worker(std::future should_terminate); + + /// Updates circular buffers for CPU and GPU counters + void push_sample(const StatsProvider::Sample &sample); +}; + +} // namespace vkb diff --git a/framework/stats/stats_common.h b/framework/stats/stats_common.h new file mode 100644 index 0000000000..3334fbb79e --- /dev/null +++ b/framework/stats/stats_common.h @@ -0,0 +1,120 @@ +/* Copyright (c) 2018-2020, Arm Limited and Contributors + * Copyright (c) 2020, Broadcom Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * @brief Hash functor that converts a value to std::size_t with a static_cast,
 *        so enum values can be used as keys of unordered containers
 */
struct StatIndexHash
{
	template <class Key>
	std::size_t operator()(Key key) const
	{
		const auto hashed = static_cast<std::size_t>(key);
		return hashed;
	}
};
/**
 * @brief Per-statistic graph data: display name, label format and value scaling
 */
class StatGraphData
{
  public:
	/**
	 * @brief Constructs data for the graph
	 * @param name Name of the Stat
	 * @param format Format of the label
	 * @param scale_factor Any scaling to apply to the data
	 * @param has_fixed_max Whether the data should have a fixed max value
	 * @param max_value The maximum value to use
	 */
	StatGraphData(const std::string &name,
	              const std::string &format,
	              float              scale_factor  = 1.0f,
	              bool               has_fixed_max = false,
	              float              max_value     = 0.0f);

	/**
	 * @brief Constructs empty graph data.
	 *
	 * The member initializers below give a default-constructed object the same
	 * values as the defaulted arguments of the other constructor, so it can be
	 * copied and read safely before being assigned.
	 */
	StatGraphData() = default;

	std::string name;
	std::string format;
	float       scale_factor{1.0f};
	bool        has_fixed_max{false};
	float       max_value{0.0f};
};
+std::map StatsProvider::def_graph_map{ + // clang-format off + // StatIndex Name shown in graph Format Scale fixed_max max_value + {StatIndex::frame_times, {"Frame Times", "{:3.1f} ms", 1000.0f}}, + {StatIndex::cpu_cycles, {"CPU Cycles", "{:4.1f} M/s", float(1e-6)}}, + {StatIndex::cpu_instructions, {"CPU Instructions", "{:4.1f} M/s", float(1e-6)}}, + {StatIndex::cpu_cache_miss_ratio, {"Cache Miss Ratio", "{:3.1f}%", 100.0f, true, 100.0f}}, + {StatIndex::cpu_branch_miss_ratio, {"Branch Miss Ratio", "{:3.1f}%", 100.0f, true, 100.0f}}, + {StatIndex::gpu_cycles, {"GPU Cycles", "{:4.1f} M/s", float(1e-6)}}, + {StatIndex::gpu_vertex_cycles, {"Vertex Cycles", "{:4.1f} M/s", float(1e-6)}}, + {StatIndex::gpu_tiles, {"Tiles", "{:4.1f} k/s", float(1e-3)}}, + {StatIndex::gpu_killed_tiles, {"Tiles killed by CRC match", "{:4.1f} k/s", float(1e-3)}}, + {StatIndex::gpu_fragment_jobs, {"Fragment Jobs", "{:4.0f}/s"}}, + {StatIndex::gpu_fragment_cycles, {"Fragment Cycles", "{:4.1f} M/s", float(1e-6)}}, + {StatIndex::gpu_tex_cycles, {"Shader Texture Cycles", "{:4.0f} k/s", float(1e-3)}}, + {StatIndex::gpu_ext_reads, {"External Reads", "{:4.1f} M/s", float(1e-6)}}, + {StatIndex::gpu_ext_writes, {"External Writes", "{:4.1f} M/s", float(1e-6)}}, + {StatIndex::gpu_ext_read_stalls, {"External Read Stalls", "{:4.1f} M/s", float(1e-6)}}, + {StatIndex::gpu_ext_write_stalls, {"External Write Stalls", "{:4.1f} M/s", float(1e-6)}}, + {StatIndex::gpu_ext_read_bytes, {"External Read Bytes", "{:4.1f} MiB/s", 1.0f / (1024.0f * 1024.0f)}}, + {StatIndex::gpu_ext_write_bytes, {"External Write Bytes", "{:4.1f} MiB/s", 1.0f / (1024.0f * 1024.0f)}}, + // clang-format on +}; + +// Static +const StatGraphData &StatsProvider::default_graph_data(StatIndex index) +{ + return def_graph_map.at(index); +} + +} // namespace vkb diff --git a/framework/stats/stats_provider.h b/framework/stats/stats_provider.h new file mode 100644 index 0000000000..f091544428 --- /dev/null +++ b/framework/stats/stats_provider.h @@ 
-0,0 +1,106 @@ +/* Copyright (c) 2020, Broadcom Inc. and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "stats_common.h" + +#include +#include +#include + +namespace vkb +{ +class CommandBuffer; + +/** + * @brief Abstract interface for all StatsProvider classes + */ +class StatsProvider +{ + public: + struct Counter + { + double result; + }; + + using Sample = std::unordered_map; + + /** + * @brief Virtual Destructor + */ + virtual ~StatsProvider() + {} + + /** + * @brief Checks if this provider can supply the given enabled stat + * @param index The stat index + * @return True if the stat is available, false otherwise + */ + virtual bool is_available(StatIndex index) const = 0; + + /** + * @brief Retrieve graphing data for the given enabled stat + * @param index The stat index + */ + virtual const StatGraphData &get_graph_data(StatIndex index) const + { + return def_graph_map.at(index); + } + + /** + * @brief Retrieve default graphing data for the given stat + * @param index The stat index + */ + static const StatGraphData &default_graph_data(StatIndex index); + + /** + * @brief Retrieve a new sample set + * @param delta_time Time since last sample + * @param active_frame_idx Which of the framebuffers is active + */ + virtual Sample sample(float delta_time, uint32_t active_frame_idx) = 0; + + /** + * @brief Retrieve a new sample set from continuous sampling + * @param 
delta_time Time since last sample + */ + virtual Sample continuous_sample(float delta_time) + { + return Sample(); + } + + /** + * @brief A command buffer that we want stats about has just begun + * @param cb The command buffer + * @param active_frame_idx Which of the framebuffers is active + */ + virtual void command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx) + {} + + /** + * @brief A command buffer that we want stats about is about to be ended + * @param cb The command buffer + * @param active_frame_idx Which of the framebuffers is active + */ + virtual void command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx) + {} + + protected: + static std::map def_graph_map; +}; +} // namespace vkb diff --git a/framework/stats/vulkan_stats_provider.cpp b/framework/stats/vulkan_stats_provider.cpp new file mode 100644 index 0000000000..cd5e0f4f88 --- /dev/null +++ b/framework/stats/vulkan_stats_provider.cpp @@ -0,0 +1,460 @@ +/* Copyright (c) 2020, Broadcom Inc. and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common/error.h" +#include "core/device.h" + +#include "core/command_buffer.h" +#include "vulkan_stats_provider.h" + +#include + +namespace vkb +{ +VulkanStatsProvider::VulkanStatsProvider(Device &device, std::set &requested_stats, + const CounterSamplingConfig &sampling_config, + size_t num_framebuffers) : + device(device) +{ + // Check all the Vulkan capabilities we require are present + if (!is_supported(sampling_config)) + return; + + const PhysicalDevice &gpu = device.get_gpu(); + + has_timestamps = gpu.get_properties().limits.timestampComputeAndGraphics; + timestamp_period = gpu.get_properties().limits.timestampPeriod; + + // Interrogate device for supported stats + uint32_t queue_family_index = device.get_queue_family_index(VK_QUEUE_GRAPHICS_BIT); + + // Query number of available counters + uint32_t count = 0; + VK_CHECK(vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( + gpu.get_handle(), queue_family_index, &count, nullptr, nullptr)); + + if (count == 0) + return; // No counters available + + std::vector counters(count); + std::vector descs(count); + + for (uint32_t i = 0; i < count; i++) + { + counters[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR; + counters[i].pNext = nullptr; + descs[i].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR; + descs[i].pNext = nullptr; + } + + // Now get the list of counters and their descriptions + VK_CHECK(vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( + gpu.get_handle(), queue_family_index, &count, counters.data(), descs.data())); + + // Every vendor has a different set of performance counters each + // with different names. Match them to the stats we want, where available. 
+ if (!fill_vendor_data()) + return; + + bool performance_impact = false; + + // Now build stat_data by matching vendor_data to Vulkan counter data + for (auto &s : vendor_data) + { + StatIndex index = s.first; + + if (requested_stats.find(index) == requested_stats.end()) + continue; // We weren't asked for this stat + + VendorStat &init = s.second; + bool found_ctr = false; + bool found_div = (init.divisor_name == ""); + uint32_t ctr_idx, div_idx; + + std::regex name_regex(init.name); + std::regex div_regex(init.divisor_name); + + for (uint32_t i = 0; !(found_ctr && found_div) && i < descs.size(); i++) + { + if (!found_ctr && std::regex_match(descs[i].name, name_regex)) + { + ctr_idx = i; + found_ctr = true; + } + if (!found_div && std::regex_match(descs[i].name, div_regex)) + { + div_idx = i; + found_div = true; + } + } + + if (found_ctr && found_div) + { + if ((descs[ctr_idx].flags & VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR) || + (init.divisor_name != "" && descs[div_idx].flags != VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR)) + { + performance_impact = true; + } + + // Record the counter data + counter_indices.emplace_back(ctr_idx); + if (init.divisor_name == "") + { + stat_data[index] = StatData(ctr_idx, counters[ctr_idx].storage); + } + else + { + counter_indices.emplace_back(div_idx); + stat_data[index] = StatData(ctr_idx, counters[ctr_idx].storage, init.scaling, + div_idx, counters[div_idx].storage); + } + } + } + + if (performance_impact) + LOGW("The collection of performance counters may impact performance"); + + if (counter_indices.size() == 0) + return; // No stats available + + // Acquire the profiling lock, without which we can't collect stats + VkAcquireProfilingLockInfoKHR info{}; + info.sType = VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR; + info.timeout = 2000000000; // 2 seconds (in ns) + + if (vkAcquireProfilingLockKHR(device.get_handle(), &info) != VK_SUCCESS) + { + stat_data.clear(); + 
counter_indices.clear(); + LOGW("Profiling lock acquisition timed-out"); + return; + } + + // Now we know the counters and that we can collect them, make a query pool for the results. + if (!create_query_pools(num_framebuffers, queue_family_index)) + { + stat_data.clear(); + counter_indices.clear(); + return; + } + + // These stats are fully supported by this provider and in a single pass, so remove + // from the requested set. + // Subsequent providers will then only look for things that aren't already supported. + for (const auto &s : stat_data) + requested_stats.erase(s.first); +} + +VulkanStatsProvider::~VulkanStatsProvider() +{ + if (stat_data.size() > 0) + { + // Release profiling lock + vkReleaseProfilingLockKHR(device.get_handle()); + } +} + +bool VulkanStatsProvider::fill_vendor_data() +{ + const auto &pd_props = device.get_gpu().get_properties(); + if (pd_props.vendorID == 0x14E4) // Broadcom devices + { + LOGI("Using Vulkan performance counters from Broadcom device"); + + // NOTE: The names here are actually regular-expressions. + // Counter names can change between hardware variants for the same vendor, + // so regular expression names mean that multiple h/w variants can be easily supported. 
+ // clang-format off + vendor_data = { + {StatIndex::gpu_cycles, {"cycle_count"}}, + {StatIndex::gpu_vertex_cycles, {"qpu_active_cycles_vertex_coord_user"}}, + {StatIndex::gpu_fragment_cycles, {"qpu_active_cycles_fragment"}}, + {StatIndex::gpu_fragment_jobs, {"render_jobs_completed"}}, + {StatIndex::gpu_ext_reads, {"gpu_mem_reads"}}, + {StatIndex::gpu_ext_writes, {"gpu_mem_writes"}}, + {StatIndex::gpu_ext_read_bytes, {"gpu_bytes_read"}}, + {StatIndex::gpu_ext_write_bytes, {"gpu_bytes_written"}}, + }; + // clang-format on + + // Override vendor-specific graph data + vendor_data.at(StatIndex::gpu_vertex_cycles).set_vendor_graph_data({"Vertex/Coord/User Cycles", "{:4.0f}/s"}); + vendor_data.at(StatIndex::gpu_fragment_jobs).set_vendor_graph_data({"Render Jobs", "{:4.0f}/s"}); + + return true; + } +#if 0 + else if (pd_props.vendorID == xxxx) // Other vendor's devices + { + // Fill vendor_data for other vendor + return true; + } +#endif + return false; // Unsupported vendor +} + +bool VulkanStatsProvider::create_query_pools(uint32_t num_framebuffers, uint32_t queue_family_index) +{ + const PhysicalDevice &gpu = device.get_gpu(); + + // Now we know the available counters, we can build a query pool that will collect them. + // We will check that the counters can be collected in a single pass. Multi-pass would + // be a big performance hit so for these samples, we don't want to use it. 
+ VkQueryPoolPerformanceCreateInfoKHR perf_create_info{}; + perf_create_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR; + perf_create_info.queueFamilyIndex = queue_family_index; + perf_create_info.counterIndexCount = counter_indices.size(); + perf_create_info.pCounterIndices = counter_indices.data(); + + uint32_t passes_needed; + vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(gpu.get_handle(), &perf_create_info, + &passes_needed); + if (passes_needed != 1) + { + // Needs more than one pass, remove all our supported stats + LOGW("Requested Vulkan stats require multiple passes, we won't collect them"); + return false; + } + + // We will need a query pool to report the stats back to us + VkQueryPoolCreateInfo pool_create_info{}; + pool_create_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + pool_create_info.pNext = &perf_create_info; + pool_create_info.queryType = VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR; + pool_create_info.queryCount = num_framebuffers; + + query_pool = std::make_unique(device, pool_create_info); + + if (!query_pool) + { + LOGW("Failed to create performance query pool"); + return false; + } + + // Reset the query pool before first use. We cannot do these in the command buffer + // as that is invalid usage for performance queries due to the potential for multple + // passes being required. + query_pool->host_reset(0, num_framebuffers); + + if (has_timestamps) + { + // If we support timestamp queries we will use those to more accurately measure + // the time spent executing a command buffer than just a frame-to-frame timer + // in software. 
+ VkQueryPoolCreateInfo timestamp_pool_create_info{}; + timestamp_pool_create_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + timestamp_pool_create_info.queryType = VK_QUERY_TYPE_TIMESTAMP; + timestamp_pool_create_info.queryCount = num_framebuffers * 2; // 2 timestamps per frame (start & end) + + timestamp_pool = std::make_unique(device, timestamp_pool_create_info); + } + + return true; +} + +bool VulkanStatsProvider::is_supported(const CounterSamplingConfig &sampling_config) const +{ + // Continuous sampling mode cannot be supported by VK_KHR_performance_query + if (sampling_config.mode == CounterSamplingMode::Continuous) + return false; + + // The VK_KHR_performance_query must be available and enabled + if (!(device.is_enabled("VK_KHR_performance_query") && device.is_enabled("VK_EXT_host_query_reset"))) + return false; + + // Check the performance query features flag. + // Note: VK_KHR_get_physical_device_properties2 is a pre-requisite of VK_KHR_performance_query + // so must be present. 
+ VkPhysicalDevicePerformanceQueryFeaturesKHR perf_query_features{}; + perf_query_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR; + + VkPhysicalDeviceFeatures2KHR device_features{}; + device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; + device_features.pNext = &perf_query_features; + + vkGetPhysicalDeviceFeatures2(device.get_gpu().get_handle(), &device_features); + if (!perf_query_features.performanceCounterQueryPools) + return false; + + return true; +} + +bool VulkanStatsProvider::is_available(StatIndex index) const +{ + return stat_data.find(index) != stat_data.end(); +} + +const StatGraphData &VulkanStatsProvider::get_graph_data(StatIndex index) const +{ + assert(is_available(index) && "VulkanStatsProvider::get_graph_data() called with invalid StatIndex"); + + const auto &data = vendor_data.find(index)->second; + if (data.has_vendor_graph_data) + return data.graph_data; + + return def_graph_map[index]; +} + +void VulkanStatsProvider::command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx) +{ + if (timestamp_pool) + { + // We use TimestampQueries when available to provide a more accurate delta_time. + // This counters are from a single command buffer execution, but the passed + // delta time is a frame-to-frame s/w measure. A timestamp query in the the cmd + // buffer gives the actual elapsed time where the counters were measured. 
+ cb.reset_query_pool(*timestamp_pool, active_frame_idx * 2, 1); + cb.write_timestamp(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, *timestamp_pool, + active_frame_idx * 2); + } + + if (query_pool) + cb.begin_query(*query_pool, active_frame_idx, VkQueryControlFlags(0)); +} + +void VulkanStatsProvider::command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx) +{ + if (query_pool) + { + // Perform a barrier to ensure all previous commands complete before ending the query + // This does not block later commands from executing as we use BOTTOM_OF_PIPE in the + // dst stage mask + vkCmdPipelineBarrier(cb.get_handle(), + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + 0, 0, nullptr, 0, nullptr, 0, nullptr); + cb.end_query(*query_pool, active_frame_idx); + + ++queries_ready; + } + + if (timestamp_pool) + { + cb.reset_query_pool(*timestamp_pool, active_frame_idx * 2 + 1, 1); + cb.write_timestamp(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, *timestamp_pool, + active_frame_idx * 2 + 1); + } +} + +static double get_counter_value(const VkPerformanceCounterResultKHR &result, + VkPerformanceCounterStorageKHR storage) +{ + switch (storage) + { + case VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR: + return result.int32; + case VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR: + return result.int64; + case VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR: + return result.uint32; + case VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR: + return result.uint64; + case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR: + return result.float32; + case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR: + return result.float64; + default: + assert(0); + return 0.0; + } +} + +float VulkanStatsProvider::get_best_delta_time(float sw_delta_time, uint32_t active_frame_idx) const +{ + if (!timestamp_pool) + return sw_delta_time; + + float delta_time = sw_delta_time; + + // Query the timestamps to get an accurate delta time + std::array timestamps; + + VkResult r = timestamp_pool->get_results(active_frame_idx 
* 2, 2, + timestamps.size() * sizeof(uint64_t), + timestamps.data(), sizeof(uint64_t), + VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT); + if (r == VK_SUCCESS) + { + uint64_t elapsed_ns = (timestamps[1] - timestamps[0]) * timestamp_period; + delta_time = float(double(elapsed_ns) / 1000000000.0); + } + + return delta_time; +} + +StatsProvider::Sample VulkanStatsProvider::sample(float delta_time, uint32_t active_frame_idx) +{ + Sample out; + if (!query_pool || queries_ready == 0) + return out; + + VkDeviceSize stride = sizeof(VkPerformanceCounterResultKHR) * counter_indices.size(); + + std::vector results(counter_indices.size()); + + VkResult r = query_pool->get_results(active_frame_idx, 1, + results.size() * sizeof(VkPerformanceCounterResultKHR), + results.data(), stride, VK_QUERY_RESULT_WAIT_BIT); + if (r != VK_SUCCESS) + return out; + + // Use timestamps to get a more accurate delta if available + delta_time = get_best_delta_time(delta_time, active_frame_idx); + + // Parse the results - they are in the order we gave in counter_indices + for (const auto &s : stat_data) + { + StatIndex si = s.first; + + bool need_divisor = (stat_data[si].scaling == StatScaling::ByCounter); + double divisor_value = 1.0; + double value = 0.0; + bool found_ctr = false, found_div = !need_divisor; + + for (uint32_t i = 0; !(found_ctr && found_div) && i < counter_indices.size(); i++) + { + if (s.second.counter_index == counter_indices[i]) + { + value = get_counter_value(results[i], stat_data[si].storage); + found_ctr = true; + } + if (need_divisor && s.second.divisor_counter_index == counter_indices[i]) + { + divisor_value = get_counter_value(results[i], stat_data[si].divisor_storage); + found_div = true; + } + } + + if (found_ctr && found_div) + { + if (stat_data[si].scaling == StatScaling::ByDeltaTime && delta_time != 0.0) + value /= delta_time; + else if (stat_data[si].scaling == StatScaling::ByCounter && divisor_value != 0.0) + value /= divisor_value; + out[si].result = value; + } 
+ } + + // Now reset the query we just fetched the results from + query_pool->host_reset(active_frame_idx, 1); + + --queries_ready; + + return out; +} + +} // namespace vkb diff --git a/framework/stats/vulkan_stats_provider.h b/framework/stats/vulkan_stats_provider.h new file mode 100644 index 0000000000..c0e09c65f0 --- /dev/null +++ b/framework/stats/vulkan_stats_provider.h @@ -0,0 +1,161 @@ +/* Copyright (c) 2020, Broadcom Inc. and Contributors + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "core/query_pool.h" +#include "stats_provider.h" + +namespace vkb +{ +class VulkanStatsProvider : public StatsProvider +{ + private: + struct StatData + { + StatScaling scaling; + uint32_t counter_index; + uint32_t divisor_counter_index; + VkPerformanceCounterStorageKHR storage; + VkPerformanceCounterStorageKHR divisor_storage; + StatGraphData graph_data; + + StatData() = default; + + StatData(uint32_t counter_index, VkPerformanceCounterStorageKHR storage, + StatScaling stat_scaling = StatScaling::ByDeltaTime, + uint32_t divisor_index = std::numeric_limits::max(), + VkPerformanceCounterStorageKHR divisor_storage = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR) : + scaling(stat_scaling), + counter_index(counter_index), + divisor_counter_index(divisor_index), + storage(storage), + divisor_storage(divisor_storage) + {} + }; + + struct VendorStat + { + VendorStat(const std::string &name, const std::string &divisor_name = "") : + name(name), + divisor_name(divisor_name) + { + if (divisor_name != "") + scaling = StatScaling::ByCounter; + } + + void set_vendor_graph_data(const StatGraphData &data) + { + has_vendor_graph_data = true; + graph_data = data; + } + + std::string name; + StatScaling scaling = StatScaling::ByDeltaTime; + std::string divisor_name; + bool has_vendor_graph_data = false; + StatGraphData graph_data; + }; + + using StatDataMap = std::unordered_map; + using VendorStatMap = std::unordered_map; + + public: + /** + * @brief Constructs a VulkanStatsProvider + * @param device The device on which to collect stats + * @param requested_stats Set of stats to be collected. Supported stats will be removed from the set. 
+ */ + VulkanStatsProvider(Device &device, std::set &requested_stats, + const CounterSamplingConfig &sampling_config, size_t num_framebuffers); + + /** + * @brief Destructs a VulkanStatsProvider + */ + ~VulkanStatsProvider(); + + /** + * @brief Checks if this provider can supply the given enabled stat + * @param index The stat index + * @return True if the stat is available, false otherwise + */ + bool is_available(StatIndex index) const override; + + /** + * @brief Retrieve graphing data for the given enabled stat + * @param index The stat index + */ + const StatGraphData &get_graph_data(StatIndex index) const override; + + /** + * @brief Retrieve a new sample set from polled sampling + * @param delta_time Time since last sample + * @param active_frame_idx Which of the framebuffers is active + */ + Sample sample(float delta_time, uint32_t active_frame_idx) override; + + /** + * @brief A command buffer that we want stats about has just begun + * @param cb The command buffer + * @param active_frame_idx Which of the framebuffers is active + */ + void command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx) override; + + /** + * @brief A command buffer that we want stats about is about to be ended + * @param cb The command buffer + * @param active_frame_idx Which of the framebuffers is active + */ + void command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx) override; + + private: + bool is_supported(const CounterSamplingConfig &sampling_config) const; + + bool fill_vendor_data(); + + bool create_query_pools(uint32_t num_framebuffers, uint32_t queue_family_index); + + float get_best_delta_time(float sw_delta_time, uint32_t active_frame_idx) const; + + private: + Device &device; + + // The query pool for the performance queries + std::unique_ptr query_pool; + + // Do we support timestamp queries + bool has_timestamps{false}; + + // The timestamp period + float timestamp_period{1.0f}; + + // And one for timestamps + std::unique_ptr timestamp_pool; 
+ + // Map of vendor specific stat data + VendorStatMap vendor_data; + + // Only stats which are available and were requested end up in stat_data + StatDataMap stat_data; + + // An ordered list of the Vulkan counter ids + std::vector counter_indices; + + uint32_t queries_ready = 0; +}; + +} // namespace vkb diff --git a/framework/vulkan_sample.cpp b/framework/vulkan_sample.cpp index 5310c969ad..77cf51befb 100644 --- a/framework/vulkan_sample.cpp +++ b/framework/vulkan_sample.cpp @@ -148,11 +148,11 @@ void VulkanSample::update_scene(float delta_time) } } -void VulkanSample::update_stats(float delta_time) +void VulkanSample::update_stats(float delta_time, uint32_t active_frame_idx) { if (stats) { - stats->update(delta_time); + stats->update(delta_time, active_frame_idx); static float stats_view_count = 0.0f; stats_view_count += delta_time; @@ -190,16 +190,19 @@ void VulkanSample::update(float delta_time) { update_scene(delta_time); - update_stats(delta_time); - update_gui(delta_time); auto &command_buffer = render_context->begin(); + uint32_t active_frame_idx = render_context->get_active_frame_index(); + update_stats(delta_time, active_frame_idx); + command_buffer.begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + stats->command_buffer_begun(command_buffer, active_frame_idx); draw(command_buffer, render_context->get_active_frame().get_render_target()); + stats->command_buffer_ending(command_buffer, active_frame_idx); command_buffer.end(); render_context->submit(command_buffer); @@ -464,4 +467,5 @@ sg::Scene &VulkanSample::get_scene() assert(scene && "Scene not loaded"); return *scene; } + } // namespace vkb diff --git a/framework/vulkan_sample.h b/framework/vulkan_sample.h index 04690d3c1d..3067f2a8ee 100644 --- a/framework/vulkan_sample.h +++ b/framework/vulkan_sample.h @@ -28,7 +28,7 @@ #include "scene_graph/node.h" #include "scene_graph/scene.h" #include "scene_graph/scripts/node_animation.h" -#include "stats.h" +#include "stats/stats.h" namespace vkb { @@ 
-184,7 +184,7 @@ class VulkanSample : public Application * @brief Update counter values * @param delta_time */ - void update_stats(float delta_time); + void update_stats(float delta_time, uint32_t active_frame_idx); /** * @brief Update GUI diff --git a/samples/performance/afbc/afbc.cpp b/samples/performance/afbc/afbc.cpp index 98bf366763..93c590cb2e 100644 --- a/samples/performance/afbc/afbc.cpp +++ b/samples/performance/afbc/afbc.cpp @@ -23,7 +23,7 @@ #include "platform/filesystem.h" #include "platform/platform.h" #include "rendering/subpasses/forward_subpass.h" -#include "stats.h" +#include "stats/stats.h" #if defined(VK_USE_PLATFORM_ANDROID_KHR) # include "platform/android/android_platform.h" @@ -62,8 +62,11 @@ bool AFBCSample::prepare(vkb::Platform &platform) set_render_pipeline(std::move(render_pipeline)); - stats = std::make_unique(std::set{vkb::StatIndex::l2_ext_write_bytes}); - gui = std::make_unique(*this, platform.get_window()); + size_t num_framebuffers = get_render_context().get_render_frames().size(); + + stats = std::make_unique(get_device(), num_framebuffers, + std::set{vkb::StatIndex::gpu_ext_write_bytes}); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/command_buffer_usage/command_buffer_usage.cpp b/samples/performance/command_buffer_usage/command_buffer_usage.cpp index df15a21d43..d5d6dc9a38 100644 --- a/samples/performance/command_buffer_usage/command_buffer_usage.cpp +++ b/samples/performance/command_buffer_usage/command_buffer_usage.cpp @@ -27,7 +27,7 @@ #include "gui.h" #include "platform/filesystem.h" #include "platform/platform.h" -#include "stats.h" +#include "stats/stats.h" CommandBufferUsage::CommandBufferUsage() { @@ -71,8 +71,11 @@ bool CommandBufferUsage::prepare(vkb::Platform &platform) set_render_pipeline(std::move(render_pipeline)); - stats = std::make_unique(std::set{vkb::StatIndex::frame_times, vkb::StatIndex::cpu_cycles}); - gui = std::make_unique(*this, 
platform.get_window()); + size_t num_framebuffers = get_render_context().get_render_frames().size(); + + stats = std::make_unique(get_device(), num_framebuffers, + std::set{vkb::StatIndex::frame_times, vkb::StatIndex::cpu_cycles}); + gui = std::make_unique(*this, platform.get_window(), stats.get()); // Adjust the maximum number of secondary command buffers // In this sample, only the recording of opaque meshes will be multi-threaded @@ -115,20 +118,24 @@ void CommandBufferUsage::update(float delta_time) subpass_state.multi_threading = gui_multi_threading; - update_scene(delta_time); + auto &render_context = get_render_context(); - update_stats(delta_time); + update_scene(delta_time); update_gui(delta_time); - auto &render_context = get_render_context(); - auto &primary_command_buffer = render_context.begin(subpass_state.command_buffer_reset_mode); + uint32_t active_frame_idx = render_context.get_active_frame_index(); + + update_stats(delta_time, active_frame_idx); + primary_command_buffer.begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + stats->command_buffer_begun(primary_command_buffer, active_frame_idx); draw(primary_command_buffer, render_context.get_active_frame().get_render_target()); + stats->command_buffer_ending(primary_command_buffer, active_frame_idx); primary_command_buffer.end(); render_context.submit(primary_command_buffer); diff --git a/samples/performance/descriptor_management/descriptor_management.cpp b/samples/performance/descriptor_management/descriptor_management.cpp index 33ce15fa39..9345f029a1 100644 --- a/samples/performance/descriptor_management/descriptor_management.cpp +++ b/samples/performance/descriptor_management/descriptor_management.cpp @@ -23,7 +23,7 @@ #include "platform/filesystem.h" #include "platform/platform.h" #include "rendering/subpasses/forward_subpass.h" -#include "stats.h" +#include "stats/stats.h" DescriptorManagement::DescriptorManagement() { @@ -57,9 +57,12 @@ bool DescriptorManagement::prepare(vkb::Platform 
&platform) render_pipeline.add_subpass(std::move(scene_subpass)); set_render_pipeline(std::move(render_pipeline)); + size_t num_framebuffers = get_render_context().get_render_frames().size(); + // Add a GUI with the stats you want to monitor - stats = std::make_unique(std::set{vkb::StatIndex::frame_times}); - gui = std::make_unique(*this, platform.get_window()); + stats = std::make_unique(get_device(), num_framebuffers, + std::set{vkb::StatIndex::frame_times}); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } @@ -68,14 +71,16 @@ void DescriptorManagement::update(float delta_time) { update_scene(delta_time); - update_stats(delta_time); - update_gui(delta_time); auto &render_context = get_render_context(); auto &command_buffer = render_context.begin(); + uint32_t active_frame_idx = render_context.get_active_frame_index(); + + update_stats(delta_time, active_frame_idx); + // Process GUI input auto buffer_alloc_strategy = (buffer_allocation.value == 0) ? vkb::BufferAllocationStrategy::OneAllocationPerBuffer : @@ -90,9 +95,11 @@ void DescriptorManagement::update(float delta_time) } command_buffer.begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); + stats->command_buffer_begun(command_buffer, active_frame_idx); draw(command_buffer, render_context.get_active_frame().get_render_target()); + stats->command_buffer_ending(command_buffer, active_frame_idx); command_buffer.end(); render_context.submit(command_buffer); diff --git a/samples/performance/layout_transitions/layout_transitions.cpp b/samples/performance/layout_transitions/layout_transitions.cpp index c94adc2eac..daf956b274 100644 --- a/samples/performance/layout_transitions/layout_transitions.cpp +++ b/samples/performance/layout_transitions/layout_transitions.cpp @@ -28,7 +28,7 @@ #include "rendering/subpasses/lighting_subpass.h" #include "scene_graph/components/material.h" #include "scene_graph/components/pbr_material.h" -#include "stats.h" +#include "stats/stats.h" 
LayoutTransitions::LayoutTransitions() { @@ -66,9 +66,12 @@ bool LayoutTransitions::prepare(vkb::Platform &platform) lighting_pipeline.add_subpass(std::move(lighting_subpass)); lighting_pipeline.set_load_store(vkb::gbuffer::get_load_all_store_swapchain()); - stats = std::make_unique(std::set{vkb::StatIndex::killed_tiles, - vkb::StatIndex::l2_ext_write_bytes}); - gui = std::make_unique(*this, platform.get_window()); + size_t num_framebuffers = get_render_context().get_render_frames().size(); + + stats = std::make_unique(get_device(), num_framebuffers, + std::set{vkb::StatIndex::gpu_killed_tiles, + vkb::StatIndex::gpu_ext_write_bytes}); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/msaa/msaa.cpp b/samples/performance/msaa/msaa.cpp index e368a8912c..fb311d8ce2 100644 --- a/samples/performance/msaa/msaa.cpp +++ b/samples/performance/msaa/msaa.cpp @@ -24,7 +24,7 @@ #include "platform/platform.h" #include "rendering/subpasses/forward_subpass.h" #include "rendering/subpasses/postprocessing_subpass.h" -#include "stats.h" +#include "stats/stats.h" namespace { @@ -125,11 +125,14 @@ bool MSAASample::prepare(vkb::Platform &platform) update_pipelines(); - stats = std::make_unique(std::set{vkb::StatIndex::frame_times, - vkb::StatIndex::l2_ext_read_bytes, - vkb::StatIndex::l2_ext_write_bytes}); + size_t num_framebuffers = get_render_context().get_render_frames().size(); - gui = std::make_unique(*this, platform.get_window()); + stats = std::make_unique(get_device(), num_framebuffers, + std::set{vkb::StatIndex::frame_times, + vkb::StatIndex::gpu_ext_read_bytes, + vkb::StatIndex::gpu_ext_write_bytes}); + + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/pipeline_barriers/pipeline_barriers.cpp b/samples/performance/pipeline_barriers/pipeline_barriers.cpp index 739ef56201..12905c01d7 100644 --- 
a/samples/performance/pipeline_barriers/pipeline_barriers.cpp +++ b/samples/performance/pipeline_barriers/pipeline_barriers.cpp @@ -29,7 +29,7 @@ #include "scene_graph/components/material.h" #include "scene_graph/components/pbr_material.h" #include "scene_graph/components/perspective_camera.h" -#include "stats.h" +#include "stats/stats.h" PipelineBarriers::PipelineBarriers() { @@ -100,11 +100,12 @@ bool PipelineBarriers::prepare(vkb::Platform &platform) lighting_pipeline.add_subpass(std::move(lighting_subpass)); lighting_pipeline.set_load_store(vkb::gbuffer::get_load_all_store_swapchain()); - stats = std::make_unique(std::set{vkb::StatIndex::frame_times, - vkb::StatIndex::vertex_compute_cycles, - vkb::StatIndex::fragment_cycles}, + size_t num_framebuffers = get_render_context().get_render_frames().size(); + + stats = std::make_unique(get_device(), num_framebuffers, + std::set{vkb::StatIndex::frame_times, vkb::StatIndex::gpu_vertex_cycles, vkb::StatIndex::gpu_fragment_cycles}, vkb::CounterSamplingConfig{vkb::CounterSamplingMode::Continuous}); - gui = std::make_unique(*this, platform.get_window()); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/pipeline_cache/pipeline_cache.cpp b/samples/performance/pipeline_cache/pipeline_cache.cpp index fc0737fd85..a1591f19d1 100644 --- a/samples/performance/pipeline_cache/pipeline_cache.cpp +++ b/samples/performance/pipeline_cache/pipeline_cache.cpp @@ -26,7 +26,7 @@ #include "platform/platform.h" #include "rendering/subpasses/forward_subpass.h" #include "scene_graph/node.h" -#include "stats.h" +#include "stats/stats.h" PipelineCache::PipelineCache() { @@ -104,14 +104,17 @@ bool PipelineCache::prepare(vkb::Platform &platform) // Build all pipelines from a previous run resource_cache.warmup(data_cache); - stats = std::make_unique(std::set{vkb::StatIndex::frame_times}); + size_t num_framebuffers = get_render_context().get_render_frames().size(); + + stats = 
std::make_unique(get_device(), num_framebuffers, + std::set{vkb::StatIndex::frame_times}); float dpi_factor = platform.get_window().get_dpi_factor(); button_size.x = button_size.x * dpi_factor; button_size.y = button_size.y * dpi_factor; - gui = std::make_unique(*this, platform.get_window()); + gui = std::make_unique(*this, platform.get_window(), stats.get()); load_scene("scenes/sponza/Sponza01.gltf"); diff --git a/samples/performance/render_passes/render_passes.cpp b/samples/performance/render_passes/render_passes.cpp index 2229fe9c47..2644d70737 100644 --- a/samples/performance/render_passes/render_passes.cpp +++ b/samples/performance/render_passes/render_passes.cpp @@ -23,7 +23,7 @@ #include "platform/filesystem.h" #include "platform/platform.h" #include "rendering/subpasses/forward_subpass.h" -#include "stats.h" +#include "stats/stats.h" #if defined(VK_USE_PLATFORM_ANDROID_KHR) # include "platform/android/android_platform.h" @@ -46,12 +46,12 @@ void RenderPassesSample::reset_stats_view() { if (load.value == VK_ATTACHMENT_LOAD_OP_LOAD) { - gui->get_stats_view().reset_max_value(vkb::StatIndex::l2_ext_read_bytes); + gui->get_stats_view().reset_max_value(vkb::StatIndex::gpu_ext_read_bytes); } if (store.value == VK_ATTACHMENT_STORE_OP_STORE) { - gui->get_stats_view().reset_max_value(vkb::StatIndex::l2_ext_write_bytes); + gui->get_stats_view().reset_max_value(vkb::StatIndex::gpu_ext_write_bytes); } } @@ -109,11 +109,13 @@ bool RenderPassesSample::prepare(vkb::Platform &platform) return false; } - auto enabled_stats = {vkb::StatIndex::fragment_cycles, - vkb::StatIndex::l2_ext_read_bytes, - vkb::StatIndex::l2_ext_write_bytes}; + auto enabled_stats = {vkb::StatIndex::gpu_fragment_cycles, + vkb::StatIndex::gpu_ext_read_bytes, + vkb::StatIndex::gpu_ext_write_bytes}; - stats = std::make_unique(enabled_stats); + size_t num_framebuffers = get_render_context().get_render_frames().size(); + + stats = std::make_unique(get_device(), num_framebuffers, enabled_stats); 
load_scene("scenes/sponza/Sponza01.gltf"); @@ -129,7 +131,7 @@ bool RenderPassesSample::prepare(vkb::Platform &platform) set_render_pipeline(std::move(render_pipeline)); - gui = std::make_unique(*this, platform.get_window()); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/render_subpasses/render_subpasses.cpp b/samples/performance/render_subpasses/render_subpasses.cpp index 5a9f8cfbe2..e8836501c6 100644 --- a/samples/performance/render_subpasses/render_subpasses.cpp +++ b/samples/performance/render_subpasses/render_subpasses.cpp @@ -154,15 +154,18 @@ bool RenderSubpasses::prepare(vkb::Platform &platform) geometry_render_pipeline = create_geometry_renderpass(); lighting_render_pipeline = create_lighting_renderpass(); - // Enable gui - gui = std::make_unique(*this, platform.get_window()); - // Enable stats - auto enabled_stats = {vkb::StatIndex::fragment_jobs, - vkb::StatIndex::tiles, - vkb::StatIndex::l2_ext_read_bytes, - vkb::StatIndex::l2_ext_write_bytes}; - stats = std::make_unique(enabled_stats); + auto enabled_stats = {vkb::StatIndex::gpu_fragment_jobs, + vkb::StatIndex::gpu_tiles, + vkb::StatIndex::gpu_ext_read_bytes, + vkb::StatIndex::gpu_ext_write_bytes}; + + size_t num_framebuffers = get_render_context().get_render_frames().size(); + + stats = std::make_unique(get_device(), num_framebuffers, enabled_stats); + + // Enable gui + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/specialization_constants/specialization_constants.cpp b/samples/performance/specialization_constants/specialization_constants.cpp index 2d017b129f..32bfc6db86 100644 --- a/samples/performance/specialization_constants/specialization_constants.cpp +++ b/samples/performance/specialization_constants/specialization_constants.cpp @@ -24,7 +24,7 @@ #include "gui.h" #include "platform/filesystem.h" #include "platform/platform.h" -#include "stats.h" +#include 
"stats/stats.h" SpecializationConstants::SpecializationConstants() { @@ -55,9 +55,12 @@ bool SpecializationConstants::prepare(vkb::Platform &platform) specialization_constants_pipeline = create_specialization_renderpass(); standard_pipeline = create_standard_renderpass(); - gui = std::make_unique(*this, platform.get_window()); + size_t num_framebuffers = get_render_context().get_render_frames().size(); - stats = std::make_unique(std::set{vkb::StatIndex::fragment_cycles}); + stats = std::make_unique(get_device(), num_framebuffers, + std::set{vkb::StatIndex::gpu_fragment_cycles}); + + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/surface_rotation/surface_rotation.cpp b/samples/performance/surface_rotation/surface_rotation.cpp index e43a26a37e..fad73f6b79 100644 --- a/samples/performance/surface_rotation/surface_rotation.cpp +++ b/samples/performance/surface_rotation/surface_rotation.cpp @@ -34,7 +34,7 @@ VKBP_ENABLE_WARNINGS() #include "rendering/subpasses/forward_subpass.h" #include "scene_graph/components/material.h" #include "scene_graph/components/pbr_material.h" -#include "stats.h" +#include "stats/stats.h" SurfaceRotation::SurfaceRotation() { @@ -56,9 +56,11 @@ bool SurfaceRotation::prepare(vkb::Platform &platform) throw std::runtime_error("Requires a surface to run sample"); } - auto enabled_stats = {vkb::StatIndex::l2_ext_read_stalls, vkb::StatIndex::l2_ext_write_stalls}; + auto enabled_stats = {vkb::StatIndex::gpu_ext_read_stalls, vkb::StatIndex::gpu_ext_write_stalls}; - stats = std::make_unique(enabled_stats); + size_t num_framebuffers = get_render_context().get_render_frames().size(); + + stats = std::make_unique(get_device(), num_framebuffers, enabled_stats); load_scene("scenes/sponza/Sponza01.gltf"); @@ -74,7 +76,7 @@ bool SurfaceRotation::prepare(vkb::Platform &platform) set_render_pipeline(std::move(render_pipeline)); - gui = std::make_unique(*this, platform.get_window()); + gui = 
std::make_unique(*this, platform.get_window(), stats.get()); return true; } @@ -122,11 +124,11 @@ void SurfaceRotation::update(float delta_time) void SurfaceRotation::draw_gui() { - auto extent = get_render_context().get_swapchain().get_extent(); - std::string rotation_by_str = pre_rotate ? "application" : "compositor"; - auto prerotate_str = "Pre-rotate (" + rotation_by_str + " rotates)"; - auto transform = vkb::to_string(get_render_context().get_swapchain().get_transform()); - auto resolution_str = "Res: " + std::to_string(extent.width) + "x" + std::to_string(extent.height); + auto extent = get_render_context().get_swapchain().get_extent(); + std::string rotation_by_str = pre_rotate ? "application" : "compositor"; + auto prerotate_str = "Pre-rotate (" + rotation_by_str + " rotates)"; + auto transform = vkb::to_string(get_render_context().get_swapchain().get_transform()); + auto resolution_str = "Res: " + std::to_string(extent.width) + "x" + std::to_string(extent.height); // If pre-rotate is enabled, the aspect ratio will not change, therefore need to check if the // scene has been rotated using the swapchain preTransform attribute diff --git a/samples/performance/swapchain_images/swapchain_images.cpp b/samples/performance/swapchain_images/swapchain_images.cpp index fbe76a787a..52338cb13c 100644 --- a/samples/performance/swapchain_images/swapchain_images.cpp +++ b/samples/performance/swapchain_images/swapchain_images.cpp @@ -27,7 +27,7 @@ #include "rendering/subpasses/forward_subpass.h" #include "scene_graph/components/material.h" #include "scene_graph/components/pbr_material.h" -#include "stats.h" +#include "stats/stats.h" #if defined(VK_USE_PLATFORM_ANDROID_KHR) # include "platform/android/android_platform.h" @@ -62,8 +62,11 @@ bool SwapchainImages::prepare(vkb::Platform &platform) set_render_pipeline(std::move(render_pipeline)); - stats = std::make_unique(std::set{vkb::StatIndex::frame_times}); - gui = std::make_unique(*this, platform.get_window()); + size_t 
num_framebuffers = get_render_context().get_render_frames().size(); + + stats = std::make_unique(get_device(), num_framebuffers, + std::set{vkb::StatIndex::frame_times}); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/wait_idle/wait_idle.cpp b/samples/performance/wait_idle/wait_idle.cpp index 396ef9a84f..50bbb68c34 100644 --- a/samples/performance/wait_idle/wait_idle.cpp +++ b/samples/performance/wait_idle/wait_idle.cpp @@ -23,7 +23,7 @@ #include "platform/filesystem.h" #include "platform/platform.h" #include "rendering/subpasses/forward_subpass.h" -#include "stats.h" +#include "stats/stats.h" #if defined(VK_USE_PLATFORM_ANDROID_KHR) # include "platform/android/android_platform.h" @@ -63,9 +63,12 @@ bool WaitIdle::prepare(vkb::Platform &platform) render_pipeline.add_subpass(std::move(scene_subpass)); set_render_pipeline(std::move(render_pipeline)); + size_t num_framebuffers = get_render_context().get_render_frames().size(); + // Add a GUI with the stats you want to monitor - stats = std::make_unique(std::set{vkb::StatIndex::frame_times}); - gui = std::make_unique(*this, platform.get_window()); + stats = std::make_unique(get_device(), num_framebuffers, + std::set{vkb::StatIndex::frame_times}); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/tests/system_test/sub_tests/bonza/bonza.cpp b/tests/system_test/sub_tests/bonza/bonza.cpp index 0929aaedfe..315b633a80 100644 --- a/tests/system_test/sub_tests/bonza/bonza.cpp +++ b/tests/system_test/sub_tests/bonza/bonza.cpp @@ -20,7 +20,7 @@ #include "gltf_loader.h" #include "gui.h" #include "platform/platform.h" -#include "stats.h" +#include "stats/stats.h" #if defined(VK_USE_PLATFORM_ANDROID_KHR) # include "platform/android/android_platform.h" #endif diff --git a/tests/system_test/sub_tests/sponza/sponza.cpp b/tests/system_test/sub_tests/sponza/sponza.cpp index e3ca89dc4c..67413e0dc5 100644 --- 
a/tests/system_test/sub_tests/sponza/sponza.cpp +++ b/tests/system_test/sub_tests/sponza/sponza.cpp @@ -20,7 +20,7 @@ #include "gltf_loader.h" #include "gui.h" #include "platform/platform.h" -#include "stats.h" +#include "stats/stats.h" #if defined(VK_USE_PLATFORM_ANDROID_KHR) # include "platform/android/android_platform.h" #endif diff --git a/tests/system_test/test_framework/gltf_loader_test.cpp b/tests/system_test/test_framework/gltf_loader_test.cpp index 2c4072ad6f..27f68c966b 100644 --- a/tests/system_test/test_framework/gltf_loader_test.cpp +++ b/tests/system_test/test_framework/gltf_loader_test.cpp @@ -22,7 +22,7 @@ #include "platform/filesystem.h" #include "platform/platform.h" #include "rendering/subpasses/forward_subpass.h" -#include "stats.h" +#include "stats/stats.h" #if defined(VK_USE_PLATFORM_ANDROID_KHR) # include "platform/android/android_platform.h" #endif diff --git a/tests/system_test/test_framework/vulkan_test.cpp b/tests/system_test/test_framework/vulkan_test.cpp index 0d63033a66..45b4db2274 100644 --- a/tests/system_test/test_framework/vulkan_test.cpp +++ b/tests/system_test/test_framework/vulkan_test.cpp @@ -20,7 +20,7 @@ #include "gltf_loader.h" #include "gui.h" #include "platform/platform.h" -#include "stats.h" +#include "stats/stats.h" #if defined(VK_USE_PLATFORM_ANDROID_KHR) # include "platform/android/android_platform.h" #endif From ff987151c718e3aaf73c8c4b4f3069bb5577e6a6 Mon Sep 17 00:00:00 2001 From: Gary Sweet Date: Wed, 6 May 2020 09:52:26 +0100 Subject: [PATCH 02/11] Fixed up doxygen and copyright warnings --- framework/core/query_pool.h | 4 ++-- framework/gui.h | 1 + framework/pch.h | 2 +- framework/stats/hwcpipe_stats_provider.h | 1 + framework/stats/stats.h | 3 +++ framework/stats/vulkan_stats_provider.h | 2 ++ framework/vulkan_sample.h | 1 + tests/system_test/sub_tests/bonza/bonza.cpp | 2 +- tests/system_test/sub_tests/sponza/sponza.cpp | 2 +- tests/system_test/test_framework/gltf_loader_test.cpp | 2 +- 
tests/system_test/test_framework/vulkan_test.cpp | 2 +- 11 files changed, 15 insertions(+), 7 deletions(-) diff --git a/framework/core/query_pool.h b/framework/core/query_pool.h index b92c6dc40d..c42f8b213e 100644 --- a/framework/core/query_pool.h +++ b/framework/core/query_pool.h @@ -54,7 +54,6 @@ class QueryPool /** * @brief Reset a range of queries in the query pool. Only call if VK_EXT_host_query_reset is enabled. - * @param pool The query pool * @param firstQuery The first query to reset * @param queryCount The number of queries to reset */ @@ -64,7 +63,8 @@ class QueryPool * @brief Get query pool results * @param first_query The initial query index * @param num_queries The number of queries to read - * @param results Result vector, must be large enough to hold results + * @param result_bytes The number of bytes in the results array + * @param results Array of bytes result_bytes long * @param stride The stride in bytes between results for individual queries * @param flags A bitmask of VkQueryResultFlagBits */ diff --git a/framework/gui.h b/framework/gui.h index b1c4270e57..a1da8e0b57 100644 --- a/framework/gui.h +++ b/framework/gui.h @@ -239,6 +239,7 @@ class Gui * @brief Initializes the Gui * @param sample A vulkan render context * @param window A Window object from which to draw DPI and content scaling information + * @param stats A statistics object (null if no statistics are used) * @param font_size The font size * @param explicit_update If true, update buffers every frame */ diff --git a/framework/pch.h b/framework/pch.h index c45199a067..f1b274fb8f 100644 --- a/framework/pch.h +++ b/framework/pch.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2018-2019, Arm Limited and Contributors +/* Copyright (c) 2018-2020, Arm Limited and Contributors * * SPDX-License-Identifier: Apache-2.0 * diff --git a/framework/stats/hwcpipe_stats_provider.h b/framework/stats/hwcpipe_stats_provider.h index 7ee2eb25e0..c9de050b6d 100644 --- a/framework/stats/hwcpipe_stats_provider.h +++ 
b/framework/stats/hwcpipe_stats_provider.h @@ -86,6 +86,7 @@ class HWCPipeStatsProvider : public StatsProvider /** * @brief Constructs a HWCPipeStateProvider * @param requested_stats Set of stats to be collected. Supported stats will be removed from the set. + * @param sampling_config Sampling mode configuration (polling or continuous) */ HWCPipeStatsProvider(std::set &requested_stats, CounterSamplingConfig sampling_config); diff --git a/framework/stats/stats.h b/framework/stats/stats.h index 2c99080771..d3e637b7dd 100644 --- a/framework/stats/stats.h +++ b/framework/stats/stats.h @@ -45,6 +45,7 @@ class Stats /** * @brief Constructs a Stats object * @param device Device on which to collect stats + * @param num_framebuffers The number of buffers in the swapchain * @param requested_stats Set of stats to be collected if available * @param sampling_config Sampling mode configuration (polling or continuous) * @param buffer_size Size of the circular buffers @@ -106,12 +107,14 @@ class Stats /** * @brief A command buffer that we want stats about has just begun * @param cb The command buffer + * @param active_frame_idx Which of the framebuffers is active */ void command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx); /** * @brief A command buffer that we want stats about is about to be ended * @param cb The command buffer + * @param active_frame_idx Which of the framebuffers is active */ void command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx); diff --git a/framework/stats/vulkan_stats_provider.h b/framework/stats/vulkan_stats_provider.h index c0e09c65f0..ea1c15f21a 100644 --- a/framework/stats/vulkan_stats_provider.h +++ b/framework/stats/vulkan_stats_provider.h @@ -79,6 +79,8 @@ class VulkanStatsProvider : public StatsProvider * @brief Constructs a VulkanStatsProvider * @param device The device on which to collect stats * @param requested_stats Set of stats to be collected. Supported stats will be removed from the set. 
+ * @param sampling_config Sampling mode configuration (polling or continuous) + * @param num_framebuffers The number of buffers in the swapchain */ VulkanStatsProvider(Device &device, std::set &requested_stats, const CounterSamplingConfig &sampling_config, size_t num_framebuffers); diff --git a/framework/vulkan_sample.h b/framework/vulkan_sample.h index 3067f2a8ee..41c2f6b32b 100644 --- a/framework/vulkan_sample.h +++ b/framework/vulkan_sample.h @@ -183,6 +183,7 @@ class VulkanSample : public Application /** * @brief Update counter values * @param delta_time + * @param active_frame_idx Which of the framebuffers is active */ void update_stats(float delta_time, uint32_t active_frame_idx); diff --git a/tests/system_test/sub_tests/bonza/bonza.cpp b/tests/system_test/sub_tests/bonza/bonza.cpp index 315b633a80..f1e9644801 100644 --- a/tests/system_test/sub_tests/bonza/bonza.cpp +++ b/tests/system_test/sub_tests/bonza/bonza.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019, Arm Limited and Contributors +/* Copyright (c) 2019-2020, Arm Limited and Contributors * * SPDX-License-Identifier: Apache-2.0 * diff --git a/tests/system_test/sub_tests/sponza/sponza.cpp b/tests/system_test/sub_tests/sponza/sponza.cpp index 67413e0dc5..97ceb2c565 100644 --- a/tests/system_test/sub_tests/sponza/sponza.cpp +++ b/tests/system_test/sub_tests/sponza/sponza.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019, Arm Limited and Contributors +/* Copyright (c) 2019-2020, Arm Limited and Contributors * * SPDX-License-Identifier: Apache-2.0 * diff --git a/tests/system_test/test_framework/gltf_loader_test.cpp b/tests/system_test/test_framework/gltf_loader_test.cpp index 27f68c966b..db94c8e2be 100644 --- a/tests/system_test/test_framework/gltf_loader_test.cpp +++ b/tests/system_test/test_framework/gltf_loader_test.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019, Arm Limited and Contributors +/* Copyright (c) 2019-2020, Arm Limited and Contributors * * SPDX-License-Identifier: Apache-2.0 * diff --git 
a/tests/system_test/test_framework/vulkan_test.cpp b/tests/system_test/test_framework/vulkan_test.cpp index 45b4db2274..93dbb024d2 100644 --- a/tests/system_test/test_framework/vulkan_test.cpp +++ b/tests/system_test/test_framework/vulkan_test.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2019, Arm Limited and Contributors +/* Copyright (c) 2019-2020, Arm Limited and Contributors * * SPDX-License-Identifier: Apache-2.0 * From 2f232d04934c4972106d2cd392bc044f8f5f1683 Mon Sep 17 00:00:00 2001 From: Gary Sweet Date: Wed, 6 May 2020 12:43:16 +0100 Subject: [PATCH 03/11] Hopefully fixed the Windows build --- framework/stats/vulkan_stats_provider.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/framework/stats/vulkan_stats_provider.cpp b/framework/stats/vulkan_stats_provider.cpp index cd5e0f4f88..f6362b3235 100644 --- a/framework/stats/vulkan_stats_provider.cpp +++ b/framework/stats/vulkan_stats_provider.cpp @@ -145,7 +145,7 @@ VulkanStatsProvider::VulkanStatsProvider(Device &device, std::set &re } // Now we know the counters and that we can collect them, make a query pool for the results. 
- if (!create_query_pools(num_framebuffers, queue_family_index)) + if (!create_query_pools(uint32_t(num_framebuffers), queue_family_index)) { stat_data.clear(); counter_indices.clear(); @@ -217,7 +217,7 @@ bool VulkanStatsProvider::create_query_pools(uint32_t num_framebuffers, uint32_t VkQueryPoolPerformanceCreateInfoKHR perf_create_info{}; perf_create_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR; perf_create_info.queueFamilyIndex = queue_family_index; - perf_create_info.counterIndexCount = counter_indices.size(); + perf_create_info.counterIndexCount = uint32_t(counter_indices.size()); perf_create_info.pCounterIndices = counter_indices.data(); uint32_t passes_needed; @@ -356,17 +356,17 @@ static double get_counter_value(const VkPerformanceCounterResultKHR &result, switch (storage) { case VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR: - return result.int32; + return double(result.int32); case VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR: - return result.int64; + return double(result.int64); case VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR: - return result.uint32; + return double(result.uint32); case VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR: - return result.uint64; + return double(result.uint64); case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR: - return result.float32; + return double(result.float32); case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR: - return result.float64; + return double(result.float64); default: assert(0); return 0.0; @@ -389,8 +389,8 @@ float VulkanStatsProvider::get_best_delta_time(float sw_delta_time, uint32_t act VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT); if (r == VK_SUCCESS) { - uint64_t elapsed_ns = (timestamps[1] - timestamps[0]) * timestamp_period; - delta_time = float(double(elapsed_ns) / 1000000000.0); + float elapsed_ns = timestamp_period * float(timestamps[1] - timestamps[0]); + delta_time = elapsed_ns * 0.000000001f; } return delta_time; From 72f948d7b8b50731a6079c484f076819548d3e9d Mon Sep 17 00:00:00 2001 
From: Gary Sweet Date: Wed, 13 May 2020 09:19:39 +0100 Subject: [PATCH 04/11] Addressed some formatting and naming feedback --- framework/core/command_buffer.cpp | 4 +-- framework/core/command_buffer.h | 2 +- framework/core/query_pool.cpp | 4 +-- framework/core/query_pool.h | 6 ++--- framework/gui.h | 3 +-- framework/stats/frame_time_stats_provider.cpp | 6 ++--- framework/stats/frame_time_stats_provider.h | 2 +- framework/stats/hwcpipe_stats_provider.cpp | 14 +++++----- framework/stats/hwcpipe_stats_provider.h | 26 +++++++++---------- framework/stats/stats.cpp | 11 ++++---- framework/stats/stats.h | 24 +++++++++++++---- framework/stats/stats_common.h | 14 +++++----- framework/stats/stats_provider.cpp | 4 +-- framework/stats/stats_provider.h | 10 +++---- framework/stats/vulkan_stats_provider.cpp | 6 ++--- framework/stats/vulkan_stats_provider.h | 5 ++-- framework/vulkan_sample.cpp | 1 + 17 files changed, 78 insertions(+), 64 deletions(-) diff --git a/framework/core/command_buffer.cpp b/framework/core/command_buffer.cpp index 5e47dc9273..71ac1098f9 100644 --- a/framework/core/command_buffer.cpp +++ b/framework/core/command_buffer.cpp @@ -770,10 +770,10 @@ void CommandBuffer::end_query(const QueryPool &query_pool, uint32_t query) vkCmdEndQuery(get_handle(), query_pool.get_handle(), query); } -void CommandBuffer::write_timestamp(VkPipelineStageFlagBits pipelineStage, +void CommandBuffer::write_timestamp(VkPipelineStageFlagBits pipeline_stage, const QueryPool &query_pool, uint32_t query) { - vkCmdWriteTimestamp(get_handle(), pipelineStage, query_pool.get_handle(), query); + vkCmdWriteTimestamp(get_handle(), pipeline_stage, query_pool.get_handle(), query); } VkResult CommandBuffer::reset(ResetMode reset_mode) diff --git a/framework/core/command_buffer.h b/framework/core/command_buffer.h index eb28e7308b..6af395055f 100644 --- a/framework/core/command_buffer.h +++ b/framework/core/command_buffer.h @@ -226,7 +226,7 @@ class CommandBuffer void end_query(const QueryPool 
&query_pool, uint32_t query); - void write_timestamp(VkPipelineStageFlagBits pipelineStage, const QueryPool &query_pool, uint32_t query); + void write_timestamp(VkPipelineStageFlagBits pipeline_stage, const QueryPool &query_pool, uint32_t query); /** * @brief Reset the command buffer to a state where it can be recorded to diff --git a/framework/core/query_pool.cpp b/framework/core/query_pool.cpp index 8613979afa..4608a55aeb 100644 --- a/framework/core/query_pool.cpp +++ b/framework/core/query_pool.cpp @@ -48,12 +48,12 @@ VkQueryPool QueryPool::get_handle() const return handle; } -void QueryPool::host_reset(uint32_t firstQuery, uint32_t queryCount) +void QueryPool::host_reset(uint32_t first_query, uint32_t query_count) { assert(device.is_enabled("VK_EXT_host_query_reset") && "VK_EXT_host_query_reset needs to be enabled to call QueryPool::host_reset"); - vkResetQueryPoolEXT(device.get_handle(), get_handle(), firstQuery, queryCount); + vkResetQueryPoolEXT(device.get_handle(), get_handle(), first_query, query_count); } VkResult QueryPool::get_results(uint32_t first_query, uint32_t num_queries, diff --git a/framework/core/query_pool.h b/framework/core/query_pool.h index c42f8b213e..47768e27f4 100644 --- a/framework/core/query_pool.h +++ b/framework/core/query_pool.h @@ -54,10 +54,10 @@ class QueryPool /** * @brief Reset a range of queries in the query pool. Only call if VK_EXT_host_query_reset is enabled. 
- * @param firstQuery The first query to reset - * @param queryCount The number of queries to reset + * @param first_query The first query to reset + * @param query_count The number of queries to reset */ - void host_reset(uint32_t firstQuery, uint32_t queryCount); + void host_reset(uint32_t first_query, uint32_t query_count); /** * @brief Get query pool results diff --git a/framework/gui.h b/framework/gui.h index a1da8e0b57..7ab9be640a 100644 --- a/framework/gui.h +++ b/framework/gui.h @@ -188,7 +188,7 @@ class Gui public: /** * @brief Constructs a StatsView - * @param stats Const pointer to the Stats data object; may be null + * @param stats Const pointer to the Stats data object; may be null */ StatsView(const Stats *stats); @@ -203,7 +203,6 @@ class Gui */ void reset_max_value(const StatIndex index); - /// Per-statistic max values std::map graph_map; float graph_height{50.0f}; diff --git a/framework/stats/frame_time_stats_provider.cpp b/framework/stats/frame_time_stats_provider.cpp index 01e104bc2a..e93ba3a45e 100644 --- a/framework/stats/frame_time_stats_provider.cpp +++ b/framework/stats/frame_time_stats_provider.cpp @@ -17,8 +17,6 @@ #include "frame_time_stats_provider.h" -#include - namespace vkb { FrameTimeStatsProvider::FrameTimeStatsProvider(std::set &requested_stats) @@ -34,9 +32,9 @@ bool FrameTimeStatsProvider::is_available(StatIndex index) const return index == StatIndex::frame_times; } -StatsProvider::Sample FrameTimeStatsProvider::sample(float delta_time, uint32_t active_frame_idx) +StatsProvider::Counters FrameTimeStatsProvider::sample(float delta_time, uint32_t active_frame_idx) { - Sample res; + Counters res; // frame_times comes directly from delta_time res[StatIndex::frame_times].result = delta_time; return res; diff --git a/framework/stats/frame_time_stats_provider.h b/framework/stats/frame_time_stats_provider.h index dec35bfd54..73c24e5aa9 100644 --- a/framework/stats/frame_time_stats_provider.h +++ 
b/framework/stats/frame_time_stats_provider.h @@ -41,6 +41,6 @@ class FrameTimeStatsProvider : public StatsProvider * @param delta_time Time since last sample * @param active_frame_idx Which of the framebuffers is active */ - Sample sample(float delta_time, uint32_t active_frame_idx) override; + Counters sample(float delta_time, uint32_t active_frame_idx) override; }; } // namespace vkb diff --git a/framework/stats/hwcpipe_stats_provider.cpp b/framework/stats/hwcpipe_stats_provider.cpp index 53b20ad5a1..9aa017d629 100644 --- a/framework/stats/hwcpipe_stats_provider.cpp +++ b/framework/stats/hwcpipe_stats_provider.cpp @@ -130,16 +130,16 @@ bool HWCPipeStatsProvider::is_available(StatIndex index) const const StatGraphData &HWCPipeStatsProvider::get_graph_data(StatIndex index) const { - static StatGraphData vertex_compute_cycles{"Vertex Compute Cycles", "{:4.1f} M/s", float(1e-6)}; - assert(is_available(index) && "HWCPipeStatsProvider::get_graph_data() called with invalid StatIndex"); + static StatGraphData vertex_compute_cycles{"Vertex Compute Cycles", "{:4.1f} M/s", float(1e-6)}; + // HWCPipe reports combined vertex/compute cycles (which is Arm specific) - // Ensure we report graph that with the correct name when asked for vertex cycles + // Ensure we report graph with the correct name when asked for vertex cycles if (index == StatIndex::gpu_vertex_cycles) return vertex_compute_cycles; - return def_graph_map[index]; + return default_graph_map[index]; } static double get_cpu_counter_value(const hwcpipe::CpuMeasurements *cpu, hwcpipe::CpuCounter counter) @@ -158,9 +158,9 @@ static double get_gpu_counter_value(const hwcpipe::GpuMeasurements *gpu, hwcpipe return 0.0; } -StatsProvider::Sample HWCPipeStatsProvider::sample(float delta_time, uint32_t active_frame_idx) +StatsProvider::Counters HWCPipeStatsProvider::sample(float delta_time, uint32_t active_frame_idx) { - Sample res; + Counters res; hwcpipe::Measurements m = hwcpipe->sample(); // Map from hwcpipe measurement 
to our sample result for each counter @@ -210,7 +210,7 @@ StatsProvider::Sample HWCPipeStatsProvider::sample(float delta_time, uint32_t ac return res; } -StatsProvider::Sample HWCPipeStatsProvider::continuous_sample(float delta_time) +StatsProvider::Counters HWCPipeStatsProvider::continuous_sample(float delta_time) { return sample(delta_time, 0); } diff --git a/framework/stats/hwcpipe_stats_provider.h b/framework/stats/hwcpipe_stats_provider.h index c9de050b6d..a4f3b7ef30 100644 --- a/framework/stats/hwcpipe_stats_provider.h +++ b/framework/stats/hwcpipe_stats_provider.h @@ -50,11 +50,11 @@ class HWCPipeStatsProvider : public StatsProvider StatData() = default; /** - * @brief Constructor for CPU counters - * @param c The CPU counter to be gathered - * @param stat_scaling The scaling to be applied to the stat - * @param divisor The CPU counter to be used as divisor if scaling is ByCounter - */ + * @brief Constructor for CPU counters + * @param c The CPU counter to be gathered + * @param stat_scaling The scaling to be applied to the stat + * @param divisor The CPU counter to be used as divisor if scaling is ByCounter + */ StatData(hwcpipe::CpuCounter c, StatScaling stat_scaling = StatScaling::ByDeltaTime, hwcpipe::CpuCounter divisor = hwcpipe::CpuCounter::MaxValue) : @@ -65,11 +65,11 @@ class HWCPipeStatsProvider : public StatsProvider {} /** - * @brief Constructor for GPU counters - * @param c The GPU counter to be gathered - * @param stat_scaling The scaling to be applied to the stat - * @param divisor The GPU counter to be used as divisor if scaling is ByCounter - */ + * @brief Constructor for GPU counters + * @param c The GPU counter to be gathered + * @param stat_scaling The scaling to be applied to the stat + * @param divisor The GPU counter to be used as divisor if scaling is ByCounter + */ StatData(hwcpipe::GpuCounter c, StatScaling stat_scaling = StatScaling::ByDeltaTime, hwcpipe::GpuCounter divisor = hwcpipe::GpuCounter::MaxValue) : @@ -106,15 +106,15 @@ 
class HWCPipeStatsProvider : public StatsProvider /** * @brief Retrieve a new sample set from polled sampling * @param delta_time Time since last sample - * @param active_frame_idx Which of the framebuffers is active + * @param active_frame_idx Which of the framebuffers is active - unused by this HWCPipeStatsProvider */ - Sample sample(float delta_time, uint32_t active_frame_idx) override; + Counters sample(float delta_time, uint32_t active_frame_idx) override; /** * @brief Retrieve a new sample set from continuous sampling * @param delta_time Time since last sample */ - Sample continuous_sample(float delta_time) override; + Counters continuous_sample(float delta_time) override; private: // The hwcpipe instance diff --git a/framework/stats/stats.cpp b/framework/stats/stats.cpp index 74d3a126e4..97fc06a8d7 100644 --- a/framework/stats/stats.cpp +++ b/framework/stats/stats.cpp @@ -23,6 +23,7 @@ #include "frame_time_stats_provider.h" #include "hwcpipe_stats_provider.h" #include "vulkan_stats_provider.h" + namespace vkb { Stats::Stats(Device &device, size_t num_framebuffers, const std::set &requested_stats, @@ -118,7 +119,7 @@ void Stats::update(float delta_time, uint32_t active_frame_idx) { case CounterSamplingMode::Polling: { - StatsProvider::Sample sample; + StatsProvider::Counters sample; for (auto &p : providers) { @@ -164,7 +165,7 @@ void Stats::update(float delta_time, uint32_t active_frame_idx) sample_count = std::max(1, std::min(sample_count, pending_samples.size())); // Get the frame time stats (not a continuous stat) - StatsProvider::Sample frame_time_sample = frame_time_provider->sample(delta_time, active_frame_idx); + StatsProvider::Counters frame_time_sample = frame_time_provider->sample(delta_time, active_frame_idx); // Push the samples to circular buffers std::for_each(pending_samples.end() - sample_count, pending_samples.end(), [this, frame_time_sample](auto &s) { @@ -200,10 +201,10 @@ void Stats::continuous_sampling_worker(std::future 
should_terminate) } // Sample counters - StatsProvider::Sample sample; + StatsProvider::Counters sample; for (auto &p : providers) { - StatsProvider::Sample s = p->continuous_sample(delta_time); + StatsProvider::Counters s = p->continuous_sample(delta_time); sample.insert(s.begin(), s.end()); } @@ -215,7 +216,7 @@ void Stats::continuous_sampling_worker(std::future should_terminate) } } -void Stats::push_sample(const StatsProvider::Sample &sample) +void Stats::push_sample(const StatsProvider::Counters &sample) { for (auto &c : counters) { diff --git a/framework/stats/stats.h b/framework/stats/stats.h index d3e637b7dd..5885900ded 100644 --- a/framework/stats/stats.h +++ b/framework/stats/stats.h @@ -105,14 +105,28 @@ class Stats void update(float delta_time, uint32_t active_frame_idx); /** - * @brief A command buffer that we want stats about has just begun + * @brief A command buffer that we want to collect stats about has just begun + * + * Some stats providers (like the Vulkan extension one) can only collect stats + * about the execution of a specific command buffer. In those cases we need to + * know when a command buffer has begun and when it's about to end so that we + * can inject some extra commands into the command buffer to control the stats + * collection. This method tells the stats provider that a command buffer has + * begun so that can happen. * @param cb The command buffer * @param active_frame_idx Which of the framebuffers is active */ void command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx); /** - * @brief A command buffer that we want stats about is about to be ended + * @brief A command buffer that we want to collect stats about is about to be ended + * + * Some stats providers (like the Vulkan extension one) can only collect stats + * about the execution of a specific command buffer. 
In those cases we need to + * know when a command buffer has begun and when it's about to end so that we + * can inject some extra commands into the command buffer to control the stats + * collection. This method tells the stats provider that a command buffer is + * about to be ended so that can happen. * @param cb The command buffer * @param active_frame_idx Which of the framebuffers is active */ @@ -153,20 +167,20 @@ class Stats std::mutex continuous_sampling_mutex; /// The samples read during continuous sampling - std::vector continuous_samples; + std::vector continuous_samples; /// A flag specifying if the worker thread should add entries to continuous_samples bool should_add_to_continuous_samples{false}; /// The samples waiting to be displayed - std::vector pending_samples; + std::vector pending_samples; /// The worker thread function for continuous sampling; /// it adds a new entry to continuous_samples at every interval void continuous_sampling_worker(std::future should_terminate); /// Updates circular buffers for CPU and GPU counters - void push_sample(const StatsProvider::Sample &sample); + void push_sample(const StatsProvider::Counters &sample); }; } // namespace vkb diff --git a/framework/stats/stats_common.h b/framework/stats/stats_common.h index 3334fbb79e..73a5d25c21 100644 --- a/framework/stats/stats_common.h +++ b/framework/stats/stats_common.h @@ -95,13 +95,13 @@ class StatGraphData { public: /** - * @brief Constructs data for the graph - * @param name Name of the Stat - * @param format Format of the label - * @param scale_factor Any scaling to apply to the data - * @param has_fixed_max Whether the data should have a fixed max value - * @param max_value The maximum value to use - */ + * @brief Constructs data for the graph + * @param name Name of the Stat + * @param format Format of the label + * @param scale_factor Any scaling to apply to the data + * @param has_fixed_max Whether the data should have a fixed max value + * @param max_value The 
maximum value to use + */ StatGraphData(const std::string &name, const std::string &format, float scale_factor = 1.0f, diff --git a/framework/stats/stats_provider.cpp b/framework/stats/stats_provider.cpp index 53b59ad7e2..e2896785df 100644 --- a/framework/stats/stats_provider.cpp +++ b/framework/stats/stats_provider.cpp @@ -20,7 +20,7 @@ namespace vkb { // Default graphing values for stats. May be overridden by individual providers. -std::map StatsProvider::def_graph_map{ +std::map StatsProvider::default_graph_map{ // clang-format off // StatIndex Name shown in graph Format Scale fixed_max max_value {StatIndex::frame_times, {"Frame Times", "{:3.1f} ms", 1000.0f}}, @@ -47,7 +47,7 @@ std::map StatsProvider::def_graph_map{ // Static const StatGraphData &StatsProvider::default_graph_data(StatIndex index) { - return def_graph_map.at(index); + return default_graph_map.at(index); } } // namespace vkb diff --git a/framework/stats/stats_provider.h b/framework/stats/stats_provider.h index f091544428..cfa48cf09e 100644 --- a/framework/stats/stats_provider.h +++ b/framework/stats/stats_provider.h @@ -38,7 +38,7 @@ class StatsProvider double result; }; - using Sample = std::unordered_map; + using Counters = std::unordered_map; /** * @brief Virtual Destructor @@ -59,7 +59,7 @@ class StatsProvider */ virtual const StatGraphData &get_graph_data(StatIndex index) const { - return def_graph_map.at(index); + return default_graph_map.at(index); } /** @@ -73,13 +73,13 @@ class StatsProvider * @param delta_time Time since last sample * @param active_frame_idx Which of the framebuffers is active */ - virtual Sample sample(float delta_time, uint32_t active_frame_idx) = 0; + virtual Counters sample(float delta_time, uint32_t active_frame_idx) = 0; /** * @brief Retrieve a new sample set from continuous sampling * @param delta_time Time since last sample */ - virtual Sample continuous_sample(float delta_time) + virtual Counters continuous_sample(float delta_time) { return Sample(); } @@ 
-101,6 +101,6 @@ class StatsProvider {} protected: - static std::map def_graph_map; + static std::map default_graph_map; }; } // namespace vkb diff --git a/framework/stats/vulkan_stats_provider.cpp b/framework/stats/vulkan_stats_provider.cpp index f6362b3235..00f3267e87 100644 --- a/framework/stats/vulkan_stats_provider.cpp +++ b/framework/stats/vulkan_stats_provider.cpp @@ -306,7 +306,7 @@ const StatGraphData &VulkanStatsProvider::get_graph_data(StatIndex index) const if (data.has_vendor_graph_data) return data.graph_data; - return def_graph_map[index]; + return default_graph_map[index]; } void VulkanStatsProvider::command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx) @@ -396,9 +396,9 @@ float VulkanStatsProvider::get_best_delta_time(float sw_delta_time, uint32_t act return delta_time; } -StatsProvider::Sample VulkanStatsProvider::sample(float delta_time, uint32_t active_frame_idx) +StatsProvider::Counters VulkanStatsProvider::sample(float delta_time, uint32_t active_frame_idx) { - Sample out; + Counters out; if (!query_pool || queries_ready == 0) return out; diff --git a/framework/stats/vulkan_stats_provider.h b/framework/stats/vulkan_stats_provider.h index ea1c15f21a..42dd6ab922 100644 --- a/framework/stats/vulkan_stats_provider.h +++ b/framework/stats/vulkan_stats_provider.h @@ -108,7 +108,7 @@ class VulkanStatsProvider : public StatsProvider * @param delta_time Time since last sample * @param active_frame_idx Which of the framebuffers is active */ - Sample sample(float delta_time, uint32_t active_frame_idx) override; + Counters sample(float delta_time, uint32_t active_frame_idx) override; /** * @brief A command buffer that we want stats about has just begun @@ -145,7 +145,7 @@ class VulkanStatsProvider : public StatsProvider // The timestamp period float timestamp_period{1.0f}; - // And one for timestamps + // Query pool for timestamps std::unique_ptr timestamp_pool; // Map of vendor specific stat data @@ -157,6 +157,7 @@ class VulkanStatsProvider 
: public StatsProvider // An ordered list of the Vulkan counter ids std::vector counter_indices; + // How many queries have been ended? uint32_t queries_ready = 0; }; diff --git a/framework/vulkan_sample.cpp b/framework/vulkan_sample.cpp index 77cf51befb..461f739966 100644 --- a/framework/vulkan_sample.cpp +++ b/framework/vulkan_sample.cpp @@ -194,6 +194,7 @@ void VulkanSample::update(float delta_time) auto &command_buffer = render_context->begin(); + // Collect the performance data for the sample graphs uint32_t active_frame_idx = render_context->get_active_frame_index(); update_stats(delta_time, active_frame_idx); From 648e2c1966ddae7570ade017b9b71fded1208cc5 Mon Sep 17 00:00:00 2001 From: Gary Sweet Date: Wed, 13 May 2020 14:36:49 +0100 Subject: [PATCH 05/11] Moved some things into physical device Simplifies code in device --- framework/core/device.cpp | 46 +++++-------------- framework/core/device.h | 4 +- framework/core/physical_device.cpp | 54 +++++++++++++++++++---- framework/core/physical_device.h | 34 ++++++++++++++ framework/stats/stats_provider.h | 2 +- framework/stats/vulkan_stats_provider.cpp | 17 +++---- 6 files changed, 102 insertions(+), 55 deletions(-) diff --git a/framework/core/device.cpp b/framework/core/device.cpp index 501a80e1af..eded99b172 100644 --- a/framework/core/device.cpp +++ b/framework/core/device.cpp @@ -25,7 +25,7 @@ VKBP_ENABLE_WARNINGS() namespace vkb { -Device::Device(const PhysicalDevice &gpu, VkSurfaceKHR surface, std::unordered_map requested_extensions) : +Device::Device(PhysicalDevice &gpu, VkSurfaceKHR surface, std::unordered_map requested_extensions) : gpu{gpu}, resource_cache{*this} { @@ -81,24 +81,15 @@ Device::Device(const PhysicalDevice &gpu, VkSurfaceKHR surface, std::unordered_m LOGI("Dedicated Allocation enabled"); } - VkPhysicalDeviceHostQueryResetFeatures hqr_features{}; - hqr_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES; - - VkPhysicalDevicePerformanceQueryFeaturesKHR 
perf_features{}; - perf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR; - perf_features.pNext = &hqr_features; - if (has_performance_query) { - // Must have VK_KHR_get_physical_device_properties2 as it's a prerequisite of perf query - VkPhysicalDeviceFeatures2KHR supported_features{}; - supported_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; - supported_features.pNext = &perf_features; + gpu.request_performance_counter_features(); + gpu.request_host_query_reset_features(); - // Check the feature support bits - vkGetPhysicalDeviceFeatures2KHR(gpu.get_handle(), &supported_features); + auto perf_counter_features = gpu.get_performance_counter_features(); + auto host_query_reset_features = gpu.get_host_query_reset_features(); - if (perf_features.performanceCounterQueryPools && hqr_features.hostQueryReset) + if (perf_counter_features.performanceCounterQueryPools && host_query_reset_features.hostQueryReset) { enabled_extensions.push_back("VK_KHR_performance_query"); enabled_extensions.push_back("VK_EXT_host_query_reset"); @@ -165,26 +156,9 @@ Device::Device(const PhysicalDevice &gpu, VkSurfaceKHR surface, std::unordered_m const auto requested_gpu_features = gpu.get_requested_features(); - if (has_performance_query) - { - // Ensure we turn on the feature bits we want - VkPhysicalDeviceFeatures2KHR requested_features{}; - requested_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; - requested_features.features = requested_gpu_features; - requested_features.pNext = &perf_features; - - // The pNext chain will be perf_features -> hqr_features -> gpu.get_requested_extension_features() - hqr_features.pNext = gpu.get_requested_extension_features(); - - create_info.pNext = &requested_features; - create_info.pEnabledFeatures = nullptr; - } - else - { - // Latest requested feature will have the pNext's all set up for device creation. 
- create_info.pNext = gpu.get_requested_extension_features(); - create_info.pEnabledFeatures = &requested_gpu_features; - } + // Latest requested feature will have the pNext's all set up for device creation. + create_info.pNext = gpu.get_requested_extension_features(); + create_info.pEnabledFeatures = &requested_gpu_features; VkResult result = vkCreateDevice(gpu.get_handle(), &create_info, nullptr, &handle); @@ -253,7 +227,7 @@ Device::Device(const PhysicalDevice &gpu, VkSurfaceKHR surface, std::unordered_m command_pool = std::make_unique(*this, get_queue_by_flags(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0).get_family_index()); fence_pool = std::make_unique(*this); -} +} // namespace vkb Device::~Device() { diff --git a/framework/core/device.h b/framework/core/device.h index 07652cf07d..40d785b88c 100644 --- a/framework/core/device.h +++ b/framework/core/device.h @@ -54,11 +54,11 @@ class Device public: /** * @brief Device constructor - * @param gpu A valid Vulkan physical device and the requested gpu features + * @param gpu A valid Vulkan physical device and the requested gpu features * @param surface The surface * @param requested_extensions (Optional) List of required device extensions and whether support is optional or not */ - Device(const PhysicalDevice &gpu, VkSurfaceKHR surface, std::unordered_map requested_extensions = {}); + Device(PhysicalDevice &gpu, VkSurfaceKHR surface, std::unordered_map requested_extensions = {}); Device(const Device &) = delete; diff --git a/framework/core/physical_device.cpp b/framework/core/physical_device.cpp index c3fb2c6f88..8c31b64c36 100644 --- a/framework/core/physical_device.cpp +++ b/framework/core/physical_device.cpp @@ -86,6 +86,25 @@ const std::vector &PhysicalDevice::get_queue_family_pro return queue_family_properties; } +uint32_t PhysicalDevice::get_queue_family_performance_query_passes( + const VkQueryPoolPerformanceCreateInfoKHR *perf_query_create_info) const +{ + uint32_t passes_needed; + 
vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(get_handle(), perf_query_create_info, + &passes_needed); + return passes_needed; +} + +void PhysicalDevice::enumerate_queue_family_performance_query_counters( + uint32_t queue_family_index, + uint32_t * count, + VkPerformanceCounterKHR * counters, + VkPerformanceCounterDescriptionKHR *descriptions) const +{ + VK_CHECK(vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( + get_handle(), queue_family_index, count, counters, descriptions)); +} + VkPhysicalDeviceFeatures &PhysicalDevice::get_mutable_requested_features() { return requested_features; @@ -111,14 +130,7 @@ void PhysicalDevice::request_descriptor_indexing_features() // Request the relevant extension descriptor_indexing_features = request_extension_features(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT); - // If an extension has already been requested, set that to the pNext element - if (last_requested_extension_feature) - { - descriptor_indexing_features.pNext = last_requested_extension_feature; - } - - // Set the last requested extension to the pointer of the most recently requested extension - last_requested_extension_feature = &descriptor_indexing_features; + chain_extension_features(descriptor_indexing_features); } const VkPhysicalDeviceDescriptorIndexingFeaturesEXT &PhysicalDevice::get_descriptor_indexing_features() const @@ -126,4 +138,30 @@ const VkPhysicalDeviceDescriptorIndexingFeaturesEXT &PhysicalDevice::get_descrip return descriptor_indexing_features; } +void PhysicalDevice::request_performance_counter_features() +{ + // Request the relevant extensions + performance_counter_features = request_extension_features(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR); + + chain_extension_features(performance_counter_features); +} + +const VkPhysicalDevicePerformanceQueryFeaturesKHR &PhysicalDevice::get_performance_counter_features() const +{ + return performance_counter_features; +} + +void 
PhysicalDevice::request_host_query_reset_features() +{ + // Request the relevant extension + host_query_reset_features = request_extension_features(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES); + + chain_extension_features(host_query_reset_features); +} + +const VkPhysicalDeviceHostQueryResetFeatures &PhysicalDevice::get_host_query_reset_features() const +{ + return host_query_reset_features; +} + } // namespace vkb diff --git a/framework/core/physical_device.h b/framework/core/physical_device.h index fb797e35d9..3b011090a9 100644 --- a/framework/core/physical_device.h +++ b/framework/core/physical_device.h @@ -57,6 +57,15 @@ class PhysicalDevice const std::vector &get_queue_family_properties() const; + uint32_t get_queue_family_performance_query_passes( + const VkQueryPoolPerformanceCreateInfoKHR *perf_query_create_info) const; + + void enumerate_queue_family_performance_query_counters( + uint32_t queue_family_index, + uint32_t * count, + VkPerformanceCounterKHR * counters, + VkPerformanceCounterDescriptionKHR *descriptions) const; + VkPhysicalDeviceFeatures &get_mutable_requested_features(); const VkPhysicalDeviceFeatures get_requested_features() const; @@ -70,6 +79,14 @@ class PhysicalDevice const VkPhysicalDeviceDescriptorIndexingFeaturesEXT &get_descriptor_indexing_features() const; + void request_performance_counter_features(); + + const VkPhysicalDevicePerformanceQueryFeaturesKHR &get_performance_counter_features() const; + + void request_host_query_reset_features(); + + const VkPhysicalDeviceHostQueryResetFeatures &get_host_query_reset_features() const; + protected: template const T request_extension_features(VkStructureType type) @@ -89,6 +106,19 @@ class PhysicalDevice return ext; } + template + void chain_extension_features(T &features) + { + // If an extension has already been requested, set that to the pNext element + if (last_requested_extension_feature) + { + features.pNext = last_requested_extension_feature; + } + + // Set the last 
requested extension to the pointer of the most recently requested extension + last_requested_extension_feature = &features; + } + private: // Handle to the Vulkan instance const Instance &instance; @@ -115,5 +145,9 @@ class PhysicalDevice void *last_requested_extension_feature{nullptr}; VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features{}; + + VkPhysicalDevicePerformanceQueryFeaturesKHR performance_counter_features{}; + + VkPhysicalDeviceHostQueryResetFeatures host_query_reset_features{}; }; } // namespace vkb diff --git a/framework/stats/stats_provider.h b/framework/stats/stats_provider.h index cfa48cf09e..84f132ab6a 100644 --- a/framework/stats/stats_provider.h +++ b/framework/stats/stats_provider.h @@ -81,7 +81,7 @@ class StatsProvider */ virtual Counters continuous_sample(float delta_time) { - return Sample(); + return Counters(); } /** diff --git a/framework/stats/vulkan_stats_provider.cpp b/framework/stats/vulkan_stats_provider.cpp index 00f3267e87..1774f1cc74 100644 --- a/framework/stats/vulkan_stats_provider.cpp +++ b/framework/stats/vulkan_stats_provider.cpp @@ -44,8 +44,8 @@ VulkanStatsProvider::VulkanStatsProvider(Device &device, std::set &re // Query number of available counters uint32_t count = 0; - VK_CHECK(vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( - gpu.get_handle(), queue_family_index, &count, nullptr, nullptr)); + gpu.enumerate_queue_family_performance_query_counters(queue_family_index, &count, + nullptr, nullptr); if (count == 0) return; // No counters available @@ -62,8 +62,8 @@ VulkanStatsProvider::VulkanStatsProvider(Device &device, std::set &re } // Now get the list of counters and their descriptions - VK_CHECK(vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( - gpu.get_handle(), queue_family_index, &count, counters.data(), descs.data())); + gpu.enumerate_queue_family_performance_query_counters(queue_family_index, &count, + counters.data(), descs.data()); // Every vendor has a 
different set of performance counters each // with different names. Match them to the stats we want, where available. @@ -204,7 +204,10 @@ bool VulkanStatsProvider::fill_vendor_data() return true; } #endif - return false; // Unsupported vendor + { + // Unsupported vendor + return false; + } } bool VulkanStatsProvider::create_query_pools(uint32_t num_framebuffers, uint32_t queue_family_index) @@ -220,9 +223,7 @@ bool VulkanStatsProvider::create_query_pools(uint32_t num_framebuffers, uint32_t perf_create_info.counterIndexCount = uint32_t(counter_indices.size()); perf_create_info.pCounterIndices = counter_indices.data(); - uint32_t passes_needed; - vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(gpu.get_handle(), &perf_create_info, - &passes_needed); + uint32_t passes_needed = gpu.get_queue_family_performance_query_passes(&perf_create_info); if (passes_needed != 1) { // Needs more than one pass, remove all our supported stats From 90210c73e458bc8cc5248a360a54637ce5b1ad50 Mon Sep 17 00:00:00 2001 From: Gary Sweet Date: Wed, 13 May 2020 15:12:15 +0100 Subject: [PATCH 06/11] Fixed incorrect namespace comment --- framework/core/device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/framework/core/device.cpp b/framework/core/device.cpp index eded99b172..799ff76f78 100644 --- a/framework/core/device.cpp +++ b/framework/core/device.cpp @@ -227,7 +227,7 @@ Device::Device(PhysicalDevice &gpu, VkSurfaceKHR surface, std::unordered_map(*this, get_queue_by_flags(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0).get_family_index()); fence_pool = std::make_unique(*this); -} // namespace vkb +} Device::~Device() { From f67eb72b0c0850a32dc8e6678816a844b8b953ad Mon Sep 17 00:00:00 2001 From: Gary Sweet Date: Mon, 18 May 2020 12:51:29 +0100 Subject: [PATCH 07/11] Tweak to Broadcom specific counters --- framework/stats/vulkan_stats_provider.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/framework/stats/vulkan_stats_provider.cpp 
b/framework/stats/vulkan_stats_provider.cpp index 1774f1cc74..b94f260957 100644 --- a/framework/stats/vulkan_stats_provider.cpp +++ b/framework/stats/vulkan_stats_provider.cpp @@ -181,8 +181,8 @@ bool VulkanStatsProvider::fill_vendor_data() // clang-format off vendor_data = { {StatIndex::gpu_cycles, {"cycle_count"}}, - {StatIndex::gpu_vertex_cycles, {"qpu_active_cycles_vertex_coord_user"}}, - {StatIndex::gpu_fragment_cycles, {"qpu_active_cycles_fragment"}}, + {StatIndex::gpu_vertex_cycles, {"gpu_vertex_cycles"}}, + {StatIndex::gpu_fragment_cycles, {"gpu_fragment_cycles"}}, {StatIndex::gpu_fragment_jobs, {"render_jobs_completed"}}, {StatIndex::gpu_ext_reads, {"gpu_mem_reads"}}, {StatIndex::gpu_ext_writes, {"gpu_mem_writes"}}, @@ -192,7 +192,7 @@ bool VulkanStatsProvider::fill_vendor_data() // clang-format on // Override vendor-specific graph data - vendor_data.at(StatIndex::gpu_vertex_cycles).set_vendor_graph_data({"Vertex/Coord/User Cycles", "{:4.0f}/s"}); + vendor_data.at(StatIndex::gpu_vertex_cycles).set_vendor_graph_data({"Vertex/Coord/User Cycles", "{:4.1f} M/s", float(1e-6)}); vendor_data.at(StatIndex::gpu_fragment_jobs).set_vendor_graph_data({"Render Jobs", "{:4.0f}/s"}); return true; From 70dc3895eebbdff43ffb1e048cc0eb1df8a341a6 Mon Sep 17 00:00:00 2001 From: Gary Sweet Date: Tue, 19 May 2020 13:21:05 +0100 Subject: [PATCH 08/11] Reworked top level stats interface and construction Based on private feedback from Zandro, I've moved the construction of the stats object into the base VulkanSample class, which leaves individual samples to just request the stats they would like. The active_frame_index is no longer passed to update_stats() or the command_buffer_begun()/command_buffer_ending() methods. It is fetched internally by the providers that require it.
--- bldsys/cmake/template/sample/sample.cpp.in | 6 +- framework/stats/frame_time_stats_provider.cpp | 2 +- framework/stats/frame_time_stats_provider.h | 3 +- framework/stats/hwcpipe_stats_provider.cpp | 4 +- framework/stats/hwcpipe_stats_provider.h | 3 +- framework/stats/stats.cpp | 65 +++++++++++-------- framework/stats/stats.h | 35 ++++++---- framework/stats/stats_provider.h | 9 +-- framework/stats/vulkan_stats_provider.cpp | 41 ++++++++---- framework/stats/vulkan_stats_provider.h | 25 ++++--- framework/vulkan_sample.cpp | 13 ++-- framework/vulkan_sample.h | 3 +- samples/performance/afbc/afbc.cpp | 6 +- .../command_buffer_usage.cpp | 14 ++-- .../descriptor_management.cpp | 15 ++--- .../layout_transitions/layout_transitions.cpp | 8 +-- samples/performance/msaa/msaa.cpp | 9 +-- .../pipeline_barriers/pipeline_barriers.cpp | 10 +-- .../pipeline_cache/pipeline_cache.cpp | 5 +- .../render_passes/render_passes.cpp | 8 +-- .../render_subpasses/render_subpasses.cpp | 8 +-- .../specialization_constants.cpp | 5 +- .../surface_rotation/surface_rotation.cpp | 7 +- .../swapchain_images/swapchain_images.cpp | 7 +- samples/performance/wait_idle/wait_idle.cpp | 7 +- 25 files changed, 152 insertions(+), 166 deletions(-) diff --git a/bldsys/cmake/template/sample/sample.cpp.in b/bldsys/cmake/template/sample/sample.cpp.in index 332e1dc4e8..8c6af69a06 100644 --- a/bldsys/cmake/template/sample/sample.cpp.in +++ b/bldsys/cmake/template/sample/sample.cpp.in @@ -51,11 +51,9 @@ bool @SAMPLE_NAME@::prepare(vkb::Platform &platform) render_pipeline.add_subpass(std::move(scene_subpass)); set_render_pipeline(std::move(render_pipeline)); - size_t num_framebuffers = get_render_context().get_render_frames().size(); - // Add a GUI with the stats you want to monitor - stats = std::make_unique(get_device(), num_framebuffers, std::set{vkb::StatIndex::frame_times}); - gui = std::make_unique(*this, platform.get_window(), stats.get()); + stats->request_stats({/*stats you require*/}); + gui = 
std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/framework/stats/frame_time_stats_provider.cpp b/framework/stats/frame_time_stats_provider.cpp index e93ba3a45e..ebdc082e5e 100644 --- a/framework/stats/frame_time_stats_provider.cpp +++ b/framework/stats/frame_time_stats_provider.cpp @@ -32,7 +32,7 @@ bool FrameTimeStatsProvider::is_available(StatIndex index) const return index == StatIndex::frame_times; } -StatsProvider::Counters FrameTimeStatsProvider::sample(float delta_time, uint32_t active_frame_idx) +StatsProvider::Counters FrameTimeStatsProvider::sample(float delta_time) { Counters res; // frame_times comes directly from delta_time diff --git a/framework/stats/frame_time_stats_provider.h b/framework/stats/frame_time_stats_provider.h index 73c24e5aa9..f62f2aa4ca 100644 --- a/framework/stats/frame_time_stats_provider.h +++ b/framework/stats/frame_time_stats_provider.h @@ -39,8 +39,7 @@ class FrameTimeStatsProvider : public StatsProvider /** * @brief Retrieve a new sample set * @param delta_time Time since last sample - * @param active_frame_idx Which of the framebuffers is active */ - Counters sample(float delta_time, uint32_t active_frame_idx) override; + Counters sample(float delta_time) override; }; } // namespace vkb diff --git a/framework/stats/hwcpipe_stats_provider.cpp b/framework/stats/hwcpipe_stats_provider.cpp index 9aa017d629..e8ce3c49cd 100644 --- a/framework/stats/hwcpipe_stats_provider.cpp +++ b/framework/stats/hwcpipe_stats_provider.cpp @@ -158,7 +158,7 @@ static double get_gpu_counter_value(const hwcpipe::GpuMeasurements *gpu, hwcpipe return 0.0; } -StatsProvider::Counters HWCPipeStatsProvider::sample(float delta_time, uint32_t active_frame_idx) +StatsProvider::Counters HWCPipeStatsProvider::sample(float delta_time) { Counters res; hwcpipe::Measurements m = hwcpipe->sample(); @@ -212,7 +212,7 @@ StatsProvider::Counters HWCPipeStatsProvider::sample(float delta_time, uint32_t StatsProvider::Counters 
HWCPipeStatsProvider::continuous_sample(float delta_time) { - return sample(delta_time, 0); + return sample(delta_time); } } // namespace vkb diff --git a/framework/stats/hwcpipe_stats_provider.h b/framework/stats/hwcpipe_stats_provider.h index a4f3b7ef30..38ebf4b0f4 100644 --- a/framework/stats/hwcpipe_stats_provider.h +++ b/framework/stats/hwcpipe_stats_provider.h @@ -106,9 +106,8 @@ class HWCPipeStatsProvider : public StatsProvider /** * @brief Retrieve a new sample set from polled sampling * @param delta_time Time since last sample - * @param active_frame_idx Which of the framebuffers is active - unused by this HWCPipeStatsProvider */ - Counters sample(float delta_time, uint32_t active_frame_idx) override; + Counters sample(float delta_time) override; /** * @brief Retrieve a new sample set from continuous sampling diff --git a/framework/stats/stats.cpp b/framework/stats/stats.cpp index 97fc06a8d7..e20475adb7 100644 --- a/framework/stats/stats.cpp +++ b/framework/stats/stats.cpp @@ -26,13 +26,37 @@ namespace vkb { -Stats::Stats(Device &device, size_t num_framebuffers, const std::set &requested_stats, - CounterSamplingConfig sampling_config, size_t buffer_size) : - requested_stats(requested_stats), - sampling_config(sampling_config), - stop_worker(std::make_unique>()) +Stats::Stats(RenderContext &render_context, size_t buffer_size) : + render_context(render_context), + buffer_size(buffer_size) { assert(buffer_size >= 2 && "Buffers size should be greater than 2"); +} + +Stats::~Stats() +{ + if (stop_worker) + { + stop_worker->set_value(); + } + + if (worker_thread.joinable()) + { + worker_thread.join(); + } +} + +void Stats::request_stats(const std::set &wanted_stats, + CounterSamplingConfig config) +{ + if (providers.size() != 0) + { + LOGE("Stats must only be requested once"); + throw std::runtime_error("Stats must only be requested once"); + } + + requested_stats = wanted_stats; + sampling_config = config; // Copy the requested stats, so they can be changed by 
the providers below std::set stats = requested_stats; @@ -42,7 +66,7 @@ Stats::Stats(Device &device, size_t num_framebuffers, const std::set // so subsequent providers only see requests for stats that aren't already supported. providers.emplace_back(std::make_unique(stats)); providers.emplace_back(std::make_unique(stats, sampling_config)); - providers.emplace_back(std::make_unique(device, stats, sampling_config, num_framebuffers)); + providers.emplace_back(std::make_unique(stats, sampling_config, render_context)); // In continuous sampling mode we still need to update the frame times as if we are polling // Store the frame time provider here so we can easily access it later. @@ -56,6 +80,8 @@ Stats::Stats(Device &device, size_t num_framebuffers, const std::set if (sampling_config.mode == CounterSamplingMode::Continuous) { // Start a thread for continuous sample capture + stop_worker = std::make_unique>(); + worker_thread = std::thread([this] { continuous_sampling_worker(stop_worker->get_future()); }); @@ -65,19 +91,6 @@ Stats::Stats(Device &device, size_t num_framebuffers, const std::set } } -Stats::~Stats() -{ - if (stop_worker) - { - stop_worker->set_value(); - } - - if (worker_thread.joinable()) - { - worker_thread.join(); - } -} - void Stats::resize(const size_t width) { // The circular buffer size will be 1/16th of the width of the screen @@ -113,7 +126,7 @@ static void add_smoothed_value(std::vector &values, float value, float al values.back() = value * alpha + *(values.end() - 2) * (1.0f - alpha); } -void Stats::update(float delta_time, uint32_t active_frame_idx) +void Stats::update(float delta_time) { switch (sampling_config.mode) { @@ -123,7 +136,7 @@ void Stats::update(float delta_time, uint32_t active_frame_idx) for (auto &p : providers) { - auto s = p->sample(delta_time, active_frame_idx); + auto s = p->sample(delta_time); sample.insert(s.begin(), s.end()); } push_sample(sample); @@ -165,7 +178,7 @@ void Stats::update(float delta_time, uint32_t 
active_frame_idx) sample_count = std::max(1, std::min(sample_count, pending_samples.size())); // Get the frame time stats (not a continuous stat) - StatsProvider::Counters frame_time_sample = frame_time_provider->sample(delta_time, active_frame_idx); + StatsProvider::Counters frame_time_sample = frame_time_provider->sample(delta_time); // Push the samples to circular buffers std::for_each(pending_samples.end() - sample_count, pending_samples.end(), [this, frame_time_sample](auto &s) { @@ -234,18 +247,18 @@ void Stats::push_sample(const StatsProvider::Counters &sample) } } -void Stats::command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx) +void Stats::command_buffer_begun(CommandBuffer &cb) { // Inform the providers for (auto &p : providers) - p->command_buffer_begun(cb, active_frame_idx); + p->command_buffer_begun(cb); } -void Stats::command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx) +void Stats::command_buffer_ending(CommandBuffer &cb) { // Inform the providers for (auto &p : providers) - p->command_buffer_ending(cb, active_frame_idx); + p->command_buffer_ending(cb); } const StatGraphData &Stats::get_graph_data(StatIndex index) const diff --git a/framework/stats/stats.h b/framework/stats/stats.h index 5885900ded..d59c1196d7 100644 --- a/framework/stats/stats.h +++ b/framework/stats/stats.h @@ -35,6 +35,7 @@ namespace vkb { class Device; class CommandBuffer; +class RenderContext; /* * @brief Helper class for querying statistics about the CPU and the GPU @@ -44,23 +45,24 @@ class Stats public: /** * @brief Constructs a Stats object - * @param device Device on which to collect stats - * @param num_framebuffers The number of buffers in the swapchain - * @param requested_stats Set of stats to be collected if available - * @param sampling_config Sampling mode configuration (polling or continuous) + * @param render_context The RenderContext for this sample * @param buffer_size Size of the circular buffers */ - Stats(Device & device, - size_t 
num_framebuffers, - const std::set &requested_stats, - CounterSamplingConfig sampling_config = {CounterSamplingMode::Polling}, - size_t buffer_size = 16); + Stats(RenderContext &render_context, size_t buffer_size = 16); /** * @brief Destroys the Stats object */ ~Stats(); + /** + * @brief Request specific set of stats to be collected + * @param requested_stats Set of stats to be collected if available + * @param sampling_config Sampling mode configuration (polling or continuous) + */ + void request_stats(const std::set &requested_stats, + CounterSamplingConfig sampling_config = {CounterSamplingMode::Polling}); + /** * @brief Resizes the stats buffers according to the width of the screen * @param width The width of the screen @@ -101,8 +103,9 @@ class Stats /** * @brief Update statistics, must be called after every frame + * @param delta_time Time since last update */ - void update(float delta_time, uint32_t active_frame_idx); + void update(float delta_time); /** * @brief A command buffer that we want to collect stats about has just begun @@ -114,9 +117,8 @@ class Stats * collection. This method tells the stats provider that a command buffer has * begun so that can happen. * @param cb The command buffer - * @param active_frame_idx Which of the framebuffers is active */ - void command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx); + void command_buffer_begun(CommandBuffer &cb); /** * @brief A command buffer that we want to collect stats about is about to be ended @@ -128,11 +130,13 @@ class Stats * collection. This method tells the stats provider that a command buffer is * about to be ended so that can happen. 
* @param cb The command buffer - * @param active_frame_idx Which of the framebuffers is active */ - void command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx); + void command_buffer_ending(CommandBuffer &cb); private: + /// The render context + RenderContext &render_context; + /// Stats that were requested - they may not all be available std::set requested_stats; @@ -145,6 +149,9 @@ class Stats /// Counter sampling configuration CounterSamplingConfig sampling_config; + /// Size of the circular buffers + size_t buffer_size; + /// Timer used in the main thread to compute delta time Timer main_timer; diff --git a/framework/stats/stats_provider.h b/framework/stats/stats_provider.h index 84f132ab6a..d01b6867c9 100644 --- a/framework/stats/stats_provider.h +++ b/framework/stats/stats_provider.h @@ -71,9 +71,8 @@ class StatsProvider /** * @brief Retrieve a new sample set * @param delta_time Time since last sample - * @param active_frame_idx Which of the framebuffers is active */ - virtual Counters sample(float delta_time, uint32_t active_frame_idx) = 0; + virtual Counters sample(float delta_time) = 0; /** * @brief Retrieve a new sample set from continuous sampling @@ -87,17 +86,15 @@ class StatsProvider /** * @brief A command buffer that we want stats about has just begun * @param cb The command buffer - * @param active_frame_idx Which of the framebuffers is active */ - virtual void command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx) + virtual void command_buffer_begun(CommandBuffer &cb) {} /** * @brief A command buffer that we want stats about is about to be ended * @param cb The command buffer - * @param active_frame_idx Which of the framebuffers is active */ - virtual void command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx) + virtual void command_buffer_ending(CommandBuffer &cb) {} protected: diff --git a/framework/stats/vulkan_stats_provider.cpp b/framework/stats/vulkan_stats_provider.cpp index b94f260957..1c04a2a3d1 100644 
--- a/framework/stats/vulkan_stats_provider.cpp +++ b/framework/stats/vulkan_stats_provider.cpp @@ -19,22 +19,24 @@ #include "core/device.h" #include "core/command_buffer.h" +#include "rendering/render_context.h" #include "vulkan_stats_provider.h" #include namespace vkb { -VulkanStatsProvider::VulkanStatsProvider(Device &device, std::set &requested_stats, +VulkanStatsProvider::VulkanStatsProvider(std::set & requested_stats, const CounterSamplingConfig &sampling_config, - size_t num_framebuffers) : - device(device) + RenderContext & render_context) : + render_context(render_context) { // Check all the Vulkan capabilities we require are present if (!is_supported(sampling_config)) return; - const PhysicalDevice &gpu = device.get_gpu(); + Device & device = render_context.get_device(); + const PhysicalDevice &gpu = device.get_gpu(); has_timestamps = gpu.get_properties().limits.timestampComputeAndGraphics; timestamp_period = gpu.get_properties().limits.timestampPeriod; @@ -145,7 +147,7 @@ VulkanStatsProvider::VulkanStatsProvider(Device &device, std::set &re } // Now we know the counters and that we can collect them, make a query pool for the results. 
- if (!create_query_pools(uint32_t(num_framebuffers), queue_family_index)) + if (!create_query_pools(queue_family_index)) { stat_data.clear(); counter_indices.clear(); @@ -164,13 +166,13 @@ VulkanStatsProvider::~VulkanStatsProvider() if (stat_data.size() > 0) { // Release profiling lock - vkReleaseProfilingLockKHR(device.get_handle()); + vkReleaseProfilingLockKHR(render_context.get_device().get_handle()); } } bool VulkanStatsProvider::fill_vendor_data() { - const auto &pd_props = device.get_gpu().get_properties(); + const auto &pd_props = render_context.get_device().get_gpu().get_properties(); if (pd_props.vendorID == 0x14E4) // Broadcom devices { LOGI("Using Vulkan performance counters from Broadcom device"); @@ -210,9 +212,11 @@ bool VulkanStatsProvider::fill_vendor_data() } } -bool VulkanStatsProvider::create_query_pools(uint32_t num_framebuffers, uint32_t queue_family_index) +bool VulkanStatsProvider::create_query_pools(uint32_t queue_family_index) { - const PhysicalDevice &gpu = device.get_gpu(); + Device & device = render_context.get_device(); + const PhysicalDevice &gpu = device.get_gpu(); + size_t num_framebuffers = render_context.get_render_frames().size(); // Now we know the available counters, we can build a query pool that will collect them. // We will check that the counters can be collected in a single pass. 
Multi-pass would @@ -273,6 +277,8 @@ bool VulkanStatsProvider::is_supported(const CounterSamplingConfig &sampling_con if (sampling_config.mode == CounterSamplingMode::Continuous) return false; + Device &device = render_context.get_device(); + // The VK_KHR_performance_query must be available and enabled if (!(device.is_enabled("VK_KHR_performance_query") && device.is_enabled("VK_EXT_host_query_reset"))) return false; @@ -310,8 +316,9 @@ const StatGraphData &VulkanStatsProvider::get_graph_data(StatIndex index) const return default_graph_map[index]; } -void VulkanStatsProvider::command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx) +void VulkanStatsProvider::command_buffer_begun(CommandBuffer &cb) { + uint32_t active_frame_idx = render_context.get_active_frame_index(); if (timestamp_pool) { // We use TimestampQueries when available to provide a more accurate delta_time. @@ -327,8 +334,10 @@ void VulkanStatsProvider::command_buffer_begun(CommandBuffer &cb, uint32_t activ cb.begin_query(*query_pool, active_frame_idx, VkQueryControlFlags(0)); } -void VulkanStatsProvider::command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx) +void VulkanStatsProvider::command_buffer_ending(CommandBuffer &cb) { + uint32_t active_frame_idx = render_context.get_active_frame_index(); + if (query_pool) { // Perform a barrier to ensure all previous commands complete before ending the query @@ -374,7 +383,7 @@ static double get_counter_value(const VkPerformanceCounterResultKHR &result, } } -float VulkanStatsProvider::get_best_delta_time(float sw_delta_time, uint32_t active_frame_idx) const +float VulkanStatsProvider::get_best_delta_time(float sw_delta_time) const { if (!timestamp_pool) return sw_delta_time; @@ -384,6 +393,8 @@ float VulkanStatsProvider::get_best_delta_time(float sw_delta_time, uint32_t act // Query the timestamps to get an accurate delta time std::array timestamps; + uint32_t active_frame_idx = render_context.get_active_frame_index(); + VkResult r = 
timestamp_pool->get_results(active_frame_idx * 2, 2, timestamps.size() * sizeof(uint64_t), timestamps.data(), sizeof(uint64_t), @@ -397,12 +408,14 @@ float VulkanStatsProvider::get_best_delta_time(float sw_delta_time, uint32_t act return delta_time; } -StatsProvider::Counters VulkanStatsProvider::sample(float delta_time, uint32_t active_frame_idx) +StatsProvider::Counters VulkanStatsProvider::sample(float delta_time) { Counters out; if (!query_pool || queries_ready == 0) return out; + uint32_t active_frame_idx = render_context.get_active_frame_index(); + VkDeviceSize stride = sizeof(VkPerformanceCounterResultKHR) * counter_indices.size(); std::vector results(counter_indices.size()); @@ -414,7 +427,7 @@ StatsProvider::Counters VulkanStatsProvider::sample(float delta_time, uint32_t a return out; // Use timestamps to get a more accurate delta if available - delta_time = get_best_delta_time(delta_time, active_frame_idx); + delta_time = get_best_delta_time(delta_time); // Parse the results - they are in the order we gave in counter_indices for (const auto &s : stat_data) diff --git a/framework/stats/vulkan_stats_provider.h b/framework/stats/vulkan_stats_provider.h index 42dd6ab922..f9482c93f4 100644 --- a/framework/stats/vulkan_stats_provider.h +++ b/framework/stats/vulkan_stats_provider.h @@ -22,6 +22,8 @@ namespace vkb { +class RenderContext; + class VulkanStatsProvider : public StatsProvider { private: @@ -77,13 +79,12 @@ class VulkanStatsProvider : public StatsProvider public: /** * @brief Constructs a VulkanStatsProvider - * @param device The device on which to collect stats * @param requested_stats Set of stats to be collected. Supported stats will be removed from the set. 
* @param sampling_config Sampling mode configuration (polling or continuous) - * @param num_framebuffers The number of buffers in the swapchain + * @param render_context The render context */ - VulkanStatsProvider(Device &device, std::set &requested_stats, - const CounterSamplingConfig &sampling_config, size_t num_framebuffers); + VulkanStatsProvider(std::set &requested_stats, const CounterSamplingConfig &sampling_config, + RenderContext &render_context); /** * @brief Destructs a VulkanStatsProvider @@ -106,35 +107,33 @@ class VulkanStatsProvider : public StatsProvider /** * @brief Retrieve a new sample set from polled sampling * @param delta_time Time since last sample - * @param active_frame_idx Which of the framebuffers is active */ - Counters sample(float delta_time, uint32_t active_frame_idx) override; + Counters sample(float delta_time) override; /** * @brief A command buffer that we want stats about has just begun * @param cb The command buffer - * @param active_frame_idx Which of the framebuffers is active */ - void command_buffer_begun(CommandBuffer &cb, uint32_t active_frame_idx) override; + void command_buffer_begun(CommandBuffer &cb) override; /** * @brief A command buffer that we want stats about is about to be ended * @param cb The command buffer - * @param active_frame_idx Which of the framebuffers is active */ - void command_buffer_ending(CommandBuffer &cb, uint32_t active_frame_idx) override; + void command_buffer_ending(CommandBuffer &cb) override; private: bool is_supported(const CounterSamplingConfig &sampling_config) const; bool fill_vendor_data(); - bool create_query_pools(uint32_t num_framebuffers, uint32_t queue_family_index); + bool create_query_pools(uint32_t queue_family_index); - float get_best_delta_time(float sw_delta_time, uint32_t active_frame_idx) const; + float get_best_delta_time(float sw_delta_time) const; private: - Device &device; + // The render context + RenderContext &render_context; // The query pool for the performance 
queries std::unique_ptr query_pool; diff --git a/framework/vulkan_sample.cpp b/framework/vulkan_sample.cpp index 461f739966..b4fc67d2a1 100644 --- a/framework/vulkan_sample.cpp +++ b/framework/vulkan_sample.cpp @@ -123,6 +123,8 @@ bool VulkanSample::prepare(Platform &platform) prepare_render_context(); + stats = std::make_unique(*render_context); + return true; } @@ -148,11 +150,11 @@ void VulkanSample::update_scene(float delta_time) } } -void VulkanSample::update_stats(float delta_time, uint32_t active_frame_idx) +void VulkanSample::update_stats(float delta_time) { if (stats) { - stats->update(delta_time, active_frame_idx); + stats->update(delta_time); static float stats_view_count = 0.0f; stats_view_count += delta_time; @@ -195,15 +197,14 @@ void VulkanSample::update(float delta_time) auto &command_buffer = render_context->begin(); // Collect the performance data for the sample graphs - uint32_t active_frame_idx = render_context->get_active_frame_index(); - update_stats(delta_time, active_frame_idx); + update_stats(delta_time); command_buffer.begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - stats->command_buffer_begun(command_buffer, active_frame_idx); + stats->command_buffer_begun(command_buffer); draw(command_buffer, render_context->get_active_frame().get_render_target()); - stats->command_buffer_ending(command_buffer, active_frame_idx); + stats->command_buffer_ending(command_buffer); command_buffer.end(); render_context->submit(command_buffer); diff --git a/framework/vulkan_sample.h b/framework/vulkan_sample.h index 41c2f6b32b..93d620713c 100644 --- a/framework/vulkan_sample.h +++ b/framework/vulkan_sample.h @@ -183,9 +183,8 @@ class VulkanSample : public Application /** * @brief Update counter values * @param delta_time - * @param active_frame_idx Which of the framebuffers is active */ - void update_stats(float delta_time, uint32_t active_frame_idx); + void update_stats(float delta_time); /** * @brief Update GUI diff --git 
a/samples/performance/afbc/afbc.cpp b/samples/performance/afbc/afbc.cpp index 93c590cb2e..2c23ca9edb 100644 --- a/samples/performance/afbc/afbc.cpp +++ b/samples/performance/afbc/afbc.cpp @@ -62,11 +62,9 @@ bool AFBCSample::prepare(vkb::Platform &platform) set_render_pipeline(std::move(render_pipeline)); - size_t num_framebuffers = get_render_context().get_render_frames().size(); + stats->request_stats({vkb::StatIndex::gpu_ext_write_bytes}); - stats = std::make_unique(get_device(), num_framebuffers, - std::set{vkb::StatIndex::gpu_ext_write_bytes}); - gui = std::make_unique(*this, platform.get_window(), stats.get()); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/command_buffer_usage/command_buffer_usage.cpp b/samples/performance/command_buffer_usage/command_buffer_usage.cpp index d5d6dc9a38..05c8acb0e4 100644 --- a/samples/performance/command_buffer_usage/command_buffer_usage.cpp +++ b/samples/performance/command_buffer_usage/command_buffer_usage.cpp @@ -71,11 +71,9 @@ bool CommandBufferUsage::prepare(vkb::Platform &platform) set_render_pipeline(std::move(render_pipeline)); - size_t num_framebuffers = get_render_context().get_render_frames().size(); + stats->request_stats({vkb::StatIndex::frame_times, vkb::StatIndex::cpu_cycles}); - stats = std::make_unique(get_device(), num_framebuffers, - std::set{vkb::StatIndex::frame_times, vkb::StatIndex::cpu_cycles}); - gui = std::make_unique(*this, platform.get_window(), stats.get()); + gui = std::make_unique(*this, platform.get_window(), stats.get()); // Adjust the maximum number of secondary command buffers // In this sample, only the recording of opaque meshes will be multi-threaded @@ -126,16 +124,14 @@ void CommandBufferUsage::update(float delta_time) auto &primary_command_buffer = render_context.begin(subpass_state.command_buffer_reset_mode); - uint32_t active_frame_idx = render_context.get_active_frame_index(); - - update_stats(delta_time, 
active_frame_idx); + update_stats(delta_time); primary_command_buffer.begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - stats->command_buffer_begun(primary_command_buffer, active_frame_idx); + stats->command_buffer_begun(primary_command_buffer); draw(primary_command_buffer, render_context.get_active_frame().get_render_target()); - stats->command_buffer_ending(primary_command_buffer, active_frame_idx); + stats->command_buffer_ending(primary_command_buffer); primary_command_buffer.end(); render_context.submit(primary_command_buffer); diff --git a/samples/performance/descriptor_management/descriptor_management.cpp b/samples/performance/descriptor_management/descriptor_management.cpp index 9345f029a1..109fdbecb7 100644 --- a/samples/performance/descriptor_management/descriptor_management.cpp +++ b/samples/performance/descriptor_management/descriptor_management.cpp @@ -57,12 +57,9 @@ bool DescriptorManagement::prepare(vkb::Platform &platform) render_pipeline.add_subpass(std::move(scene_subpass)); set_render_pipeline(std::move(render_pipeline)); - size_t num_framebuffers = get_render_context().get_render_frames().size(); - // Add a GUI with the stats you want to monitor - stats = std::make_unique(get_device(), num_framebuffers, - std::set{vkb::StatIndex::frame_times}); - gui = std::make_unique(*this, platform.get_window(), stats.get()); + stats->request_stats({vkb::StatIndex::frame_times}); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } @@ -77,9 +74,7 @@ void DescriptorManagement::update(float delta_time) auto &command_buffer = render_context.begin(); - uint32_t active_frame_idx = render_context.get_active_frame_index(); - - update_stats(delta_time, active_frame_idx); + update_stats(delta_time); // Process GUI input auto buffer_alloc_strategy = (buffer_allocation.value == 0) ? 
@@ -95,11 +90,11 @@ void DescriptorManagement::update(float delta_time) } command_buffer.begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - stats->command_buffer_begun(command_buffer, active_frame_idx); + stats->command_buffer_begun(command_buffer); draw(command_buffer, render_context.get_active_frame().get_render_target()); - stats->command_buffer_ending(command_buffer, active_frame_idx); + stats->command_buffer_ending(command_buffer); command_buffer.end(); render_context.submit(command_buffer); diff --git a/samples/performance/layout_transitions/layout_transitions.cpp b/samples/performance/layout_transitions/layout_transitions.cpp index daf956b274..7fb1a974a2 100644 --- a/samples/performance/layout_transitions/layout_transitions.cpp +++ b/samples/performance/layout_transitions/layout_transitions.cpp @@ -66,12 +66,10 @@ bool LayoutTransitions::prepare(vkb::Platform &platform) lighting_pipeline.add_subpass(std::move(lighting_subpass)); lighting_pipeline.set_load_store(vkb::gbuffer::get_load_all_store_swapchain()); - size_t num_framebuffers = get_render_context().get_render_frames().size(); + stats->request_stats({vkb::StatIndex::gpu_killed_tiles, + vkb::StatIndex::gpu_ext_write_bytes}); - stats = std::make_unique(get_device(), num_framebuffers, - std::set{vkb::StatIndex::gpu_killed_tiles, - vkb::StatIndex::gpu_ext_write_bytes}); - gui = std::make_unique(*this, platform.get_window(), stats.get()); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/msaa/msaa.cpp b/samples/performance/msaa/msaa.cpp index fb311d8ce2..7cd73258e5 100644 --- a/samples/performance/msaa/msaa.cpp +++ b/samples/performance/msaa/msaa.cpp @@ -125,12 +125,9 @@ bool MSAASample::prepare(vkb::Platform &platform) update_pipelines(); - size_t num_framebuffers = get_render_context().get_render_frames().size(); - - stats = std::make_unique(get_device(), num_framebuffers, - std::set{vkb::StatIndex::frame_times, - 
vkb::StatIndex::gpu_ext_read_bytes, - vkb::StatIndex::gpu_ext_write_bytes}); + stats->request_stats({vkb::StatIndex::frame_times, + vkb::StatIndex::gpu_ext_read_bytes, + vkb::StatIndex::gpu_ext_write_bytes}); gui = std::make_unique(*this, platform.get_window(), stats.get()); diff --git a/samples/performance/pipeline_barriers/pipeline_barriers.cpp b/samples/performance/pipeline_barriers/pipeline_barriers.cpp index 12905c01d7..94f2f97cfc 100644 --- a/samples/performance/pipeline_barriers/pipeline_barriers.cpp +++ b/samples/performance/pipeline_barriers/pipeline_barriers.cpp @@ -100,12 +100,12 @@ bool PipelineBarriers::prepare(vkb::Platform &platform) lighting_pipeline.add_subpass(std::move(lighting_subpass)); lighting_pipeline.set_load_store(vkb::gbuffer::get_load_all_store_swapchain()); - size_t num_framebuffers = get_render_context().get_render_frames().size(); + stats->request_stats({vkb::StatIndex::frame_times, + vkb::StatIndex::gpu_vertex_cycles, + vkb::StatIndex::gpu_fragment_cycles}, + vkb::CounterSamplingConfig{vkb::CounterSamplingMode::Continuous}); - stats = std::make_unique(get_device(), num_framebuffers, - std::set{vkb::StatIndex::frame_times, vkb::StatIndex::gpu_vertex_cycles, vkb::StatIndex::gpu_fragment_cycles}, - vkb::CounterSamplingConfig{vkb::CounterSamplingMode::Continuous}); - gui = std::make_unique(*this, platform.get_window(), stats.get()); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/pipeline_cache/pipeline_cache.cpp b/samples/performance/pipeline_cache/pipeline_cache.cpp index a1591f19d1..caebe2d84c 100644 --- a/samples/performance/pipeline_cache/pipeline_cache.cpp +++ b/samples/performance/pipeline_cache/pipeline_cache.cpp @@ -104,10 +104,7 @@ bool PipelineCache::prepare(vkb::Platform &platform) // Build all pipelines from a previous run resource_cache.warmup(data_cache); - size_t num_framebuffers = get_render_context().get_render_frames().size(); - - stats = 
std::make_unique(get_device(), num_framebuffers, - std::set{vkb::StatIndex::frame_times}); + stats->request_stats({vkb::StatIndex::frame_times}); float dpi_factor = platform.get_window().get_dpi_factor(); diff --git a/samples/performance/render_passes/render_passes.cpp b/samples/performance/render_passes/render_passes.cpp index 2644d70737..1ae9792b9a 100644 --- a/samples/performance/render_passes/render_passes.cpp +++ b/samples/performance/render_passes/render_passes.cpp @@ -109,13 +109,9 @@ bool RenderPassesSample::prepare(vkb::Platform &platform) return false; } - auto enabled_stats = {vkb::StatIndex::gpu_fragment_cycles, + stats->request_stats({vkb::StatIndex::gpu_fragment_cycles, vkb::StatIndex::gpu_ext_read_bytes, - vkb::StatIndex::gpu_ext_write_bytes}; - - size_t num_framebuffers = get_render_context().get_render_frames().size(); - - stats = std::make_unique(get_device(), num_framebuffers, enabled_stats); + vkb::StatIndex::gpu_ext_write_bytes}); load_scene("scenes/sponza/Sponza01.gltf"); diff --git a/samples/performance/render_subpasses/render_subpasses.cpp b/samples/performance/render_subpasses/render_subpasses.cpp index e8836501c6..9948eb600d 100644 --- a/samples/performance/render_subpasses/render_subpasses.cpp +++ b/samples/performance/render_subpasses/render_subpasses.cpp @@ -155,14 +155,10 @@ bool RenderSubpasses::prepare(vkb::Platform &platform) lighting_render_pipeline = create_lighting_renderpass(); // Enable stats - auto enabled_stats = {vkb::StatIndex::gpu_fragment_jobs, + stats->request_stats({vkb::StatIndex::gpu_fragment_jobs, vkb::StatIndex::gpu_tiles, vkb::StatIndex::gpu_ext_read_bytes, - vkb::StatIndex::gpu_ext_write_bytes}; - - size_t num_framebuffers = get_render_context().get_render_frames().size(); - - stats = std::make_unique(get_device(), num_framebuffers, enabled_stats); + vkb::StatIndex::gpu_ext_write_bytes}); // Enable gui gui = std::make_unique(*this, platform.get_window(), stats.get()); diff --git 
a/samples/performance/specialization_constants/specialization_constants.cpp b/samples/performance/specialization_constants/specialization_constants.cpp index 32bfc6db86..28166763b5 100644 --- a/samples/performance/specialization_constants/specialization_constants.cpp +++ b/samples/performance/specialization_constants/specialization_constants.cpp @@ -55,10 +55,7 @@ bool SpecializationConstants::prepare(vkb::Platform &platform) specialization_constants_pipeline = create_specialization_renderpass(); standard_pipeline = create_standard_renderpass(); - size_t num_framebuffers = get_render_context().get_render_frames().size(); - - stats = std::make_unique(get_device(), num_framebuffers, - std::set{vkb::StatIndex::gpu_fragment_cycles}); + stats->request_stats({vkb::StatIndex::gpu_fragment_cycles}); gui = std::make_unique(*this, platform.get_window(), stats.get()); diff --git a/samples/performance/surface_rotation/surface_rotation.cpp b/samples/performance/surface_rotation/surface_rotation.cpp index fad73f6b79..ce66cc0fae 100644 --- a/samples/performance/surface_rotation/surface_rotation.cpp +++ b/samples/performance/surface_rotation/surface_rotation.cpp @@ -56,11 +56,8 @@ bool SurfaceRotation::prepare(vkb::Platform &platform) throw std::runtime_error("Requires a surface to run sample"); } - auto enabled_stats = {vkb::StatIndex::gpu_ext_read_stalls, vkb::StatIndex::gpu_ext_write_stalls}; - - size_t num_framebuffers = get_render_context().get_render_frames().size(); - - stats = std::make_unique(get_device(), num_framebuffers, enabled_stats); + stats->request_stats({vkb::StatIndex::gpu_ext_read_stalls, + vkb::StatIndex::gpu_ext_write_stalls}); load_scene("scenes/sponza/Sponza01.gltf"); diff --git a/samples/performance/swapchain_images/swapchain_images.cpp b/samples/performance/swapchain_images/swapchain_images.cpp index 52338cb13c..9b9a78cf60 100644 --- a/samples/performance/swapchain_images/swapchain_images.cpp +++ b/samples/performance/swapchain_images/swapchain_images.cpp 
@@ -62,11 +62,8 @@ bool SwapchainImages::prepare(vkb::Platform &platform) set_render_pipeline(std::move(render_pipeline)); - size_t num_framebuffers = get_render_context().get_render_frames().size(); - - stats = std::make_unique(get_device(), num_framebuffers, - std::set{vkb::StatIndex::frame_times}); - gui = std::make_unique(*this, platform.get_window(), stats.get()); + stats->request_stats({vkb::StatIndex::frame_times}); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } diff --git a/samples/performance/wait_idle/wait_idle.cpp b/samples/performance/wait_idle/wait_idle.cpp index 50bbb68c34..9c38f4a6d1 100644 --- a/samples/performance/wait_idle/wait_idle.cpp +++ b/samples/performance/wait_idle/wait_idle.cpp @@ -63,12 +63,9 @@ bool WaitIdle::prepare(vkb::Platform &platform) render_pipeline.add_subpass(std::move(scene_subpass)); set_render_pipeline(std::move(render_pipeline)); - size_t num_framebuffers = get_render_context().get_render_frames().size(); - // Add a GUI with the stats you want to monitor - stats = std::make_unique(get_device(), num_framebuffers, - std::set{vkb::StatIndex::frame_times}); - gui = std::make_unique(*this, platform.get_window(), stats.get()); + stats->request_stats({vkb::StatIndex::frame_times}); + gui = std::make_unique(*this, platform.get_window(), stats.get()); return true; } From 16ba1b6a33ec2444654aa5f80cf5f0f8eeaffccf Mon Sep 17 00:00:00 2001 From: Gary Sweet Date: Tue, 19 May 2020 15:56:01 +0100 Subject: [PATCH 09/11] Fix Windows build size_t -> uint32_t --- framework/stats/vulkan_stats_provider.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/framework/stats/vulkan_stats_provider.cpp b/framework/stats/vulkan_stats_provider.cpp index 1c04a2a3d1..e34d54f374 100644 --- a/framework/stats/vulkan_stats_provider.cpp +++ b/framework/stats/vulkan_stats_provider.cpp @@ -216,7 +216,7 @@ bool VulkanStatsProvider::create_query_pools(uint32_t queue_family_index) { Device & device = 
render_context.get_device(); const PhysicalDevice &gpu = device.get_gpu(); - size_t num_framebuffers = render_context.get_render_frames().size(); + uint32_t num_framebuffers = uint32_t(render_context.get_render_frames().size()); // Now we know the available counters, we can build a query pool that will collect them. // We will check that the counters can be collected in a single pass. Multi-pass would From 8260b968e62d5681bf2d6777c0a82bc0ed20242b Mon Sep 17 00:00:00 2001 From: Gary Sweet Date: Tue, 26 May 2020 10:11:38 +0100 Subject: [PATCH 10/11] Tidy up some formatting and rename some methods --- framework/stats/hwcpipe_stats_provider.cpp | 8 +++++--- framework/stats/hwcpipe_stats_provider.h | 3 +-- framework/stats/stats.cpp | 11 +++++------ framework/stats/stats.h | 10 ++++++---- framework/stats/stats_provider.h | 4 ++-- framework/stats/vulkan_stats_provider.cpp | 4 ++-- framework/stats/vulkan_stats_provider.h | 4 ++-- framework/vulkan_sample.cpp | 4 ++-- .../command_buffer_usage/command_buffer_usage.cpp | 4 ++-- .../descriptor_management/descriptor_management.cpp | 4 ++-- 10 files changed, 29 insertions(+), 27 deletions(-) diff --git a/framework/stats/hwcpipe_stats_provider.cpp b/framework/stats/hwcpipe_stats_provider.cpp index e8ce3c49cd..384a62567f 100644 --- a/framework/stats/hwcpipe_stats_provider.cpp +++ b/framework/stats/hwcpipe_stats_provider.cpp @@ -22,9 +22,7 @@ namespace vkb { -HWCPipeStatsProvider::HWCPipeStatsProvider(std::set & requested_stats, - CounterSamplingConfig sampling_config) : - sampling_config(sampling_config) +HWCPipeStatsProvider::HWCPipeStatsProvider(std::set &requested_stats) { // Mapping of stats to their hwcpipe availability // clang-format off @@ -93,7 +91,9 @@ HWCPipeStatsProvider::HWCPipeStatsProvider(std::set & requested_stats ++iter; } else + { iter = stat_data.erase(iter); + } break; } case StatType::Gpu: @@ -107,7 +107,9 @@ HWCPipeStatsProvider::HWCPipeStatsProvider(std::set & requested_stats ++iter; } else + { iter = 
stat_data.erase(iter); + } break; } } diff --git a/framework/stats/hwcpipe_stats_provider.h b/framework/stats/hwcpipe_stats_provider.h index 38ebf4b0f4..81df4303de 100644 --- a/framework/stats/hwcpipe_stats_provider.h +++ b/framework/stats/hwcpipe_stats_provider.h @@ -86,9 +86,8 @@ class HWCPipeStatsProvider : public StatsProvider /** * @brief Constructs a HWCPipeStateProvider * @param requested_stats Set of stats to be collected. Supported stats will be removed from the set. - * @param sampling_config Sampling mode configuration (polling or continuous) */ - HWCPipeStatsProvider(std::set &requested_stats, CounterSamplingConfig sampling_config); + HWCPipeStatsProvider(std::set &requested_stats); /** * @brief Checks if this provider can supply the given enabled stat diff --git a/framework/stats/stats.cpp b/framework/stats/stats.cpp index e20475adb7..f704894dff 100644 --- a/framework/stats/stats.cpp +++ b/framework/stats/stats.cpp @@ -51,7 +51,6 @@ void Stats::request_stats(const std::set &wanted_stats, { if (providers.size() != 0) { - LOGE("Stats must only be requested once"); throw std::runtime_error("Stats must only be requested once"); } @@ -65,7 +64,7 @@ void Stats::request_stats(const std::set &wanted_stats, // All supported stats will be removed from the given 'stats' set by the provider's constructor // so subsequent providers only see requests for stats that aren't already supported. 
providers.emplace_back(std::make_unique(stats)); - providers.emplace_back(std::make_unique(stats, sampling_config)); + providers.emplace_back(std::make_unique(stats)); providers.emplace_back(std::make_unique(stats, sampling_config, render_context)); // In continuous sampling mode we still need to update the frame times as if we are polling @@ -247,18 +246,18 @@ void Stats::push_sample(const StatsProvider::Counters &sample) } } -void Stats::command_buffer_begun(CommandBuffer &cb) +void Stats::begin_sampling(CommandBuffer &cb) { // Inform the providers for (auto &p : providers) - p->command_buffer_begun(cb); + p->begin_sampling(cb); } -void Stats::command_buffer_ending(CommandBuffer &cb) +void Stats::end_sampling(CommandBuffer &cb) { // Inform the providers for (auto &p : providers) - p->command_buffer_ending(cb); + p->end_sampling(cb); } const StatGraphData &Stats::get_graph_data(StatIndex index) const diff --git a/framework/stats/stats.h b/framework/stats/stats.h index d59c1196d7..069b8401c6 100644 --- a/framework/stats/stats.h +++ b/framework/stats/stats.h @@ -115,10 +115,11 @@ class Stats * know when a command buffer has begun and when it's about to end so that we * can inject some extra commands into the command buffer to control the stats * collection. This method tells the stats provider that a command buffer has - * begun so that can happen. + * begun so that can happen. The command buffer must be in a recording state + * when this method is called. * @param cb The command buffer */ - void command_buffer_begun(CommandBuffer &cb); + void begin_sampling(CommandBuffer &cb); /** * @brief A command buffer that we want to collect stats about is about to be ended @@ -128,10 +129,11 @@ class Stats * know when a command buffer has begun and when it's about to end so that we * can inject some extra commands into the command buffer to control the stats * collection. This method tells the stats provider that a command buffer is - * about to be ended so that can happen. 
+ * about to be ended so that can happen. The command buffer must be in a recording + * state when this method is called. * @param cb The command buffer */ - void command_buffer_ending(CommandBuffer &cb); + void end_sampling(CommandBuffer &cb); private: /// The render context diff --git a/framework/stats/stats_provider.h b/framework/stats/stats_provider.h index d01b6867c9..a5aa92013a 100644 --- a/framework/stats/stats_provider.h +++ b/framework/stats/stats_provider.h @@ -87,14 +87,14 @@ class StatsProvider * @brief A command buffer that we want stats about has just begun * @param cb The command buffer */ - virtual void command_buffer_begun(CommandBuffer &cb) + virtual void begin_sampling(CommandBuffer &cb) {} /** * @brief A command buffer that we want stats about is about to be ended * @param cb The command buffer */ - virtual void command_buffer_ending(CommandBuffer &cb) + virtual void end_sampling(CommandBuffer &cb) {} protected: diff --git a/framework/stats/vulkan_stats_provider.cpp b/framework/stats/vulkan_stats_provider.cpp index e34d54f374..046d4ce5b2 100644 --- a/framework/stats/vulkan_stats_provider.cpp +++ b/framework/stats/vulkan_stats_provider.cpp @@ -316,7 +316,7 @@ const StatGraphData &VulkanStatsProvider::get_graph_data(StatIndex index) const return default_graph_map[index]; } -void VulkanStatsProvider::command_buffer_begun(CommandBuffer &cb) +void VulkanStatsProvider::begin_sampling(CommandBuffer &cb) { uint32_t active_frame_idx = render_context.get_active_frame_index(); if (timestamp_pool) @@ -334,7 +334,7 @@ void VulkanStatsProvider::command_buffer_begun(CommandBuffer &cb) cb.begin_query(*query_pool, active_frame_idx, VkQueryControlFlags(0)); } -void VulkanStatsProvider::command_buffer_ending(CommandBuffer &cb) +void VulkanStatsProvider::end_sampling(CommandBuffer &cb) { uint32_t active_frame_idx = render_context.get_active_frame_index(); diff --git a/framework/stats/vulkan_stats_provider.h b/framework/stats/vulkan_stats_provider.h index 
f9482c93f4..fdd51722e4 100644 --- a/framework/stats/vulkan_stats_provider.h +++ b/framework/stats/vulkan_stats_provider.h @@ -114,13 +114,13 @@ class VulkanStatsProvider : public StatsProvider * @brief A command buffer that we want stats about has just begun * @param cb The command buffer */ - void command_buffer_begun(CommandBuffer &cb) override; + void begin_sampling(CommandBuffer &cb) override; /** * @brief A command buffer that we want stats about is about to be ended * @param cb The command buffer */ - void command_buffer_ending(CommandBuffer &cb) override; + void end_sampling(CommandBuffer &cb) override; private: bool is_supported(const CounterSamplingConfig &sampling_config) const; diff --git a/framework/vulkan_sample.cpp b/framework/vulkan_sample.cpp index b4fc67d2a1..ef353e35ef 100644 --- a/framework/vulkan_sample.cpp +++ b/framework/vulkan_sample.cpp @@ -200,11 +200,11 @@ void VulkanSample::update(float delta_time) update_stats(delta_time); command_buffer.begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - stats->command_buffer_begun(command_buffer); + stats->begin_sampling(command_buffer); draw(command_buffer, render_context->get_active_frame().get_render_target()); - stats->command_buffer_ending(command_buffer); + stats->end_sampling(command_buffer); command_buffer.end(); render_context->submit(command_buffer); diff --git a/samples/performance/command_buffer_usage/command_buffer_usage.cpp b/samples/performance/command_buffer_usage/command_buffer_usage.cpp index 05c8acb0e4..037e33da61 100644 --- a/samples/performance/command_buffer_usage/command_buffer_usage.cpp +++ b/samples/performance/command_buffer_usage/command_buffer_usage.cpp @@ -127,11 +127,11 @@ void CommandBufferUsage::update(float delta_time) update_stats(delta_time); primary_command_buffer.begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - stats->command_buffer_begun(primary_command_buffer); + stats->begin_sampling(primary_command_buffer); draw(primary_command_buffer, 
render_context.get_active_frame().get_render_target()); - stats->command_buffer_ending(primary_command_buffer); + stats->end_sampling(primary_command_buffer); primary_command_buffer.end(); render_context.submit(primary_command_buffer); diff --git a/samples/performance/descriptor_management/descriptor_management.cpp b/samples/performance/descriptor_management/descriptor_management.cpp index 109fdbecb7..99f55cd907 100644 --- a/samples/performance/descriptor_management/descriptor_management.cpp +++ b/samples/performance/descriptor_management/descriptor_management.cpp @@ -90,11 +90,11 @@ void DescriptorManagement::update(float delta_time) } command_buffer.begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - stats->command_buffer_begun(command_buffer); + stats->begin_sampling(command_buffer); draw(command_buffer, render_context.get_active_frame().get_render_target()); - stats->command_buffer_ending(command_buffer); + stats->end_sampling(command_buffer); command_buffer.end(); render_context.submit(command_buffer); From 9d15a8c6fc8e43d38aa7782757f1603b23d7b65e Mon Sep 17 00:00:00 2001 From: AlejandroCosin Date: Thu, 11 Jun 2020 14:47:22 +0100 Subject: [PATCH 11/11] Fix Wait Idle sample: render_context is reset in WaitIdle::prepare and then used some lines below in the call to stats->request_stats --- samples/performance/wait_idle/wait_idle.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samples/performance/wait_idle/wait_idle.cpp b/samples/performance/wait_idle/wait_idle.cpp index 9c38f4a6d1..53fe027662 100644 --- a/samples/performance/wait_idle/wait_idle.cpp +++ b/samples/performance/wait_idle/wait_idle.cpp @@ -64,6 +64,8 @@ bool WaitIdle::prepare(vkb::Platform &platform) set_render_pipeline(std::move(render_pipeline)); // Add a GUI with the stats you want to monitor + stats.reset(); + stats = std::make_unique(*render_context); stats->request_stats({vkb::StatIndex::frame_times}); gui = std::make_unique(*this, platform.get_window(), stats.get());