diff --git a/src/config.def b/src/config.def index d2e86bb6..62ff0a2c 100644 --- a/src/config.def +++ b/src/config.def @@ -71,6 +71,8 @@ OPTION(bool, keep_memory_allocations_mapped, false) OPTION(std::string, device_extensions, "") OPTION(std::string, device_extensions_masked, "") +OPTION(bool, reuse_descriptor_set, false) + // // Logging // diff --git a/src/device.hpp b/src/device.hpp index 3ea45c8a..1e8ee5fe 100644 --- a/src/device.hpp +++ b/src/device.hpp @@ -714,6 +714,10 @@ struct cvk_device : public _cl_device_id, return m_clvk_properties->keep_memory_allocations_mapped(); } + bool reuse_descriptor_set() const { + return m_clvk_properties->reuse_descriptor_set(); + } + TRACE_TRACK_FCT(device_track, "clvk-device_" + std::to_string((uintptr_t)this)) diff --git a/src/device_properties.cpp b/src/device_properties.cpp index 388d9100..68c90141 100644 --- a/src/device_properties.cpp +++ b/src/device_properties.cpp @@ -26,8 +26,9 @@ struct cvk_device_properties_mali : public cvk_device_properties { cl_uint get_max_first_cmd_batch_size() const override final { return 10; } cl_uint get_max_cmd_group_size() const override final { return 1; } - cvk_device_properties_mali(const uint32_t deviceID) - : m_deviceID(deviceID) {} + cvk_device_properties_mali(const uint32_t deviceID, + const VkDriverId driverID) + : m_deviceID(deviceID), m_driverID(driverID) {} bool is_non_uniform_decoration_broken() const override final { #define GPU_ID2_ARCH_MAJOR_SHIFT 28 @@ -37,24 +38,31 @@ struct cvk_device_properties_mali : public cvk_device_properties { return (m_deviceID & GPU_ID2_ARCH_MAJOR) <= bifrost_arch_major; } + bool reuse_descriptor_set() const override final { + return m_driverID == VK_DRIVER_ID_ARM_PROPRIETARY; + } + private: const uint32_t m_deviceID; + const VkDriverId m_driverID; }; struct cvk_device_properties_mali_exynos9820 : public cvk_device_properties_mali { cl_ulong get_global_mem_cache_size() const override final { return 262144; } cl_ulong get_num_compute_units() const override final { return 12; } - cvk_device_properties_mali_exynos9820(const uint32_t deviceID) - : cvk_device_properties_mali(deviceID) {} + cvk_device_properties_mali_exynos9820(const uint32_t deviceID, + const VkDriverId driverID) + : cvk_device_properties_mali(deviceID, driverID) {} }; struct cvk_device_properties_mali_exynos990 : public cvk_device_properties_mali { cl_ulong get_global_mem_cache_size() const override final { return 262144; } cl_ulong get_num_compute_units() const override final { return 11; } - cvk_device_properties_mali_exynos990(const uint32_t deviceID) - : cvk_device_properties_mali(deviceID) {} + cvk_device_properties_mali_exynos990(const uint32_t deviceID, + const VkDriverId driverID) + : cvk_device_properties_mali(deviceID, driverID) {} }; static bool isMaliDevice(const char* name, const uint32_t vendorID) { @@ -264,9 +272,9 @@ std::unique_ptr create_cvk_device_properties( cvk_warn("Unable to query 'ro.hardware' system property, some " "device properties will be incorrect."); } else if (strcmp(soc, "exynos9820") == 0) { - RETURN(cvk_device_properties_mali_exynos9820, deviceID); + RETURN(cvk_device_properties_mali_exynos9820, deviceID, driverID); } else if (strcmp(soc, "exynos990") == 0) { - RETURN(cvk_device_properties_mali_exynos990, deviceID); + RETURN(cvk_device_properties_mali_exynos990, deviceID, driverID); } else { cvk_warn("Unrecognized 'ro.hardware' value '%s', some device " "properties will be incorrect.", @@ -276,7 +284,7 @@ std::unique_ptr create_cvk_device_properties( cvk_warn("Unrecognized Mali device, some device properties will be " "incorrect."); #endif - RETURN(cvk_device_properties_mali, deviceID); + RETURN(cvk_device_properties_mali, deviceID, driverID); } else if (strcmp(name, "Adreno (TM) 615") == 0) { RETURN(cvk_device_properties_adreno_615); } else if (strcmp(name, "Adreno (TM) 620") == 0) { diff --git a/src/device_properties.hpp b/src/device_properties.hpp index f99520f7..d3bc0857 100644 --- a/src/device_properties.hpp +++ b/src/device_properties.hpp @@ -75,6 +75,10 @@ struct cvk_device_properties { return config.keep_memory_allocations_mapped(); } + virtual bool reuse_descriptor_set() const { + return config.reuse_descriptor_set(); + } + virtual ~cvk_device_properties() {} }; diff --git a/src/kernel.hpp b/src/kernel.hpp index 43b9a259..c97151a3 100644 --- a/src/kernel.hpp +++ b/src/kernel.hpp @@ -194,11 +194,7 @@ struct cvk_kernel_argument_values { m_descriptor_sets_refcount(0) {} ~cvk_kernel_argument_values() { - for (auto ds : m_descriptor_sets) { - if (ds != VK_NULL_HANDLE) { - m_entry_point->free_descriptor_set(ds); - } - } + m_entry_point->free_descriptor_sets(m_descriptor_sets); } static std::shared_ptr @@ -385,12 +381,7 @@ struct cvk_kernel_argument_values { std::lock_guard lock(m_lock); if (--m_descriptor_sets_refcount == 0) { m_is_enqueued = false; - for (auto& ds : m_descriptor_sets) { - if (ds != VK_NULL_HANDLE) { - m_entry_point->free_descriptor_set(ds); - ds = VK_NULL_HANDLE; - } - } + m_entry_point->free_descriptor_sets(m_descriptor_sets); } } @@ -437,7 +428,6 @@ struct cvk_kernel_argument_values { std::vector m_args_set; std::unique_ptr m_pod_buffer; - std::array - m_descriptor_sets; + cvk_descriptor_set_array m_descriptor_sets; uint32_t m_descriptor_sets_refcount; }; diff --git a/src/program.cpp b/src/program.cpp index 29c5c579..35c156e9 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -2270,6 +2270,16 @@ bool cvk_entry_point::allocate_descriptor_sets(VkDescriptorSet* ds) { std::lock_guard lock(m_descriptor_pool_lock); + if (m_device->reuse_descriptor_set() && !m_descriptor_sets_array.empty()) { + cvk_descriptor_set_array descriptor_sets = + m_descriptor_sets_array.back(); + m_descriptor_sets_array.pop_back(); + for (unsigned i = 0; i < descriptor_sets.size(); i++) { + ds[i] = descriptor_sets[i]; + } + return true; + } + #if CLVK_UNIT_TESTING_ENABLED if (config.force_descriptor_set_allocation_failure() && m_nb_descriptor_set_allocated + m_descriptor_set_layouts.size() > diff --git a/src/program.hpp b/src/program.hpp index 7d01858c..2106631e 100644 --- a/src/program.hpp +++ b/src/program.hpp @@ -428,6 +428,9 @@ using cvk_spec_constant_map = std::map; struct cvk_program; +using cvk_descriptor_set_array = + std::array; + class cvk_entry_point { public: cvk_entry_point(cvk_device* dev, cvk_program* program, @@ -440,6 +443,11 @@ class cvk_entry_point { m_name.c_str()); vkDestroyPipeline(vkdev, pipeline.second, nullptr); } + for (auto& descriptor_sets : m_descriptor_sets_array) { + vkFreeDescriptorSets(m_device->vulkan_device(), m_descriptor_pool, + descriptor_sets.size(), + descriptor_sets.data()); + } if (m_descriptor_pool != VK_NULL_HANDLE) { vkDestroyDescriptorPool(vkdev, m_descriptor_pool, nullptr); } @@ -458,14 +466,23 @@ class cvk_entry_point { CHECK_RETURN bool allocate_descriptor_sets(VkDescriptorSet* ds); - void free_descriptor_set(VkDescriptorSet ds) { + void free_descriptor_sets(cvk_descriptor_set_array& descriptor_sets) { TRACE_FUNCTION(); + if (descriptor_sets == cvk_descriptor_set_array{VK_NULL_HANDLE}) { + return; + } std::lock_guard lock(m_descriptor_pool_lock); - vkFreeDescriptorSets(m_device->vulkan_device(), m_descriptor_pool, 1, - &ds); - m_nb_descriptor_set_allocated--; - TRACE_CNT(descriptor_set_allocated_counter, - m_nb_descriptor_set_allocated); + if (m_device->reuse_descriptor_set()) { + m_descriptor_sets_array.push_back(descriptor_sets); + } else { + vkFreeDescriptorSets(m_device->vulkan_device(), m_descriptor_pool, + descriptor_sets.size(), + descriptor_sets.data()); + m_nb_descriptor_set_allocated -= m_descriptor_set_layouts.size(); + TRACE_CNT(descriptor_set_allocated_counter, + m_nb_descriptor_set_allocated); + } + descriptor_sets = cvk_descriptor_set_array{VK_NULL_HANDLE}; } uint32_t num_set_layouts() const { return m_descriptor_set_layouts.size(); } @@ -572,6 +589,8 @@ class cvk_entry_point { TRACE_CNT_VAR(descriptor_set_allocated_counter); bool m_first_allocation_failure; + + std::vector m_descriptor_sets_array; }; struct cvk_program : public _cl_program, api_object { diff --git a/tests/perfetto/api_tests.EnqueueTooManyCommandWithRetry-expectation.txt b/tests/perfetto/api_tests.EnqueueTooManyCommandWithRetry-expectation.txt index 13f8722a..2c22ce6c 100644 --- a/tests/perfetto/api_tests.EnqueueTooManyCommandWithRetry-expectation.txt +++ b/tests/perfetto/api_tests.EnqueueTooManyCommandWithRetry-expectation.txt @@ -30,7 +30,7 @@ "execute_cmds" "extract_cmds_required_by" "flush_no_lock" -"free_descriptor_set" +"free_descriptor_sets" "name" "set_event_status" "vkQueueSubmit" diff --git a/tests/perfetto/simple_test-expectation.txt b/tests/perfetto/simple_test-expectation.txt index 3f23b5ef..d49cfb70 100644 --- a/tests/perfetto/simple_test-expectation.txt +++ b/tests/perfetto/simple_test-expectation.txt @@ -29,7 +29,7 @@ "execute_cmds" "extract_cmds_required_by" "flush_no_lock" -"free_descriptor_set" +"free_descriptor_sets" "name" "set_event_status" "vkQueueSubmit"