diff --git a/src/xrt/compositor/vk_native/comp_vk_native_compositor.c b/src/xrt/compositor/vk_native/comp_vk_native_compositor.c index 1b58a99f1..f058bd204 100644 --- a/src/xrt/compositor/vk_native/comp_vk_native_compositor.c +++ b/src/xrt/compositor/vk_native/comp_vk_native_compositor.c @@ -606,6 +606,41 @@ vk_comp(struct xrt_compositor *xc) return (struct comp_vk_native_compositor *)xc; } +/* + * If the renderer signaled a frame-done semaphore on its most recent draw() + * submit and no consumer has taken it yet this frame, wire it into the given + * VkSubmitInfo so this submit waits on it instead of the caller issuing a + * vkQueueWaitIdle between submits. The semaphore handle and wait-stage mask + * are written to the caller-owned storage (must outlive vkQueueSubmit). + * + * When the renderer didn't submit this frame (zero-copy path) or a prior + * caller already consumed it, leaves waitSemaphoreCount at 0 — safe no-op. + */ +static inline void +vk_native_wire_renderer_wait(struct comp_vk_native_compositor *c, + VkSubmitInfo *si, + VkSemaphore *out_sem_storage, + VkPipelineStageFlags *out_stage_storage) +{ + uint64_t sem_u64 = comp_vk_native_renderer_take_frame_done_semaphore(c->renderer); + if (sem_u64 == 0) { + return; + } + *out_sem_storage = (VkSemaphore)(uintptr_t)sem_u64; + // Renderer's atlas is read by every downstream pre-DP / DP / fallback + // path either as a sampled texture (FRAGMENT_SHADER), a copy/blit + // source (TRANSFER), or sampled into a render pass (COLOR_ATTACHMENT + // indirectly). Waiting at the union of those stages is correct and + // only costs scheduling — not a CPU stall. + *out_stage_storage = + VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + si->waitSemaphoreCount = 1; + si->pWaitSemaphores = out_sem_storage; + si->pWaitDstStageMask = out_stage_storage; +} + /* * * xrt_compositor member functions @@ -2185,6 +2220,9 @@ vk_compositor_layer_commit(struct xrt_compositor *xc, xrt_graphics_sync_handle_t .commandBufferCount = 1, .pCommandBuffers = &cmd, }; + VkSemaphore pre_wait_sem = VK_NULL_HANDLE; + VkPipelineStageFlags pre_wait_stage = 0; + vk_native_wire_renderer_wait(c, &pre_si, &pre_wait_sem, &pre_wait_stage); res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &pre_si, VK_NULL_HANDLE); if (res == VK_SUCCESS) { vk->vkQueueWaitIdle(vk->main_queue->queue); @@ -2220,6 +2258,9 @@ vk_compositor_layer_commit(struct xrt_compositor *xc, xrt_graphics_sync_handle_t .commandBufferCount = 1, .pCommandBuffers = &cmd, }; + VkSemaphore post_wait_sem = VK_NULL_HANDLE; + VkPipelineStageFlags post_wait_stage = 0; + vk_native_wire_renderer_wait(c, &submit_info, &post_wait_sem, &post_wait_stage); res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &submit_info, VK_NULL_HANDLE); if (res == VK_SUCCESS) { vk->vkQueueWaitIdle(vk->main_queue->queue); @@ -2244,6 +2285,9 @@ vk_compositor_layer_commit(struct xrt_compositor *xc, xrt_graphics_sync_handle_t .commandBufferCount = 1, .pCommandBuffers = &cmd, }; + VkSemaphore fb_wait_sem = VK_NULL_HANDLE; + VkPipelineStageFlags fb_wait_stage = 0; + vk_native_wire_renderer_wait(c, &submit_info, &fb_wait_sem, &fb_wait_stage); res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &submit_info, VK_NULL_HANDLE); if (res == VK_SUCCESS) { vk->vkQueueWaitIdle(vk->main_queue->queue); @@ -2388,6 +2432,9 @@ vk_compositor_layer_commit(struct xrt_compositor *xc, xrt_graphics_sync_handle_t .commandBufferCount = 1, .pCommandBuffers = &cmd, }; + VkSemaphore tgt_pre_wait_sem = VK_NULL_HANDLE; + VkPipelineStageFlags tgt_pre_wait_stage = 0; + vk_native_wire_renderer_wait(c, &pre_si, &tgt_pre_wait_sem, &tgt_pre_wait_stage); res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &pre_si, VK_NULL_HANDLE); if (res == VK_SUCCESS) { vk->vkQueueWaitIdle(vk->main_queue->queue); @@ -2467,6 +2514,9 @@ vk_compositor_layer_commit(struct xrt_compositor *xc, xrt_graphics_sync_handle_t .commandBufferCount = 1, .pCommandBuffers = &cmd, }; + VkSemaphore tgt_wait_sem = VK_NULL_HANDLE; + VkPipelineStageFlags tgt_wait_stage = 0; + vk_native_wire_renderer_wait(c, &submit_info, &tgt_wait_sem, &tgt_wait_stage); res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &submit_info, VK_NULL_HANDLE); if (res == VK_SUCCESS) { diff --git a/src/xrt/compositor/vk_native/comp_vk_native_renderer.c b/src/xrt/compositor/vk_native/comp_vk_native_renderer.c index 82cbc245f..03e792e83 100644 --- a/src/xrt/compositor/vk_native/comp_vk_native_renderer.c +++ b/src/xrt/compositor/vk_native/comp_vk_native_renderer.c @@ -75,6 +75,27 @@ struct comp_vk_native_renderer //! When true, clear the atlas to alpha=0 (transparent) instead of //! opaque black, so app alpha<1 regions survive to the present (issue #392). bool transparent_background; + + //! Binary semaphore signaled at end of draw()'s queue submit so the + //! compositor's downstream pre-DP submit can chain without a CPU + //! waitIdle between them. Single-use per draw — take_frame_done_semaphore() + //! returns this handle and clears @ref signal_pending so subsequent + //! submits in the same frame don't double-wait. + VkSemaphore frame_done_sem; + + //! Fence signaled alongside @ref frame_done_sem. Waited at the start of + //! the next draw() to enforce CPU-side back-pressure and to know when + //! @ref pending_cmd is safe to free. + VkFence frame_done_fence; + + //! Cmd buffer in flight from the previous draw(). NULL on first call. + //! Freed at the start of the next draw() once @ref frame_done_fence + //! signals, or at destroy() time after vkDeviceWaitIdle. + VkCommandBuffer pending_cmd; + + //! True after draw() signals @ref frame_done_sem and until a downstream + //! submit takes it via take_frame_done_semaphore(). + bool signal_pending; }; static void @@ -234,6 +255,35 @@ comp_vk_native_renderer_create(struct comp_vk_native_compositor *c, return xret; } + // Per-frame sync primitives for the combined-submit chain + // (renderer.draw() signals → compositor's pre-DP submit waits, no + // vkQueueWaitIdle between them). The fence starts signaled so the + // first draw() can free a (non-existent) previous cmd buffer and + // vkResetFences without an initial wait stall. + VkSemaphoreCreateInfo sem_ci = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO}; + res = vk->vkCreateSemaphore(vk->device, &sem_ci, NULL, &r->frame_done_sem); + if (res != VK_SUCCESS) { + U_LOG_E("Failed to create renderer frame-done semaphore: %d", res); + destroy_atlas_resources(r); + vk->vkDestroyCommandPool(vk->device, r->cmd_pool, NULL); + free(r); + return XRT_ERROR_VULKAN; + } + + VkFenceCreateInfo fence_ci = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = VK_FENCE_CREATE_SIGNALED_BIT, + }; + res = vk->vkCreateFence(vk->device, &fence_ci, NULL, &r->frame_done_fence); + if (res != VK_SUCCESS) { + U_LOG_E("Failed to create renderer frame-done fence: %d", res); + vk->vkDestroySemaphore(vk->device, r->frame_done_sem, NULL); + destroy_atlas_resources(r); + vk->vkDestroyCommandPool(vk->device, r->cmd_pool, NULL); + free(r); + return XRT_ERROR_VULKAN; + } + *out_renderer = r; return XRT_SUCCESS; } @@ -250,6 +300,18 @@ comp_vk_native_renderer_destroy(struct comp_vk_native_renderer **renderer_ptr) vk->vkDeviceWaitIdle(vk->device); + if (r->pending_cmd != VK_NULL_HANDLE) { + vk->vkFreeCommandBuffers(vk->device, r->cmd_pool, 1, &r->pending_cmd); + r->pending_cmd = VK_NULL_HANDLE; + } + + if (r->frame_done_fence != VK_NULL_HANDLE) { + vk->vkDestroyFence(vk->device, r->frame_done_fence, NULL); + } + if (r->frame_done_sem != VK_NULL_HANDLE) { + vk->vkDestroySemaphore(vk->device, r->frame_done_sem, NULL); + } + destroy_atlas_resources(r); if (r->cmd_pool != VK_NULL_HANDLE) { @@ -306,6 +368,46 @@ comp_vk_native_renderer_draw(struct comp_vk_native_renderer *r, (void)left_eye; (void)right_eye; + // Wait for the previous frame's submit to finish so we can safely + // free its cmd buffer and (later) reuse the per-frame fence. Fence + // starts signaled at create time so the first call returns + // immediately. We DON'T reset the fence here — that's deferred + // until just before the main vkQueueSubmit so any early-return + // failure (cmd-buffer alloc fails, drain fails, etc.) leaves the + // fence in its signaled state. Otherwise the next draw() would + // vkWaitForFences forever on a fence nothing ever signals. + vk->vkWaitForFences(vk->device, 1, &r->frame_done_fence, VK_TRUE, UINT64_MAX); + + if (r->pending_cmd != VK_NULL_HANDLE) { + vk->vkFreeCommandBuffers(vk->device, r->cmd_pool, 1, &r->pending_cmd); + r->pending_cmd = VK_NULL_HANDLE; + } + + // Defensive: if the previous frame's frame-done semaphore was + // signaled but never consumed by a downstream submit (e.g. that + // submit failed, or resize() drained the GPU without going through + // the compositor's frame loop), drain it now via a no-op + // wait-submit so we don't double-signal a binary semaphore on the + // vkQueueSubmit below — that's undefined behavior per Vulkan spec. + // Only clear signal_pending if the drain actually went through; + // otherwise the next signaling submit would risk the double-signal UB. + if (r->signal_pending) { + VkPipelineStageFlags drain_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + VkSubmitInfo drain = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &r->frame_done_sem, + .pWaitDstStageMask = &drain_stage, + }; + VkResult drain_res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &drain, VK_NULL_HANDLE); + if (drain_res == VK_SUCCESS) { + r->signal_pending = false; + } else { + U_LOG_E("Failed to drain stuck frame-done semaphore: %d", drain_res); + return XRT_ERROR_VULKAN; + } + } + VkCommandBufferAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = r->cmd_pool, @@ -460,21 +562,38 @@ comp_vk_native_renderer_draw(struct comp_vk_native_renderer *r, vk->vkEndCommandBuffer(cmd); + // Signal the frame-done semaphore on submit so the compositor's + // pre-DP submit (which reads from atlas_image) can chain after us + // without a CPU vkQueueWaitIdle. The fence catches the same event + // CPU-side so the next draw() can safely free this cmd buffer. + // + // Reset the fence here (not at the top of draw) so any early-return + // failure above leaves the fence in its previous signaled state + // rather than deadlocking the next draw() on a fence nothing signals. + vk->vkResetFences(vk->device, 1, &r->frame_done_fence); + VkSubmitInfo submit_info = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .commandBufferCount = 1, .pCommandBuffers = &cmd, + .signalSemaphoreCount = 1, + .pSignalSemaphores = &r->frame_done_sem, }; - res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &submit_info, VK_NULL_HANDLE); + res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &submit_info, r->frame_done_fence); if (res != VK_SUCCESS) { U_LOG_E("Failed to submit renderer commands: %d", res); vk->vkFreeCommandBuffers(vk->device, r->cmd_pool, 1, &cmd); + // Re-signal the fence with a no-op submit so the next draw() + // doesn't deadlock waiting on a fence that vkQueueSubmit + // refused to associate with a batch. + VkSubmitInfo signal_only = {.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO}; + vk->vkQueueSubmit(vk->main_queue->queue, 1, &signal_only, r->frame_done_fence); return XRT_ERROR_VULKAN; } - vk->vkQueueWaitIdle(vk->main_queue->queue); - vk->vkFreeCommandBuffers(vk->device, r->cmd_pool, 1, &cmd); + r->pending_cmd = cmd; + r->signal_pending = true; return XRT_SUCCESS; } @@ -700,3 +819,13 @@ comp_vk_native_renderer_set_transparent(struct comp_vk_native_renderer *r, bool { r->transparent_background = transparent_background; } + +uint64_t +comp_vk_native_renderer_take_frame_done_semaphore(struct comp_vk_native_renderer *r) +{ + if (!r->signal_pending) { + return 0; + } + r->signal_pending = false; + return (uint64_t)(uintptr_t)r->frame_done_sem; +} diff --git a/src/xrt/compositor/vk_native/comp_vk_native_renderer.h b/src/xrt/compositor/vk_native/comp_vk_native_renderer.h index 51d6efcd6..628b07ef1 100644 --- a/src/xrt/compositor/vk_native/comp_vk_native_renderer.h +++ b/src/xrt/compositor/vk_native/comp_vk_native_renderer.h @@ -251,6 +251,27 @@ void comp_vk_native_renderer_set_transparent(struct comp_vk_native_renderer *renderer, bool transparent_background); +/*! + * Take ownership of the binary semaphore signaled by the most recent + * @ref comp_vk_native_renderer_draw submit. The compositor passes this + * as a wait semaphore on its downstream pre-DP submit, replacing the + * previous vkQueueWaitIdle between renderer and compositor submits. + * + * Returns VK_NULL_HANDLE (cast to uint64_t) when there is no pending + * signal — e.g. on the zero-copy path where draw() was skipped, or when + * a previous caller in the same frame already consumed it. + * + * Single-consumer per draw() call. Caller is responsible for waiting on + * the returned handle at an appropriate pipeline stage. + * + * @param renderer The renderer. + * @return VkSemaphore as uint64_t, or 0 (VK_NULL_HANDLE) when none pending. + * + * @ingroup comp_vk_native + */ +uint64_t +comp_vk_native_renderer_take_frame_done_semaphore(struct comp_vk_native_renderer *renderer); + #ifdef __cplusplus } #endif