Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions src/xrt/compositor/vk_native/comp_vk_native_compositor.c
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,41 @@ vk_comp(struct xrt_compositor *xc)
return (struct comp_vk_native_compositor *)xc;
}

/*
 * Chain a downstream submit after the renderer's most recent draw() submit.
 *
 * Asks the renderer for its pending frame-done semaphore. When one is
 * pending, @p si is set up to wait on it (overwriting the wait-semaphore
 * fields of @p si), so the caller does not need a vkQueueWaitIdle between
 * the renderer's submit and this one. When none is pending (the renderer
 * returns 0, e.g. draw() did not submit this frame or an earlier caller
 * already took the semaphore), @p si and both storage slots are left
 * untouched, so a zero-initialized waitSemaphoreCount stays 0.
 *
 * Taking the semaphore clears the renderer's pending flag, so after this
 * call returns with a wait wired in, the caller is expected to go on and
 * submit @p si; the renderer will no longer drain the semaphore itself.
 *
 * @param c                 Compositor whose renderer is queried.
 * @param si                Submit info that receives the semaphore wait.
 * @param out_sem_storage   Caller-owned slot for the semaphore handle. @p si
 *                          points at it, so it must remain valid through the
 *                          vkQueueSubmit call.
 * @param out_stage_storage Caller-owned slot for the wait-stage mask; same
 *                          lifetime requirement as @p out_sem_storage.
 */
static inline void
vk_native_wire_renderer_wait(struct comp_vk_native_compositor *c,
                             VkSubmitInfo *si,
                             VkSemaphore *out_sem_storage,
                             VkPipelineStageFlags *out_stage_storage)
{
	uint64_t sem_u64 = comp_vk_native_renderer_take_frame_done_semaphore(c->renderer);
	if (sem_u64 == 0) {
		return;
	}

	// Convert directly from uint64_t. Non-dispatchable Vulkan handles are
	// pointers on 64-bit targets but plain uint64_t on 32-bit targets; an
	// intermediate uintptr_t cast would chop the handle to 32 bits there.
	*out_sem_storage = (VkSemaphore)sem_u64;

	// Downstream pre-DP / DP / fallback paths read the renderer's atlas as
	// a copy/blit source (TRANSFER), as a sampled texture (FRAGMENT_SHADER),
	// or from within a render pass (COLOR_ATTACHMENT_OUTPUT). Waiting at the
	// union of those stages only delays GPU scheduling; it is not a CPU
	// stall.
	// NOTE(review): a consumer that reads the atlas from a compute or
	// vertex stage would not be covered by this mask — confirm none exist.
	*out_stage_storage = VK_PIPELINE_STAGE_TRANSFER_BIT |              //
	                     VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |       //
	                     VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;

	si->waitSemaphoreCount = 1;
	si->pWaitSemaphores = out_sem_storage;
	si->pWaitDstStageMask = out_stage_storage;
}

/*
*
* xrt_compositor member functions
Expand Down Expand Up @@ -2185,6 +2220,9 @@ vk_compositor_layer_commit(struct xrt_compositor *xc, xrt_graphics_sync_handle_t
.commandBufferCount = 1,
.pCommandBuffers = &cmd,
};
VkSemaphore pre_wait_sem = VK_NULL_HANDLE;
VkPipelineStageFlags pre_wait_stage = 0;
vk_native_wire_renderer_wait(c, &pre_si, &pre_wait_sem, &pre_wait_stage);
res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &pre_si, VK_NULL_HANDLE);
if (res == VK_SUCCESS) {
vk->vkQueueWaitIdle(vk->main_queue->queue);
Expand Down Expand Up @@ -2220,6 +2258,9 @@ vk_compositor_layer_commit(struct xrt_compositor *xc, xrt_graphics_sync_handle_t
.commandBufferCount = 1,
.pCommandBuffers = &cmd,
};
VkSemaphore post_wait_sem = VK_NULL_HANDLE;
VkPipelineStageFlags post_wait_stage = 0;
vk_native_wire_renderer_wait(c, &submit_info, &post_wait_sem, &post_wait_stage);
res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &submit_info, VK_NULL_HANDLE);
if (res == VK_SUCCESS) {
vk->vkQueueWaitIdle(vk->main_queue->queue);
Expand All @@ -2244,6 +2285,9 @@ vk_compositor_layer_commit(struct xrt_compositor *xc, xrt_graphics_sync_handle_t
.commandBufferCount = 1,
.pCommandBuffers = &cmd,
};
VkSemaphore fb_wait_sem = VK_NULL_HANDLE;
VkPipelineStageFlags fb_wait_stage = 0;
vk_native_wire_renderer_wait(c, &submit_info, &fb_wait_sem, &fb_wait_stage);
res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &submit_info, VK_NULL_HANDLE);
if (res == VK_SUCCESS) {
vk->vkQueueWaitIdle(vk->main_queue->queue);
Expand Down Expand Up @@ -2388,6 +2432,9 @@ vk_compositor_layer_commit(struct xrt_compositor *xc, xrt_graphics_sync_handle_t
.commandBufferCount = 1,
.pCommandBuffers = &cmd,
};
VkSemaphore tgt_pre_wait_sem = VK_NULL_HANDLE;
VkPipelineStageFlags tgt_pre_wait_stage = 0;
vk_native_wire_renderer_wait(c, &pre_si, &tgt_pre_wait_sem, &tgt_pre_wait_stage);
res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &pre_si, VK_NULL_HANDLE);
if (res == VK_SUCCESS) {
vk->vkQueueWaitIdle(vk->main_queue->queue);
Expand Down Expand Up @@ -2467,6 +2514,9 @@ vk_compositor_layer_commit(struct xrt_compositor *xc, xrt_graphics_sync_handle_t
.commandBufferCount = 1,
.pCommandBuffers = &cmd,
};
VkSemaphore tgt_wait_sem = VK_NULL_HANDLE;
VkPipelineStageFlags tgt_wait_stage = 0;
vk_native_wire_renderer_wait(c, &submit_info, &tgt_wait_sem, &tgt_wait_stage);

res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &submit_info, VK_NULL_HANDLE);
if (res == VK_SUCCESS) {
Expand Down
135 changes: 132 additions & 3 deletions src/xrt/compositor/vk_native/comp_vk_native_renderer.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,27 @@ struct comp_vk_native_renderer
//! When true, clear the atlas to alpha=0 (transparent) instead of
//! opaque black, so app alpha<1 regions survive to the present (issue #392).
bool transparent_background;

//! Binary semaphore signaled at end of draw()'s queue submit so the
//! compositor's downstream pre-DP submit can chain without a CPU
//! waitIdle between them. Single-use per draw — take_frame_done_semaphore()
//! returns this handle and clears @ref signal_pending so subsequent
//! submits in the same frame don't double-wait.
VkSemaphore frame_done_sem;

//! Fence signaled alongside @ref frame_done_sem. Waited at the start of
//! the next draw() to enforce CPU-side back-pressure and to know when
//! @ref pending_cmd is safe to free.
VkFence frame_done_fence;

//! Cmd buffer in flight from the previous draw(). NULL on first call.
//! Freed at the start of the next draw() once @ref frame_done_fence
//! signals, or at destroy() time after vkDeviceWaitIdle.
VkCommandBuffer pending_cmd;

//! True after draw() signals @ref frame_done_sem and until a downstream
//! submit takes it via take_frame_done_semaphore().
bool signal_pending;
};

static void
Expand Down Expand Up @@ -234,6 +255,35 @@ comp_vk_native_renderer_create(struct comp_vk_native_compositor *c,
return xret;
}

// Per-frame sync primitives for the combined-submit chain
// (renderer.draw() signals → compositor's pre-DP submit waits, no
// vkQueueWaitIdle between them). The fence starts signaled so the
// first draw() can free a (non-existent) previous cmd buffer and
// vkResetFences without an initial wait stall.
VkSemaphoreCreateInfo sem_ci = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
res = vk->vkCreateSemaphore(vk->device, &sem_ci, NULL, &r->frame_done_sem);
if (res != VK_SUCCESS) {
U_LOG_E("Failed to create renderer frame-done semaphore: %d", res);
destroy_atlas_resources(r);
vk->vkDestroyCommandPool(vk->device, r->cmd_pool, NULL);
free(r);
return XRT_ERROR_VULKAN;
}

VkFenceCreateInfo fence_ci = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.flags = VK_FENCE_CREATE_SIGNALED_BIT,
};
res = vk->vkCreateFence(vk->device, &fence_ci, NULL, &r->frame_done_fence);
if (res != VK_SUCCESS) {
U_LOG_E("Failed to create renderer frame-done fence: %d", res);
vk->vkDestroySemaphore(vk->device, r->frame_done_sem, NULL);
destroy_atlas_resources(r);
vk->vkDestroyCommandPool(vk->device, r->cmd_pool, NULL);
free(r);
return XRT_ERROR_VULKAN;
}

*out_renderer = r;
return XRT_SUCCESS;
}
Expand All @@ -250,6 +300,18 @@ comp_vk_native_renderer_destroy(struct comp_vk_native_renderer **renderer_ptr)

vk->vkDeviceWaitIdle(vk->device);

if (r->pending_cmd != VK_NULL_HANDLE) {
vk->vkFreeCommandBuffers(vk->device, r->cmd_pool, 1, &r->pending_cmd);
r->pending_cmd = VK_NULL_HANDLE;
}

if (r->frame_done_fence != VK_NULL_HANDLE) {
vk->vkDestroyFence(vk->device, r->frame_done_fence, NULL);
}
if (r->frame_done_sem != VK_NULL_HANDLE) {
vk->vkDestroySemaphore(vk->device, r->frame_done_sem, NULL);
}

destroy_atlas_resources(r);

if (r->cmd_pool != VK_NULL_HANDLE) {
Expand Down Expand Up @@ -306,6 +368,46 @@ comp_vk_native_renderer_draw(struct comp_vk_native_renderer *r,
(void)left_eye;
(void)right_eye;

// Wait for the previous frame's submit to finish so we can safely
// free its cmd buffer and (later) reuse the per-frame fence. Fence
// starts signaled at create time so the first call returns
// immediately. We DON'T reset the fence here — that's deferred
// until just before the main vkQueueSubmit so any early-return
// failure (cmd-buffer alloc fails, drain fails, etc.) leaves the
// fence in its signaled state. Otherwise the next draw() would
// vkWaitForFences forever on a fence nothing ever signals.
vk->vkWaitForFences(vk->device, 1, &r->frame_done_fence, VK_TRUE, UINT64_MAX);

if (r->pending_cmd != VK_NULL_HANDLE) {
vk->vkFreeCommandBuffers(vk->device, r->cmd_pool, 1, &r->pending_cmd);
r->pending_cmd = VK_NULL_HANDLE;
}

// Defensive: if the previous frame's frame-done semaphore was
// signaled but never consumed by a downstream submit (e.g. that
// submit failed, or resize() drained the GPU without going through
// the compositor's frame loop), drain it now via a no-op
// wait-submit so we don't double-signal a binary semaphore on the
// vkQueueSubmit below — that's undefined behavior per Vulkan spec.
// Only clear signal_pending if the drain actually went through;
// otherwise the next signaling submit would risk the double-signal UB.
if (r->signal_pending) {
VkPipelineStageFlags drain_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkSubmitInfo drain = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &r->frame_done_sem,
.pWaitDstStageMask = &drain_stage,
};
VkResult drain_res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &drain, VK_NULL_HANDLE);
if (drain_res == VK_SUCCESS) {
r->signal_pending = false;
} else {
U_LOG_E("Failed to drain stuck frame-done semaphore: %d", drain_res);
return XRT_ERROR_VULKAN;
}
}

VkCommandBufferAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.commandPool = r->cmd_pool,
Expand Down Expand Up @@ -460,21 +562,38 @@ comp_vk_native_renderer_draw(struct comp_vk_native_renderer *r,

vk->vkEndCommandBuffer(cmd);

// Signal the frame-done semaphore on submit so the compositor's
// pre-DP submit (which reads from atlas_image) can chain after us
// without a CPU vkQueueWaitIdle. The fence catches the same event
// CPU-side so the next draw() can safely free this cmd buffer.
//
// Reset the fence here (not at the top of draw) so any early-return
// failure above leaves the fence in its previous signaled state
// rather than deadlocking the next draw() on a fence nothing signals.
vk->vkResetFences(vk->device, 1, &r->frame_done_fence);

VkSubmitInfo submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.commandBufferCount = 1,
.pCommandBuffers = &cmd,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &r->frame_done_sem,
};

res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &submit_info, VK_NULL_HANDLE);
res = vk->vkQueueSubmit(vk->main_queue->queue, 1, &submit_info, r->frame_done_fence);
if (res != VK_SUCCESS) {
U_LOG_E("Failed to submit renderer commands: %d", res);
vk->vkFreeCommandBuffers(vk->device, r->cmd_pool, 1, &cmd);
// Re-signal the fence with a no-op submit so the next draw()
// doesn't deadlock waiting on a fence that vkQueueSubmit
// refused to associate with a batch.
VkSubmitInfo signal_only = {.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO};
vk->vkQueueSubmit(vk->main_queue->queue, 1, &signal_only, r->frame_done_fence);
return XRT_ERROR_VULKAN;
}

vk->vkQueueWaitIdle(vk->main_queue->queue);
vk->vkFreeCommandBuffers(vk->device, r->cmd_pool, 1, &cmd);
r->pending_cmd = cmd;
r->signal_pending = true;

return XRT_SUCCESS;
}
Expand Down Expand Up @@ -700,3 +819,13 @@ comp_vk_native_renderer_set_transparent(struct comp_vk_native_renderer *r, bool
{
r->transparent_background = transparent_background;
}

/*
 * Hand the frame-done semaphore to a single downstream consumer.
 *
 * Returns 0 when no signal is pending. Otherwise clears the pending flag,
 * so later callers get 0 and draw() will not try to drain the semaphore,
 * and returns the semaphore handle widened to uint64_t. The renderer keeps
 * ownership of the semaphore object itself.
 */
uint64_t
comp_vk_native_renderer_take_frame_done_semaphore(struct comp_vk_native_renderer *r)
{
	if (!r->signal_pending) {
		return 0;
	}

	r->signal_pending = false;

	// Cast straight to uint64_t. Non-dispatchable Vulkan handles are
	// pointers on 64-bit targets and uint64_t on 32-bit targets; going
	// through uintptr_t would drop the upper 32 bits of the handle on the
	// latter.
	return (uint64_t)r->frame_done_sem;
}
21 changes: 21 additions & 0 deletions src/xrt/compositor/vk_native/comp_vk_native_renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,27 @@ void
comp_vk_native_renderer_set_transparent(struct comp_vk_native_renderer *renderer,
bool transparent_background);

/*!
 * Take the binary semaphore signaled by the most recent
 * @ref comp_vk_native_renderer_draw submit. The compositor passes this
 * as a wait semaphore on its downstream pre-DP submit, replacing the
 * previous vkQueueWaitIdle between renderer and compositor submits.
 *
 * Returns 0 (VK_NULL_HANDLE cast to uint64_t) when there is no pending
 * signal — e.g. on the zero-copy path where draw() was skipped, or when
 * a previous caller in the same frame already consumed it.
 *
 * Single-consumer per draw() call: a successful take clears the renderer's
 * pending flag, so subsequent calls return 0 until the next draw(). Because
 * the flag is cleared, the renderer will not drain the semaphore itself
 * before its next signaling submit; the caller is responsible for actually
 * submitting a wait on the returned handle, at an appropriate pipeline
 * stage.
 *
 * The semaphore object stays owned by the renderer, which destroys it in
 * its destroy function; the caller must not destroy the returned handle.
 *
 * @param renderer The renderer.
 * @return VkSemaphore as uint64_t, or 0 (VK_NULL_HANDLE) when none pending.
 *
 * @ingroup comp_vk_native
 */
uint64_t
comp_vk_native_renderer_take_frame_done_semaphore(struct comp_vk_native_renderer *renderer);

#ifdef __cplusplus
}
#endif
Loading