Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 10 additions & 12 deletions src/components/DebugPanel/GpuTimingsSection.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,12 @@ import type { TimingSlotName } from '../../@types/gpu/timing/TimingSlotName';
import { HDR_PASSES } from '../../services/engine/frame/passes';
import { Sparkline } from './Sparkline';

// Row display order: HDR_PASSES (one timing slot per pass), then the
// three out-of-HDR passes in render order — `tone-map` (HDR→swap-
// chain blit), `ui-overlay` (marker-lines + labels combined; see
// `services/engine/frame/uiOverlay.ts` for why they share one slot),
// and `pick` (its own encoder, submitted by the pick renderer).
// Reordering passes in `passes/index.ts` automatically reorders the
// timing UI for the HDR portion.
// Row order matches encoder draw order. `scalar-volume` runs in
// `encodeVolumes` before the HDR loop, so it's listed explicitly; the
// HDR_PASSES spread covers the loop interior. Reorders in
// `passes/index.ts` propagate here automatically.
const DISPLAY_SLOT_ORDER: readonly TimingSlotName[] = [
'scalar-volume',
...HDR_PASSES.map((p) => p.name as TimingSlotName),
'tone-map',
'ui-overlay',
Expand Down Expand Up @@ -135,20 +133,20 @@ export function GpuTimingsSection({ service }: GpuTimingsSectionProps): ReactEle

// ── Branch 3: live data ───────────────────────────────────────────
const stats = statsRef.current;
// Sum of CURRENT-FRAME timings for the header. Only count slots that
// actually ran this frame (staleFrames === 0); idle slots' last
// value is no longer the live frame's cost.
// Header sums per-slot AVG_WINDOW averages, matching the visible
// row values. Stale slots excluded so the total reflects current
// GPU work, not a gated-off subsystem's last cost.
let frameTotalMs = 0;
for (const [, row] of stats) {
if (row.staleFrames === 0 && row.recent.length > 0) {
frameTotalMs += row.recent[row.recent.length - 1]!;
frameTotalMs += row.recent.reduce((a, b) => a + b, 0) / row.recent.length;
}
}

return (
<details open>
<summary style={{ fontWeight: 'bold', cursor: 'pointer' }}>
GPU Timings (last frame: {frameTotalMs.toFixed(1)} ms)
GPU Timings (avg {AVG_WINDOW}f: {frameTotalMs.toFixed(1)} ms)
</summary>
<div style={{ marginTop: 4 }}>
{/*
Expand Down
6 changes: 5 additions & 1 deletion src/services/gpu/device.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,15 @@ export async function initGpu(canvas: HTMLCanvasElement): Promise<GpuContext> {
if (!navigator.gpu) throw new Error('WebGPU not supported in this browser.');

// Step 1 — Request an adapter.
// `powerPreference: 'high-performance'` asks the browser to pick the
// discrete GPU on multi-GPU systems (dual-GPU MacBook Pros, desktops
// with both integrated and dedicated GPUs). It's a no-op on single-GPU
// machines (Apple Silicon, most laptops without a discrete card).
// `requestAdapter()` returns null when the browser has no usable GPU
// (e.g. headless test environments, or a machine whose GPU is blocked by
// a corporate driver policy). We treat that as a hard stop.
// See: https://www.w3.org/TR/webgpu/#dom-gpu-requestadapter
const adapter = await navigator.gpu.requestAdapter();
const adapter = await navigator.gpu.requestAdapter({ powerPreference: 'high-performance' });
if (!adapter) throw new Error('No WebGPU adapter available.');

// Step 2 — Request a device, opting into `timestamp-query` when the
Expand Down
32 changes: 3 additions & 29 deletions src/services/gpu/shaders/points/colorFragment.wesl
Original file line number Diff line number Diff line change
Expand Up @@ -184,37 +184,11 @@ fn fs(in: VSOut) -> @location(0) vec4<f32> {
if (r2 > 1.0) { discard; }

// Gaussian-like falloff: bright at centre (r²=0 → e⁰=1), fading to
// e⁻⁴ ≈ 0.018 at the edge (r²=1).
// e⁻⁴ ≈ 0.018 at the edge (r²=1). The per-instance modulators
// (Schechter, angular reweight, depth fade) are folded into
// 'in.intensity' by the vertex stage — see vertex.wesl.
var alpha = exp(-r2 * 4.0);

// ── Schechter density correction (mode 3) ────────────────────────────
//
// Modulate alpha by the per-galaxy ratio 'clamp(N_ref / n(d), 0, 10)'
// baked at upload time into 'in.schechterRatio'. Originally a 200-step
// trapezoidal integral evaluated PER FRAGMENT; now a single multiply.
let schechterAlpha_ = select(1.0, in.schechterRatio, u.biasMode == 3u);
alpha = alpha * schechterAlpha_;

// ── HEALPix angular re-weight (mode 4) ────────────────────────────
//
// Modulate alpha by the per-galaxy ratio
// 'clamp(medianCellCount / localCellCount, 0.1, 10)' baked at toggle
// time into 'in.angularDensityWeight'. Down-weights galaxies in
// over-dense angular cells and up-weights galaxies in sparse cells,
// flattening radial pencil-beam-jet artefacts.
let angWeight = select(1.0, in.angularDensityWeight, u.biasMode == 4u);
alpha = alpha * angWeight;

// ── Camera-distance depth fade ───────────────────────────────────────────
//
// Every line through the catalog origin under additive billboards
// accumulates hundreds of overlapping galaxies in a single screen
// pixel. The depth-fade multiplier is pre-computed in the vertex
// stage and flat-interpolated as 'in.depthFade' (the vertex stage
// already handles the 'u.depthFadeEnabled' gate, so this is
// unconditionally a multiply).
alpha = alpha * in.depthFade;

// ── Procedural-disk crossfade-OUT ────────────────────────────────────────
//
// The thumbnail subsystem's procedural-disk pass fades IN across
Expand Down
43 changes: 11 additions & 32 deletions src/services/gpu/shaders/points/io.wesl
Original file line number Diff line number Diff line change
Expand Up @@ -276,14 +276,16 @@ struct VSOut {
@location(0) uv: vec2<f32>,

// Pre-computed colour for this point (from the colourIndex ramp).
// Interpolated across the quad by the rasteriser — but since all 6
// vertices of one instance share the same tint, there is no visible
// interpolation.
@location(1) tint: vec3<f32>,
// Flat-interpolated — all 6 vertices of an instance share the value,
// so smooth interpolation would do work for an identical result.
@location(1) @interpolate(flat) tint: vec3<f32>,

// Combined brightness: magnitude-based intensity × global brightness
// knob × per-instance bias-mode alpha (1/V_max).
@location(2) intensity: f32,
// Per-instance brightness with every per-instance modulator folded in:
// magnitude-based intensity × brightness slider × vMax (mode 2) ×
// Schechter (mode 3) × angular reweight (mode 4) × depth-fade
// (camera-distance falloff). Fragment multiplies in only per-pixel
// terms (Gaussian falloff, procedural-disk fade, source fade).
@location(2) @interpolate(flat) intensity: f32,

// Packed (source, localIdx) identity used by 'fsPick' to write the
// pick texture. Flat-interpolated because integers can't be linearly
Expand All @@ -296,8 +298,8 @@ struct VSOut {

// Forwarded 'abs(axisRatio)' so the fragment stage's elliptical mask
// uses the unsigned magnitude. Sign bit was the fallback flag (now
// extracted into 'isFallback').
@location(5) axisRatio: f32,
// extracted into 'isFallback'). Per-instance constant.
@location(5) @interpolate(flat) axisRatio: f32,

// Pre-computed cos/sin of the position-angle rotation. Computed once
// per primitive in 'vs' instead of per fragment, saving millions of
Expand All @@ -310,29 +312,6 @@ struct VSOut {
// measurements.
@location(7) @interpolate(flat) isFallback: u32,

// Origin-relative distance in Mpc, forwarded for future distance-
// dependent fragment effects. Currently unused in the fragment stage
// but kept as plumbed.
@location(8) @interpolate(flat) dMpc: f32,

// Per-galaxy Schechter density-correction ratio. Read in 'fs' only
// when 'u.biasMode == 3u'.
@location(9) @interpolate(flat) schechterRatio: f32,

// Per-galaxy HEALPix angular re-weight. Read in 'fs' only when
// 'u.biasMode == 4u'.
@location(10) @interpolate(flat) angularDensityWeight: f32,

// Distance from the camera to this galaxy in Mpc. Forwarded for
// potential per-fragment depth-driven effects.
@location(11) @interpolate(flat) camDistMpc: f32,

// Pre-computed depth-fade multiplier '1 / (1 + (camDist/FALLOFF_HALF)²)',
// gated by 'u.depthFadeEnabled' (passes through 1.0 when off).
// Per-instance constant — flat-interpolated for one mul per primitive
// instead of per fragment.
@location(12) @interpolate(flat) depthFade: f32,

// Per-instance billboard radius in screen-space pixels. Used by the
// fragment stage to fade points-pass alpha across the procedural-
// disk crossfade band. All 6 vertices share the same value.
Expand Down
91 changes: 40 additions & 51 deletions src/services/gpu/shaders/points/vertex.wesl
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,6 @@ fn vs(
earlyOut.paCs = 1.0;
earlyOut.paSn = 0.0;
earlyOut.isFallback = 0u;
earlyOut.dMpc = dMpc;
earlyOut.schechterRatio = 0.0;
earlyOut.angularDensityWeight = 1.0;
earlyOut.camDistMpc = 0.0;
earlyOut.depthFade = 1.0;
earlyOut.sizePx = 0.0;
return earlyOut;
}
Expand Down Expand Up @@ -207,29 +202,49 @@ fn vs(
// Look up the colour for this point's *rest-frame* colour index.
out.tint = ramp(restColorIndex);

// ── MAGNITUDE → INTENSITY ────────────────────────────────────────────────
// ── MAGNITUDE → INTENSITY, with every per-instance modulator folded in ──
//
// intensity = (22 - magnitude) / 8
// intensity = clamp((22 - magnitude) / 8, 0.05, 1.0) // mag 14 → 1.0,
// mag 22 → 0.05
// × u.brightness // global slider
// × vMaxWeight (mode 2: 1/V_max)
// × schechterRatio (mode 3: Schechter LF)
// × angularReweight (mode 4: HEALPix)
// × depthFade (camera-distance falloff)
//
// magnitude 14 → 1.0 (brightest)
// magnitude 22 → 0.0 (faint limit)
//
// We clamp to [0.05, 1.0] rather than [0, 1] so that even the faintest
// objects remain *barely* visible — a hard zero would create gaps in
// the distribution.
//
// ── 1/V_max alpha modulation ─────────────────────────────────────────────
//
// When 'u.biasMode == 2u' (BiasMode.VMax), multiply the intensity by
// 'p.vMaxWeight'. This dims intrinsically-bright galaxies whose
// detectability volume V_max exceeds the reference volume V_ref — they
// are visible across a much larger slice of space than their faint
// companions, so without the down-weighting they'd over-represent
// themselves visually. The 'select(1.0, p.vMaxWeight, …)' keeps the
// OTHER four modes (None, VolumeLimited, Schechter, AngularReweight)
// unchanged.
// Folding the four mode/distance multipliers in here means the fragment
// multiplies only per-pixel terms (Gaussian + crossfade + source fade).
// Mathematically identical to applying them per-pixel because every
// factor in the product is constant across an instance's fragments.
let vMaxAlpha = select(1.0, p.vMaxWeight, u.biasMode == 2u);
out.intensity = clamp((22.0 - p.magnitude) / 8.0, 0.05, 1.0) * u.brightness * vMaxAlpha;
let schechterMult = select(1.0, p.schechterRatio, u.biasMode == 3u);
let angularMult = select(1.0, p.angularDensityWeight, u.biasMode == 4u);

// Depth-fade: 1 / (1 + (camDist/FALLOFF_HALF)²), gated by depthFadeEnabled.
let FALLOFF_HALF_MPC = 1000.0;
let camDistRel = distanceMpc / FALLOFF_HALF_MPC;
let depthFadeRaw = 1.0 / (1.0 + camDistRel * camDistRel);
let depthFadeMult = select(1.0, depthFadeRaw, u.depthFadeEnabled == 1u);

out.intensity = clamp((22.0 - p.magnitude) / 8.0, 0.05, 1.0)
* u.brightness
* vMaxAlpha
* schechterMult
* angularMult
* depthFadeMult;

// Invisibility cull: galaxies whose folded intensity falls below this
// threshold contribute imperceptibly to the additive HDR target, so
// we emit a degenerate clip position (outside the [-1, 1] NDC cube)
// and let the rasteriser drop the primitive before any fragment work.
// Selected galaxies bypass the cull so the selection halo never
// vanishes on a faint pick. Pick fragment shares this vertex stage,
// so culled galaxies also become non-pickable — acceptable since
// they were never visible.
let INVISIBILITY_THRESHOLD = 0.005;
if (out.intensity < INVISIBILITY_THRESHOLD && !isSelected) {
out.clip = vec4<f32>(2.0, 2.0, 2.0, 1.0);
}

// Forward the per-instance packed identity to 'fsPick'.
// The visual 'fs' ignores this field.
Expand All @@ -256,32 +271,6 @@ fn vs(
out.paCs = cos(paRad);
out.paSn = sin(paRad);

// Forward origin-relative distance for future distance-dependent
// fragment effects (currently unused in 'fs').
out.dMpc = dMpc;

// Forward the per-galaxy Schechter density ratio. The intensity above
// already folded it into 'out.intensity' for mode 3 — forwarding it
// through VSOut keeps the attribute available to the fragment in case
// future tweaks (e.g. tint modulation) want to read it. With
// @interpolate(flat) the GPU writes the value once per primitive.
out.schechterRatio = p.schechterRatio;

// Forward the per-galaxy HEALPix angular re-weight (default 1.0).
out.angularDensityWeight = p.angularDensityWeight;

// Forward camera-relative distance for fragment-stage depth effects.
out.camDistMpc = distanceMpc;

// Pre-compute the depth-fade multiplier here so the fragment doesn't
// re-derive it per pixel. Curve: '1 / (1 + (camDist / FALLOFF_HALF)²)'.
// The 1000 Mpc half-distance + the 'u.depthFadeEnabled' gate are
// resolved here so the fragment stage gets a single multiplier.
let FALLOFF_HALF_MPC = 1000.0;
let camDistRel = distanceMpc / FALLOFF_HALF_MPC;
let depthFadeRaw = 1.0 / (1.0 + camDistRel * camDistRel);
out.depthFade = select(1.0, depthFadeRaw, u.depthFadeEnabled == 1u);

// Forward the per-instance billboard radius in screen-pixels so the
// fragment stage can fade points-pass alpha across the procedural-
// disk crossfade band.
Expand Down
2 changes: 1 addition & 1 deletion src/services/gpu/shaders/scalarVolume/fragment.wesl
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ struct VolumeUniforms {

@group(1) @binding(0) var<uniform> fade: FadeUniforms;

const STEP_COUNT: i32 = 192;
const STEP_COUNT: i32 = 128;
const SATURATION_THRESHOLD: f32 = 0.99;

struct FsIn {
Expand Down
Loading