diff --git a/.github/workflows/build-pull-request.yaml b/.github/workflows/build-pull-request.yaml index 2a8b63fe7..b04e065b2 100644 --- a/.github/workflows/build-pull-request.yaml +++ b/.github/workflows/build-pull-request.yaml @@ -79,3 +79,4 @@ jobs: docker run --rm --gpus 'device=1' ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }} tests/test_GPURaytrace.sh docker run --rm --gpus 'device=1' ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }} tests/test_GPUPhotonFileSource.sh docker run --rm --gpus 'device=1' ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }} tests/test_GPUPhotonSource_8x8SiPM.sh + docker run --rm --gpus 'device=1' ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }} tests/test_wavelength_shifting.sh diff --git a/README.md b/README.md index b1933482c..9f02b0cb7 100644 --- a/README.md +++ b/README.md @@ -155,6 +155,7 @@ EIC-Opticks provides several examples demonstrating GPU-accelerated optical phot | `GPUPhotonSource` | Optical photons (torch) | Any GDML | G4 + GPU side-by-side validation | | `GPUPhotonSourceMinimal` | Optical photons (torch) | Any GDML | GPU-only test | | `GPUPhotonFileSource` | Optical photons (text file) | Any GDML | GPU-only, user-defined photons from file | +| WLS test | Wavelength shifting | WLS sphere + detector shell | Validate GPU WLS physics | ### Example 1: GPUCerenkov (Cerenkov Only) @@ -297,6 +298,55 @@ GPUPhotonFileSource -g tests/geom/opticks_raindrop.gdml -p my_photons.txt -m run **Source files:** `src/GPUPhotonFileSource.cpp`, `src/GPUPhotonFileSource.h` +### Example 6: Wavelength Shifting (WLS) Test + +This test validates the GPU wavelength shifting implementation using a dedicated +geometry with a WLS sphere surrounded by a detector shell: + +``` +Geometry: wls_test.gdml +├── Air world (r=200 mm) +│ ├── WLS sphere (r=20 mm) ← Absorbs UV, re-emits visible +│ └── Glass detector shell (r=28-30 mm) ← 100% detection efficiency +``` + +The WLS material absorbs UV photons (350 nm) and re-emits them isotropically at +longer wavelengths (peak ~481 nm) 
with a 0.5 ns exponential time delay. The test +fires 1000 monochromatic 350 nm photons from the origin into the WLS sphere. + +```bash +GPUPhotonSourceMinimal -g tests/geom/wls_test.gdml -c wls_test -m tests/run.mac -s 42 +``` + +**Expected results:** +- ~990/1000 photons detected (10 absorbed after failing energy conservation) +- All hits wavelength-shifted from 350 nm to mean ~487 nm +- Energy conservation: no hits with wavelength < 350 nm +- Isotropic re-emission: mean momentum direction near zero +- Time delay: mean ~0.6 ns (propagation + 0.5 ns exponential WLS decay) + +**GDML WLS properties required** (same syntax for G4 10.x and 11.x): +```xml + + + + + + + + + + + + +``` + +Unlike scintillation properties, WLS property names are the same in both Geant4 +10.x and 11.x — no dual-naming is needed. + +**Test files:** `tests/geom/wls_test.gdml`, `config/wls_test.json` +**Implementation docs:** `docs/WLS_IMPLEMENTATION.md` + ### Torch configuration `GPUPhotonSource` and `GPUPhotonSourceMinimal` read photon source parameters from a @@ -317,6 +367,7 @@ JSON config file (default `config/dev.json`). 
Key fields: |---------|-------------|-------------|-----------------|----------------------|---------------------| | Cerenkov genstep collection | ✓ | ✓ | ✗ | ✗ | ✗ | | Scintillation genstep collection | ✗ | ✓ | ✗ | ✗ | ✗ | +| Wavelength shifting (WLS) | ✓ | ✓ | ✓ | ✓ | ✓ | | Torch photon generation | ✗ | ✗ | ✓ | ✓ | ✗ | | Photon input from text file | ✗ | ✗ | ✗ | ✗ | ✓ | | G4 optical photon tracking | ✓ | ✓ | ✓ | ✗ | ✗ | diff --git a/config/wls_100k.json b/config/wls_100k.json new file mode 100644 index 000000000..26166e47b --- /dev/null +++ b/config/wls_100k.json @@ -0,0 +1,30 @@ +{ + "torch": { + "gentype": "TORCH", + "trackid": 0, + "matline": 0, + "numphoton": 100000, + + "pos": [0.0, 0.0, 0.0], + "time": 0.0, + + "mom": [0.0, 0.0, 1.0], + "weight": 0.0, + + "pol": [1.0, 0.0, 0.0], + "wavelength": 350.0, + + "zenith": [0.0, 1.0], + "azimuth": [0.0, 1.0], + + "radius": 0.0, + "distance": 0.0, + "mode": 255, + "type": "disc" + }, + + "event": { + "mode": "DebugLite", + "maxslot": 10000000 + } +} diff --git a/config/wls_scatter_viz.json b/config/wls_scatter_viz.json new file mode 100644 index 000000000..76bd9ece2 --- /dev/null +++ b/config/wls_scatter_viz.json @@ -0,0 +1,30 @@ +{ + "torch": { + "gentype": "TORCH", + "trackid": 0, + "matline": 0, + "numphoton": 10000, + + "pos": [0.0, 0.0, -25.0], + "time": 0.0, + + "mom": [0.0, 0.0, 1.0], + "weight": 0.0, + + "pol": [1.0, 0.0, 0.0], + "wavelength": 350.0, + + "zenith": [0.0, 0.3], + "azimuth": [0.0, 1.0], + + "radius": 0.0, + "distance": 0.0, + "mode": 255, + "type": "disc" + }, + + "event": { + "mode": "HitPhoton", + "maxslot": 100000 + } +} diff --git a/config/wls_slab.json b/config/wls_slab.json new file mode 100644 index 000000000..bfd412305 --- /dev/null +++ b/config/wls_slab.json @@ -0,0 +1,30 @@ +{ + "torch": { + "gentype": "TORCH", + "trackid": 0, + "matline": 0, + "numphoton": 100000, + + "pos": [0.0, 0.0, -50.0], + "time": 0.0, + + "mom": [0.0, 0.0, 1.0], + "weight": 0.0, + + "pol": [1.0, 0.0, 0.0], + 
"wavelength": 400.0, + + "zenith": [0.0, 0.0], + "azimuth": [0.0, 1.0], + + "radius": 0.0, + "distance": 0.0, + "mode": 255, + "type": "disc" + }, + + "event": { + "mode": "DebugLite", + "maxslot": 10000000 + } +} diff --git a/config/wls_test.json b/config/wls_test.json new file mode 100644 index 000000000..b8572b5b9 --- /dev/null +++ b/config/wls_test.json @@ -0,0 +1,30 @@ +{ + "torch": { + "gentype": "TORCH", + "trackid": 0, + "matline": 0, + "numphoton": 1000, + + "pos": [0.0, 0.0, 0.0], + "time": 0.0, + + "mom": [0.0, 0.0, 1.0], + "weight": 0.0, + + "pol": [1.0, 0.0, 0.0], + "wavelength": 350.0, + + "zenith": [0.0, 1.0], + "azimuth": [0.0, 1.0], + + "radius": 0.0, + "distance": 0.0, + "mode": 255, + "type": "disc" + }, + + "event": { + "mode": "DebugLite", + "maxslot": 1000000 + } +} diff --git a/examples/benchmark_apex.sh b/examples/benchmark_apex.sh new file mode 100755 index 000000000..2a4de6f83 --- /dev/null +++ b/examples/benchmark_apex.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# benchmark_apex.sh — Measure GPU vs G4 speedup on apex.gdml +# +# Usage: +# ./examples/benchmark_apex.sh + +GDML="apex.gdml" +MACRO="tests/run.mac" +EPS="0.00001" +EPS0="0.0006" +OUTDIR="plots" +CONFIG="det_debug" + +if [ ! -f "$GDML" ]; then + echo "ERROR: $GDML not found. Run from the eic-opticks root directory." + exit 1 +fi + +echo "=== apex.gdml Benchmark ===" +echo "eps=$EPS, eps0=$EPS0" +echo "Running..." 
+ +LOGFILE=$(mktemp /tmp/bench_XXXXXX.txt) +OPTICKS_MAX_BOUNCE=1000 \ +OPTICKS_PROPAGATE_EPSILON=$EPS \ +OPTICKS_PROPAGATE_EPSILON0=$EPS0 \ +GPURaytrace -g "$GDML" -m "$MACRO" -c "$CONFIG" &> "$LOGFILE" || true + +GPU_TIME=$(grep "Simulation time:" "$LOGFILE" | awk '{print $3}') +G4_LINE=$(grep "^ User=" "$LOGFILE" | tail -1) +G4_CPU=$(echo "$G4_LINE" | grep -oP 'User=\K[0-9.]+') +G4_WALL=$(echo "$G4_LINE" | grep -oP 'Real=\K[0-9.]+') +NPHOTONS=$(grep "NumCollected:" "$LOGFILE" | tail -1 | awk '{print $NF}') +GPU_HITS=$(grep "Opticks: NumHits:" "$LOGFILE" | awk '{print $NF}') +G4_HITS=$(grep "Geant4: NumHits:" "$LOGFILE" | awk '{print $NF}') + +if [ -z "$GPU_TIME" ] || [ -z "$G4_CPU" ] || [ -z "$G4_WALL" ] || [ -z "$NPHOTONS" ] || [ -z "$GPU_HITS" ] || [ -z "$G4_HITS" ]; then + echo "ERROR: Could not parse timing or hit counts from output" + tail -30 "$LOGFILE" + rm -f "$LOGFILE" + exit 1 +fi + +python3 -c " +gpu = float('$GPU_TIME') +g4_cpu = float('$G4_CPU') +g4_wall = float('$G4_WALL') +nphotons = int('$NPHOTONS') +gpu_hits = int('$GPU_HITS') +g4_hits = int('$G4_HITS') +hit_diff = (gpu_hits - g4_hits) / g4_hits * 100 if g4_hits > 0 else 0 + +print() +print(f'Photons: {nphotons:>10,}') +print(f'GPU sim time: {gpu:>10.4f} s') +print(f'G4 CPU time: {g4_cpu:>10.2f} s') +print(f'G4 wall time: {g4_wall:>10.2f} s') +print() +print(f'Speedup (CPU): {g4_cpu/gpu:>10.0f}x') +print(f'Speedup (wall): {g4_wall/gpu:>10.0f}x') +print() +print(f'GPU rate: {nphotons/gpu/1e6:>10.1f} M photons/s') +print(f'G4 rate: {nphotons/g4_cpu/1e3:>10.1f} k photons/s') +print() +print(f'GPU hits: {gpu_hits:>10}') +print(f'G4 hits: {g4_hits:>10}') +print(f'Hit diff: {hit_diff:>+9.1f}%') +" + +rm -f "$LOGFILE" + +# Generate comparison plots if hit files exist +if [ -f "gpu_hits.npy" ] && [ -f "g4_hits.npy" ]; then + echo "" + echo "=== Generating comparison plots ===" + python3 optiphy/ana/run_and_compare.py --gpu-hits gpu_hits.npy --g4-hits g4_hits.npy --outdir "$OUTDIR" 2>&1 | tail -15 + echo "Plots saved to $OUTDIR/" +fi diff --git a/optiphy/ana/compare_aligned.py 
b/optiphy/ana/compare_aligned.py new file mode 100644 index 000000000..b044ce6f2 --- /dev/null +++ b/optiphy/ana/compare_aligned.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 +""" +compare_aligned.py - Photon-by-photon comparison of GPU vs G4 aligned simulations. + +Usage: + python compare_aligned.py GPU_PHOTONS.npy G4_PHOTONS.npy + +Performs: + 1. Per-photon flag comparison (exact match rate) + 2. Position comparison at multiple thresholds + 3. Chi-squared test on flag distributions (gold-standard validation metric) + 4. Glancing-angle photon identification (normal sign ambiguity) + 5. Divergent photon listing +""" +import sys +import numpy as np + +FLAG_NAMES = { + 0x0004: "TORCH", 0x0008: "BULK_ABSORB", 0x0010: "BULK_REEMIT", + 0x0020: "BULK_SCATTER", 0x0040: "SURFACE_DETECT", 0x0080: "SURFACE_ABSORB", + 0x0100: "SURFACE_DREFLECT", 0x0200: "SURFACE_SREFLECT", + 0x0400: "BOUNDARY_REFLECT", 0x0800: "BOUNDARY_TRANSMIT", 0x8000: "MISS", +} + +def flag_name(f): + return FLAG_NAMES.get(f, f"0x{f:04x}") + +def extract_flag(photon): + """Extract flag from q3.x (orient_boundary_flag) - lower 16 bits.""" + q3 = photon.view(np.uint32).reshape(-1, 4, 4) + return q3[:, 3, 0] & 0xFFFF + +def chi2_flag_distribution(gpu_flags, g4_flags): + """ + Chi-squared comparison of flag distributions. + + Compares the frequency of each flag value between GPU and G4. + This is the opticks gold-standard validation metric. + + Returns (chi2, ndof, flags_used, gpu_counts, g4_counts). 
+ """ + all_flags = sorted(set(gpu_flags) | set(g4_flags)) + gpu_counts = np.array([(gpu_flags == f).sum() for f in all_flags], dtype=float) + g4_counts = np.array([(g4_flags == f).sum() for f in all_flags], dtype=float) + + total = gpu_counts + g4_counts + mask = total > 0 + gpu_c = gpu_counts[mask] + g4_c = g4_counts[mask] + tot = total[mask] + flags_used = [f for f, m in zip(all_flags, mask) if m] + + n_gpu = gpu_c.sum() + n_g4 = g4_c.sum() + expected_gpu = tot * n_gpu / (n_gpu + n_g4) + expected_g4 = tot * n_g4 / (n_gpu + n_g4) + + chi2 = 0.0 + for i in range(len(flags_used)): + if expected_gpu[i] > 0: + chi2 += (gpu_c[i] - expected_gpu[i])**2 / expected_gpu[i] + if expected_g4[i] > 0: + chi2 += (g4_c[i] - expected_g4[i])**2 / expected_g4[i] + + ndof = max(len(flags_used) - 1, 1) + return chi2, ndof, flags_used, gpu_c, g4_c + +def identify_glancing(gpu, g4): + """ + Identify glancing-angle photons where the normal sign ambiguity + causes momentum negation between GPU and G4. + + At glancing incidence cos(theta) ~ 0, float32 vs float64 can produce + opposite normal signs, reflecting the photon in the opposite direction. + These photons have matching flags but very different positions. + + Returns (glancing, mom_dot): boolean mask of glancing photons and the per-photon momentum dot product. 
+ """ + gpu_mom = gpu[:, 1, :3] + g4_mom = g4[:, 1, :3] + + # Normalize momenta (should already be unit vectors, but be safe) + gpu_norm = np.linalg.norm(gpu_mom, axis=1, keepdims=True) + g4_norm = np.linalg.norm(g4_mom, axis=1, keepdims=True) + gpu_norm[gpu_norm == 0] = 1 + g4_norm[g4_norm == 0] = 1 + + gpu_hat = gpu_mom / gpu_norm + g4_hat = g4_mom / g4_norm + + # Dot product of momentum directions: -1 = fully negated (normal flip) + mom_dot = np.sum(gpu_hat * g4_hat, axis=1) + + # Glancing: momentum vectors are nearly anti-parallel (dot ~ -1) + glancing = mom_dot < -0.5 + return glancing, mom_dot + +def main(): + if len(sys.argv) < 3: + print(f"Usage: {sys.argv[0]} GPU_PHOTONS.npy G4_PHOTONS.npy") + sys.exit(1) + + gpu = np.load(sys.argv[1]) + g4 = np.load(sys.argv[2]) + + print(f"GPU shape: {gpu.shape}") + print(f"G4 shape: {g4.shape}") + + n = min(len(gpu), len(g4)) + gpu = gpu[:n] + g4 = g4[:n] + + gpu_flags = extract_flag(gpu) + g4_flags = extract_flag(g4) + + # ---- 1. Per-photon flag comparison ---- + match = gpu_flags == g4_flags + n_match = match.sum() + n_diff = n - n_match + print(f"\n{'='*60}") + print(f"FLAG COMPARISON ({n} photons)") + print(f"{'='*60}") + print(f" Matching: {n_match} ({100*n_match/n:.1f}%)") + print(f" Differ: {n_diff} ({100*n_diff/n:.1f}%)") + + # ---- 2. 
Position comparison ---- + gpu_pos = gpu[:, 0, :3] + g4_pos = g4[:, 0, :3] + pos_diff = np.linalg.norm(gpu_pos - g4_pos, axis=1) + zero_g4 = np.all(g4_pos == 0, axis=1) + + valid = ~zero_g4 + n_valid = valid.sum() + print(f"\n{'='*60}") + print(f"POSITION COMPARISON ({n_valid} valid, {zero_g4.sum()} unrecorded)") + print(f"{'='*60}") + if n_valid > 0: + vdiff = pos_diff[valid] + print(f" Mean dist: {vdiff.mean():.4f} mm") + print(f" Max dist: {vdiff.max():.4f} mm") + print(f" < 0.01 mm: {(vdiff < 0.01).sum()} ({100*(vdiff < 0.01).sum()/n_valid:.1f}%)") + print(f" < 0.1 mm: {(vdiff < 0.1).sum()} ({100*(vdiff < 0.1).sum()/n_valid:.1f}%)") + print(f" < 1.0 mm: {(vdiff < 1.0).sum()} ({100*(vdiff < 1.0).sum()/n_valid:.1f}%)") + + # ---- 3. Chi-squared test on flag distributions ---- + print(f"\n{'='*60}") + print(f"CHI-SQUARED TEST (flag distribution)") + print(f"{'='*60}") + + chi2_val, ndof, flags_used, gpu_c, g4_c = chi2_flag_distribution(gpu_flags, g4_flags) + + print(f" {'Flag':<20s} {'GPU':>8s} {'G4':>8s} {'Diff':>8s}") + print(f" {'-'*20} {'-'*8} {'-'*8} {'-'*8}") + for i, f in enumerate(flags_used): + diff = int(gpu_c[i] - g4_c[i]) + sign = "+" if diff > 0 else "" + print(f" {flag_name(f):<20s} {int(gpu_c[i]):>8d} {int(g4_c[i]):>8d} {sign}{diff:>7d}") + + deviant_frac = 100 * n_diff / n if n > 0 else 0 + print(f"\n chi2/ndof = {chi2_val:.2f}/{ndof} = {chi2_val/ndof:.2f}") + print(f" deviant fraction: {deviant_frac:.2f}% ({n_diff}/{n})") + + # ---- 4. Glancing-angle analysis ---- + print(f"\n{'='*60}") + print(f"GLANCING-ANGLE ANALYSIS (normal sign ambiguity)") + print(f"{'='*60}") + + glancing, mom_dot = identify_glancing(gpu, g4) + n_glancing = glancing.sum() + + # Among matching-flag photons, how many are glancing with large pos diff? 
+ match_glancing = match & glancing + match_large_pos = match & (pos_diff > 1.0) + match_glancing_large = match & glancing & (pos_diff > 1.0) + + print(f" Glancing photons (mom dot < -0.5): {n_glancing}") + print(f" Matching flag + pos diff > 1mm: {match_large_pos.sum()}") + print(f" Of those, glancing: {match_glancing_large.sum()}") + if match_large_pos.sum() > 0: + frac = 100 * match_glancing_large.sum() / match_large_pos.sum() + print(f" Fraction explained by glancing: {frac:.0f}%") + + # Position stats excluding glancing photons + non_glancing_match = match & ~glancing & valid + if non_glancing_match.sum() > 0: + ng_diff = pos_diff[non_glancing_match] + print(f"\n Position (matching, non-glancing, {non_glancing_match.sum()} photons):") + print(f" Max dist: {ng_diff.max():.6f} mm") + print(f" Mean dist: {ng_diff.mean():.6f} mm") + print(f" < 0.01 mm: {(ng_diff < 0.01).sum()} ({100*(ng_diff < 0.01).sum()/non_glancing_match.sum():.1f}%)") + + # ---- 5. Divergent photon listing ---- + if n_diff > 0: + div_idx = np.where(~match)[0] + print(f"\n{'='*60}") + print(f"DIVERGENT PHOTONS (first 10 of {n_diff})") + print(f"{'='*60}") + for i in div_idx[:10]: + gf = flag_name(gpu_flags[i]) + cf = flag_name(g4_flags[i]) + gp = gpu_pos[i] + cp = g4_pos[i] + print(f" [{i:5d}] GPU: {gf:20s} pos=({gp[0]:8.2f},{gp[1]:8.2f},{gp[2]:8.2f})") + print(f" G4: {cf:20s} pos=({cp[0]:8.2f},{cp[1]:8.2f},{cp[2]:8.2f})") + +if __name__ == "__main__": + main() diff --git a/optiphy/ana/compare_gpu_g4.py b/optiphy/ana/compare_gpu_g4.py new file mode 100755 index 000000000..87fe210e6 --- /dev/null +++ b/optiphy/ana/compare_gpu_g4.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python +""" +compare_gpu_g4.py : Compare GPU (opticks) vs G4 (standalone) simulation hits +============================================================================= + +Reads GPU hit/photon arrays from an opticks event folder and G4 hits from +g4_hits.npy, then prints a side-by-side comparison table. 
+ +Usage:: + + python ana/compare_gpu_g4.py GPU_EVENT_DIR G4_HITS.npy [--histograms] + + # Auto-resolves A000 subfolder: + python ana/compare_gpu_g4.py /tmp/$USER/opticks/GEOM/GEOM/GPUPhotonSourceMinimal/ALL0_no_opticks_event_name g4_hits.npy +""" +import sys +import os +import argparse +import numpy as np + +FLAG_ENUM = { + 0x0004: "TORCH", 0x0008: "BULK_ABSORB", 0x0010: "BULK_REEMIT", + 0x0020: "BULK_SCATTER", 0x0040: "SURFACE_DETECT", 0x0080: "SURFACE_ABSORB", + 0x0100: "SURFACE_DREFLECT", 0x0200: "SURFACE_SREFLECT", + 0x0400: "BOUNDARY_REFLECT", 0x0800: "BOUNDARY_TRANSMIT", + 0x1000: "NAN_ABORT", 0x2000: "EFFICIENCY_COLLECT", 0x8000: "MISS", +} + + +def resolve_event_path(path): + if os.path.exists(os.path.join(path, "photon.npy")): + return path + a000 = os.path.join(path, "A000") + if os.path.exists(os.path.join(a000, "photon.npy")): + return a000 + if os.path.isdir(path): + for d in sorted(os.listdir(path)): + dp = os.path.join(path, d) + if os.path.isdir(dp) and os.path.exists(os.path.join(dp, "photon.npy")): + return dp + return path + + +def hit_stats(hits, label): + """Compute statistics dict from a (N, 4, 4) hit array.""" + n = len(hits) + if n == 0: + return dict(label=label, n=0) + wl = hits[:, 2, 3] + t = hits[:, 0, 3] + pos = hits[:, 0, :3] + r = np.sqrt(np.sum(pos ** 2, axis=1)) + return dict( + label=label, n=n, + wl_min=wl.min(), wl_max=wl.max(), wl_mean=wl.mean(), wl_std=wl.std(), + t_min=t.min(), t_max=t.max(), t_mean=t.mean(), t_std=t.std(), + r_min=r.min(), r_max=r.max(), r_mean=r.mean(), + x_mean=pos[:, 0].mean(), y_mean=pos[:, 1].mean(), z_mean=pos[:, 2].mean(), + ) + + +def print_comparison_table(gpu, g4, n_photons): + """Print side-by-side comparison.""" + w = 14 # column width + + print("=" * 70) + print("GPU vs G4 COMPARISON") + print("=" * 70) + + print(f"\n {'':30s} {'GPU':>{w}s} {'G4':>{w}s} {'Diff':>{w}s}") + print(f" {'-'*30} {'-'*w} {'-'*w} {'-'*w}") + + def row(name, gv, cv, fmt=".1f", diff_fmt=None): + if diff_fmt is None: + diff_fmt = fmt + gs = f"{gv:{fmt}}" if gv is 
not None else "—" + cs = f"{cv:{fmt}}" if cv is not None else "—" + if gv is not None and cv is not None: + d = cv - gv + ds = f"{d:{diff_fmt}}" + else: + ds = "—" + print(f" {name:30s} {gs:>{w}s} {cs:>{w}s} {ds:>{w}s}") + + row("Hits", gpu["n"], g4["n"], "d") + if n_photons and n_photons > 0: + row("Hit rate (%)", 100.0 * gpu["n"] / n_photons, 100.0 * g4["n"] / n_photons, ".2f") + + if gpu["n"] > 0 and g4["n"] > 0: + ratio = g4["n"] / gpu["n"] + print(f" {'Ratio G4/GPU':30s} {'':>{w}s} {'':>{w}s} {ratio:>{w}.3f}") + + if gpu["n"] == 0 or g4["n"] == 0: + print("\n Cannot compare distributions — one side has zero hits.") + return + + print() + row("Wavelength min (nm)", gpu["wl_min"], g4["wl_min"]) + row("Wavelength max (nm)", gpu["wl_max"], g4["wl_max"]) + row("Wavelength mean (nm)", gpu["wl_mean"], g4["wl_mean"]) + row("Wavelength std (nm)", gpu["wl_std"], g4["wl_std"]) + + print() + row("Time min (ns)", gpu["t_min"], g4["t_min"], ".3f") + row("Time max (ns)", gpu["t_max"], g4["t_max"], ".3f") + row("Time mean (ns)", gpu["t_mean"], g4["t_mean"], ".3f") + row("Time std (ns)", gpu["t_std"], g4["t_std"], ".3f") + + print() + row("Radius min (mm)", gpu["r_min"], g4["r_min"], ".2f") + row("Radius max (mm)", gpu["r_max"], g4["r_max"], ".2f") + row("Radius mean (mm)", gpu["r_mean"], g4["r_mean"], ".2f") + + print() + row("Mean X (mm)", gpu["x_mean"], g4["x_mean"], ".2f") + row("Mean Y (mm)", gpu["y_mean"], g4["y_mean"], ".2f") + row("Mean Z (mm)", gpu["z_mean"], g4["z_mean"], ".2f") + + # Statistical significance + print() + if n_photons and n_photons > 0: + p_pool = (gpu["n"] + g4["n"]) / (2 * n_photons) + std = np.sqrt(p_pool * (1 - p_pool) / n_photons) + if std > 0: + z = abs(gpu["n"] / n_photons - g4["n"] / n_photons) / (std * np.sqrt(2)) + expected_fluct = std * np.sqrt(2) * n_photons + print(f" {'Z-score (hit count)':30s} {z:>{w}.1f}") + print(f" {'Expected 1σ fluctuation':30s} {expected_fluct:>{w}.0f} hits") + if z > 3: + print(f" ** Statistically significant 
difference (>{3}σ) **") + else: + print(f" Within statistical expectations (<3σ)") + print() + + +def print_gpu_outcomes(photon): + """Print GPU photon outcome summary.""" + q3 = photon[:, 3, :].view(np.uint32) + flag = q3[:, 0] & 0xFFFF + + print("=" * 70) + print("GPU PHOTON OUTCOMES") + print("=" * 70) + + n = len(flag) + vals, counts = np.unique(flag, return_counts=True) + order = np.argsort(-counts) + + print(f"\n {'Flag':<22s} {'Count':>8s} {'%':>7s}") + print(f" {'-'*22} {'-'*8} {'-'*7}") + for idx in order: + f = vals[idx] + c = counts[idx] + name = FLAG_ENUM.get(f, f"0x{f:04x}") + print(f" {name:<22s} {c:8d} {100*c/n:6.1f}%") + print() + + +def print_wavelength_histograms(gpu_hits, g4_hits): + """Print overlaid wavelength histograms.""" + if len(gpu_hits) == 0 or len(g4_hits) == 0: + return + + gpu_wl = gpu_hits[:, 2, 3] + g4_wl = g4_hits[:, 2, 3] + + wl_min = min(gpu_wl.min(), g4_wl.min()) + wl_max = max(gpu_wl.max(), g4_wl.max()) + bins = np.arange(max(100, np.floor(wl_min / 25) * 25), + min(800, np.ceil(wl_max / 25) * 25 + 25), 25) + + gpu_counts, _ = np.histogram(gpu_wl, bins=bins) + g4_counts, _ = np.histogram(g4_wl, bins=bins) + + # Normalize to same total for shape comparison + gpu_norm = gpu_counts / len(gpu_hits) * 1000 + g4_norm = g4_counts / len(g4_hits) * 1000 + + print("=" * 70) + print("WAVELENGTH DISTRIBUTION (per 1000 hits)") + print("=" * 70) + print(f"\n {'Bin (nm)':<14s} {'GPU':>8s} {'G4':>8s} {'GPU':^20s} {'G4':^20s}") + print(f" {'-'*14} {'-'*8} {'-'*8} {'-'*20} {'-'*20}") + + max_bar = 20 + scale = max(gpu_norm.max(), g4_norm.max()) + if scale == 0: + scale = 1 + + for i in range(len(bins) - 1): + if gpu_counts[i] == 0 and g4_counts[i] == 0: + continue + gpu_bar = "#" * int(gpu_norm[i] / scale * max_bar) + g4_bar = "#" * int(g4_norm[i] / scale * max_bar) + print(f" {bins[i]:5.0f}-{bins[i+1]:5.0f} {gpu_norm[i]:8.1f} {g4_norm[i]:8.1f}" + f" {gpu_bar:<20s} {g4_bar:<20s}") + print() + + +def print_time_histograms(gpu_hits, g4_hits): + 
"""Print overlaid time histograms.""" + if len(gpu_hits) == 0 or len(g4_hits) == 0: + return + + gpu_t = gpu_hits[:, 0, 3] + g4_t = g4_hits[:, 0, 3] + + t_max = max(gpu_t.max(), g4_t.max()) + bin_size = max(1.0, np.ceil(t_max / 15)) + bins = np.arange(0, t_max + bin_size, bin_size) + + gpu_counts, _ = np.histogram(gpu_t, bins=bins) + g4_counts, _ = np.histogram(g4_t, bins=bins) + + gpu_norm = gpu_counts / len(gpu_hits) * 1000 + g4_norm = g4_counts / len(g4_hits) * 1000 + + print("=" * 70) + print("TIME DISTRIBUTION (per 1000 hits)") + print("=" * 70) + print(f"\n {'Bin (ns)':<14s} {'GPU':>8s} {'G4':>8s} {'GPU':^20s} {'G4':^20s}") + print(f" {'-'*14} {'-'*8} {'-'*8} {'-'*20} {'-'*20}") + + max_bar = 20 + scale = max(gpu_norm.max(), g4_norm.max()) + if scale == 0: + scale = 1 + + for i in range(len(bins) - 1): + if gpu_counts[i] == 0 and g4_counts[i] == 0: + continue + gpu_bar = "#" * int(gpu_norm[i] / scale * max_bar) + g4_bar = "#" * int(g4_norm[i] / scale * max_bar) + print(f" {bins[i]:5.1f}-{bins[i+1]:5.1f} {gpu_norm[i]:8.1f} {g4_norm[i]:8.1f}" + f" {gpu_bar:<20s} {g4_bar:<20s}") + print() + + +def main(): + parser = argparse.ArgumentParser( + description="Compare GPU (opticks) vs G4 (standalone) simulation hits", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + parser.add_argument("gpu_path", help="Path to GPU opticks event folder") + parser.add_argument("g4_hits", help="Path to G4 hits file (g4_hits.npy)") + parser.add_argument("--histograms", action="store_true", + help="Show wavelength and time distribution histograms") + + args = parser.parse_args() + + gpu_path = resolve_event_path(args.gpu_path) + if not os.path.exists(os.path.join(gpu_path, "photon.npy")): + print(f"Error: photon.npy not found in {gpu_path}") + sys.exit(1) + if not os.path.exists(args.g4_hits): + print(f"Error: {args.g4_hits} not found") + sys.exit(1) + + # Load GPU arrays + gpu_hits = np.load(os.path.join(gpu_path, "hit.npy")) if 
os.path.exists(os.path.join(gpu_path, "hit.npy")) else np.zeros((0, 4, 4), dtype=np.float32) + gpu_photon = np.load(os.path.join(gpu_path, "photon.npy")) + n_photons = len(gpu_photon) + + # Load G4 hits + g4_hits = np.load(args.g4_hits) + + print(f"\nGPU event: {gpu_path}") + print(f"G4 hits: {args.g4_hits}") + print(f"Total photons: {n_photons}\n") + + # Compute stats + gpu_stats = hit_stats(gpu_hits, "GPU") + g4_stats = hit_stats(g4_hits, "G4") + + # Print tables + print_comparison_table(gpu_stats, g4_stats, n_photons) + print_gpu_outcomes(gpu_photon) + + if args.histograms: + print_wavelength_histograms(gpu_hits, g4_hits) + print_time_histograms(gpu_hits, g4_hits) + + +if __name__ == "__main__": + main() diff --git a/optiphy/ana/plot_photon_paths.py b/optiphy/ana/plot_photon_paths.py new file mode 100644 index 000000000..bf6a09245 --- /dev/null +++ b/optiphy/ana/plot_photon_paths.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +"""Plot GPU photon paths colored by wavelength from record.npy. + +Usage: + python optiphy/ana/plot_photon_paths.py EVENT_DIR [photon_indices] [--output path.png] + +Examples: + # Plot first 10 hit photons + python optiphy/ana/plot_photon_paths.py /tmp/$USER/opticks/GEOM/GEOM/GPUPhotonSourceMinimal/ALL0_no_opticks_event_name/A000 + + # Plot specific photons by index + python optiphy/ana/plot_photon_paths.py /tmp/$USER/opticks/.../A000 2,19,6 + + # Custom output path + python optiphy/ana/plot_photon_paths.py /tmp/$USER/opticks/.../A000 2,19,6 --output my_plot.png +""" +import argparse +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import matplotlib.colors as mcolors +from matplotlib.cm import ScalarMappable + + +def wl_to_rgb(wl): + """Convert wavelength (nm) to RGB tuple. 
Covers 300-780nm.""" + r = g = b = 0.0 + if 300 <= wl < 380: + t = (wl - 300) / (380 - 300) + r = 0.4 * (1 - t) + 0.5 * t + g = 0 + b = 0.4 * (1 - t) + 1.0 * t + elif 380 <= wl < 440: + r = -(wl - 440) / (440 - 380); g = 0; b = 1 + elif 440 <= wl < 490: + r = 0; g = (wl - 440) / (490 - 440); b = 1 + elif 490 <= wl < 510: + r = 0; g = 1; b = -(wl - 510) / (510 - 490) + elif 510 <= wl < 580: + r = (wl - 510) / (580 - 510); g = 1; b = 0 + elif 580 <= wl < 645: + r = 1; g = -(wl - 645) / (645 - 580); b = 0 + elif 645 <= wl <= 780: + r = 1; g = 0; b = 0 + else: + r = g = b = 0.3 + return (max(0, min(1, r)), max(0, min(1, g)), max(0, min(1, b))) + + +def get_steps(record, pidx): + """Return number of valid steps for photon pidx.""" + rec_p = record[pidx] + rf = rec_p.reshape(rec_p.shape[0], -1) + return int(np.sum(np.any(rf != 0, axis=1))) + + +def plot_photon_paths(event_dir, photon_indices=None, output="photon_paths.png", + sphere_radii=None, title=None, lim=None): + record = np.load(f"{event_dir}/record.npy") + photon = np.load(f"{event_dir}/photon.npy") + + q3 = photon[:, 3, :].copy().view(np.uint32) + flags = q3[:, 0] & 0xFFFF + hit_idx = np.where(flags == 0x40)[0] + + if photon_indices is None: + photon_indices = hit_idx[:10] + + fig = plt.figure(figsize=(12, 10)) + ax = fig.add_subplot(111, projection='3d') + + wl_min, wl_max = 800, 300 + for pidx in photon_indices: + ns = get_steps(record, pidx) + if ns < 2: + continue + rec_p = record[pidx] + x = rec_p[:ns, 0, 0] + y = rec_p[:ns, 0, 1] + z = rec_p[:ns, 0, 2] + wl = rec_p[:ns, 2, 3] + + wl_min = min(wl_min, wl.min()) + wl_max = max(wl_max, wl.max()) + + for s in range(ns - 1): + color = wl_to_rgb(float(wl[s])) + ax.plot([x[s], x[s + 1]], [y[s], y[s + 1]], [z[s], z[s + 1]], + color=color, alpha=0.9, linewidth=2.5) + + ax.scatter(x[0], y[0], z[0], c=[wl_to_rgb(float(wl[0]))], s=60, + marker='o', edgecolors='black', linewidths=0.8, zorder=5) + ax.scatter(x[-1], y[-1], z[-1], c='red', s=100, marker='*', zorder=5) + + 
# Draw spheres if requested + if sphere_radii: + u = np.linspace(0, 2 * np.pi, 60) + v = np.linspace(0, np.pi, 30) + sphere_colors = ['mediumpurple', 'lightgreen', 'lightyellow', 'lightcoral'] + sphere_alphas = [0.1, 0.05, 0.05, 0.05] + for i, r in enumerate(sphere_radii): + xs = r * np.outer(np.cos(u), np.sin(v)) + ys = r * np.outer(np.sin(u), np.sin(v)) + zs = r * np.outer(np.ones_like(u), np.cos(v)) + ci = min(i, len(sphere_colors) - 1) + ax.plot_surface(xs, ys, zs, alpha=sphere_alphas[ci], color=sphere_colors[ci]) + + # Wavelength colorbar + wl_range = np.linspace(wl_min, wl_max, 256) + colors = [wl_to_rgb(w) for w in wl_range] + cmap = mcolors.ListedColormap(colors) + norm = mcolors.Normalize(vmin=wl_min, vmax=wl_max) + sm = ScalarMappable(cmap=cmap, norm=norm) + sm.set_array([]) + plt.colorbar(sm, ax=ax, shrink=0.5, pad=0.08, label='Wavelength (nm)') + + ax.set_xlabel('X (mm)') + ax.set_ylabel('Y (mm)') + ax.set_zlabel('Z (mm)') + if title: + ax.set_title(title) + if lim: + ax.set_xlim(-lim, lim) + ax.set_ylim(-lim, lim) + ax.set_zlim(-lim, lim) + ax.view_init(elev=20, azim=135) + plt.tight_layout() + plt.savefig(output, dpi=180) + print(f"Saved {output}") + + +def main(): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("event_dir", help="Path to opticks event folder containing record.npy") + parser.add_argument("indices", nargs='?', default=None, + help="Comma-separated photon indices (default: first 10 hits)") + parser.add_argument("--output", "-o", default="photon_paths.png", help="Output image path") + parser.add_argument("--spheres", default=None, + help="Comma-separated sphere radii to draw (e.g. 
10,30)") + parser.add_argument("--title", "-t", default=None, help="Plot title") + parser.add_argument("--lim", type=float, default=None, + help="Axis limit in mm (symmetric)") + args = parser.parse_args() + + indices = None + if args.indices: + indices = [int(x) for x in args.indices.split(',')] + + spheres = None + if args.spheres: + spheres = [float(x) for x in args.spheres.split(',')] + + plot_photon_paths(args.event_dir, indices, args.output, + sphere_radii=spheres, title=args.title, lim=args.lim) + + +if __name__ == "__main__": + main() diff --git a/optiphy/ana/run_and_compare.py b/optiphy/ana/run_and_compare.py new file mode 100755 index 000000000..d44980234 --- /dev/null +++ b/optiphy/ana/run_and_compare.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python3 +"""Run GPU and G4 simulations and compare hit distributions. + +Runs GPURaytrace with a given GDML and config, then plots: + 1. Hit count with sqrt(N) error bars + 2. WLS-shifted wavelength distribution + 3. Full wavelength distribution + 4. Arrival time (bulk, truncated) + 5. Arrival time (full range, no overflow) + 6. 
3D hit position scatter for GPU and G4 + +Usage: + python optiphy/ana/run_and_compare.py -g apex.gdml -s 42 [--outdir plots] + + # Skip simulation, use existing .npy files: + python optiphy/ana/run_and_compare.py --gpu-hits gpu_hits.npy --g4-hits g4_hits.npy +""" +import argparse +import os +import subprocess +import sys +import math + +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D + + +def run_simulation(gdml, config, macro, seed): + """Run GPURaytrace and return (gpu_hits_path, g4_hits_path, gpu_nhits, g4_nhits).""" + print("NOTE: For step count plots, set \"mode\": \"DebugLite\" in the config JSON file.") + print(" For hit-only analysis, \"HitPhoton\" is sufficient.") + print() + cmd = ["/opt/eic-opticks/bin/GPURaytrace", + "-g", gdml, "-m", macro, "-s", str(seed)] + if config: + cmd += ["-c", config] + + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) + output = result.stdout + result.stderr + + gpu_nhits = g4_nhits = 0 + for line in output.split('\n'): + if 'Opticks: NumHits:' in line: + gpu_nhits = int(line.strip().split()[-1]) + if 'Geant4: NumHits:' in line: + g4_nhits = int(line.strip().split()[-1]) + + print(f" GPU: {gpu_nhits} hits, G4: {g4_nhits} hits") + return "gpu_hits.npy", "g4_hits.npy", gpu_nhits, g4_nhits + + +def load_hits(path, expected_cols=None): + """Load hit array and reshape to (N, ?, 4).""" + a = np.load(path) + if a.ndim == 2: + ncols = a.shape[1] // 4 + a = a.reshape(-1, ncols, 4) + return a + + +def plot_with_errors(ax, data1, data2, bins, label1, label2, xlabel): + """Plot two histograms as points with sqrt(N) error bars.""" + h1, edges = np.histogram(data1, bins=bins) + h2, _ = np.histogram(data2, bins=bins) + centers = (edges[:-1] + edges[1:]) / 2 + width = (edges[1] - edges[0]) * 0.35 + ax.errorbar(centers - width / 2, h1, yerr=np.sqrt(np.maximum(h1, 1)), + fmt='o', 
def _load_gpu_step_counts(candidates):
    """Return per-hit GPU step counts from the first usable record.npy, else None.

    A step is counted for every record slot that contains any non-zero value.
    Only photons whose flag (low 16 bits of the first uint32 in photon row 3)
    equals 0x40 are kept. Unreadable or malformed candidates are reported and
    skipped so that a later candidate can still be used.
    """
    for rpath in candidates:
        if not rpath or not os.path.exists(rpath):
            continue
        try:
            record = np.load(rpath)
            photon_all = np.load(os.path.join(os.path.dirname(rpath), 'photon.npy'))
            q3_all = photon_all[:, 3, :].copy().view(np.uint32)
            flags_all = q3_all[:, 0] & 0xFFFF
            hit_indices = np.where(flags_all == 0x40)[0]
            rec_flat = record[hit_indices].reshape(len(hit_indices), record.shape[1], -1)
            steps = np.sum(np.any(rec_flat != 0, axis=2), axis=1)
        except Exception as exc:
            # Previously swallowed silently and followed by a break, which hid
            # the reason and prevented trying the remaining candidate paths.
            print(f" WARNING: could not derive GPU step counts from {rpath}: {exc}")
            continue
        if steps.size == 0:
            continue
        print(f" GPU step counts loaded from {rpath}")
        return steps
    return None


def make_plots(gpu, g4, outdir, title_extra="", g4_full=None, g4_raw_shape=None):
    """Write the GPU-vs-G4 comparison plots into ``outdir`` and print a summary.

    Parameters
    ----------
    gpu, g4 : array of shape (N, 4, 4)
        Hit arrays. The code reads time from ``[:, 0, 3]``, position from
        ``[:, 0, :3]``, wavelength from ``[:, 2, 3]`` and, for G4, a step count
        from ``[:, 3, 3]``.
    outdir : str
        Output directory; created when missing.
    title_extra : str, optional
        Extra first line for every plot title.
    g4_full, g4_raw_shape : optional
        Accepted for interface compatibility; not used by this function.

    Either hit array may be empty: the time plots are then built from whichever
    side has hits, and are skipped when neither side has any.
    """
    os.makedirs(outdir, exist_ok=True)
    saved = []

    def _save(fname):
        # Shared epilogue of every figure; also records what was really written.
        plt.tight_layout()
        plt.savefig(f'{outdir}/{fname}', dpi=150)
        plt.close()
        saved.append(fname)

    gpu_wl = gpu[:, 2, 3]
    g4_wl = g4[:, 2, 3]
    gpu_t = gpu[:, 0, 3]
    g4_t = g4[:, 0, 3]
    gpu_pos = gpu[:, 0, :3]
    g4_pos = g4[:, 0, :3]

    diff = 100 * (len(gpu) / len(g4) - 1) if len(g4) > 0 else 0
    z_score = (len(gpu) - len(g4)) / math.sqrt(len(gpu) + len(g4)) if (len(gpu) + len(g4)) > 0 else 0
    header = f"GPU={len(gpu)} G4={len(g4)} ({diff:+.1f}%, {z_score:+.1f}σ)"
    if title_extra:
        header = f"{title_extra}\n{header}"

    # 1. Hit count
    fig, ax = plt.subplots(figsize=(6, 5))
    vals = [len(gpu), len(g4)]
    errs = [math.sqrt(v) for v in vals]
    ax.errorbar([0], [vals[0]], yerr=[errs[0]], fmt='o', markersize=12,
                capsize=8, linewidth=2, color='dodgerblue', label='GPU')
    ax.errorbar([1], [vals[1]], yerr=[errs[1]], fmt='s', markersize=12,
                capsize=8, linewidth=2, color='orangered', label='G4')
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['GPU', 'G4'])
    ax.set_ylabel('Hits')
    ax.set_title(f'Hit Count\n{header}')
    ax.set_xlim(-0.5, 1.5)
    ax.legend()
    for i, (v, e) in enumerate(zip(vals, errs)):
        ax.text(i + 0.15, v, f'{v}±{e:.0f}', va='center', fontsize=10)
    _save('hits.png')

    # 2. WLS-shifted wavelength
    fig, ax = plt.subplots(figsize=(8, 5))
    gpu_s = gpu_wl[gpu_wl > 380]
    g4_s = g4_wl[g4_wl > 380]
    plot_with_errors(ax, gpu_s, g4_s, np.arange(380, 550, 15),
                     f'GPU ({len(gpu_s)})', f'G4 ({len(g4_s)})',
                     'Wavelength (nm)')
    ax.set_title(f'WLS-shifted Wavelength (>380nm)\n{header}')
    _save('wavelength_shifted.png')

    # 3. Full wavelength
    fig, ax = plt.subplots(figsize=(8, 5))
    plot_with_errors(ax, gpu_wl, g4_wl, np.arange(330, 550, 15),
                     f'GPU ({len(gpu)})', f'G4 ({len(g4)})',
                     'Wavelength (nm)')
    ax.set_title(f'Full Wavelength\n{header}')
    _save('wavelength_full.png')

    # Time plots need at least one non-empty sample: reductions such as
    # .max() raise on zero-size arrays, so only non-empty sides contribute.
    nonempty_t = [t for t in (gpu_t, g4_t) if t.size > 0]
    if nonempty_t:
        # 4. Arrival time (bulk, truncated at 99th percentile)
        fig, ax = plt.subplots(figsize=(8, 5))
        t_cut = max(np.percentile(t, 99) for t in nonempty_t)
        t_bins = np.linspace(0, t_cut, 30)
        gpu_over = (gpu_t > t_cut).sum()
        g4_over = (g4_t > t_cut).sum()
        plot_with_errors(ax, gpu_t[gpu_t <= t_cut], g4_t[g4_t <= t_cut], t_bins,
                         f'GPU (overflow={gpu_over})', f'G4 (overflow={g4_over})',
                         'Time (ns)')
        ax.set_title(f'Arrival Time (t < {t_cut:.0f}ns)\n{header}')
        ax.set_yscale('log')
        ax.set_ylim(bottom=0.5)
        _save('time_bulk.png')

        # 5. Arrival time (full range, no overflow)
        fig, ax = plt.subplots(figsize=(8, 5))
        t_max = max(t.max() for t in nonempty_t) * 1.05
        t_bins_full = np.linspace(0, t_max, 50)
        plot_with_errors(ax, gpu_t, g4_t, t_bins_full,
                         f'GPU ({len(gpu)})', f'G4 ({len(g4)})',
                         'Time (ns)')
        ax.set_title(f'Arrival Time (full range)\n{header}')
        ax.set_yscale('log')
        ax.set_ylim(bottom=0.5)
        _save('time_full.png')
    else:
        print(" No hits on either side; skipping arrival time plots")

    # 6. 3D hit positions
    fig = plt.figure(figsize=(14, 6))

    ax1 = fig.add_subplot(121, projection='3d')
    ax1.scatter(gpu_pos[:, 0], gpu_pos[:, 1], gpu_pos[:, 2],
                c='dodgerblue', s=3, alpha=0.5)
    ax1.set_xlabel('X (mm)')
    ax1.set_ylabel('Y (mm)')
    ax1.set_zlabel('Z (mm)')
    ax1.set_title(f'GPU hit positions ({len(gpu)})')

    ax2 = fig.add_subplot(122, projection='3d')
    ax2.scatter(g4_pos[:, 0], g4_pos[:, 1], g4_pos[:, 2],
                c='orangered', s=3, alpha=0.5)
    ax2.set_xlabel('X (mm)')
    ax2.set_ylabel('Y (mm)')
    ax2.set_zlabel('Z (mm)')
    ax2.set_title(f'G4 hit positions ({len(g4)})')

    plt.suptitle(f'3D Hit Positions\n{header}')
    _save('positions_3d.png')

    # 7. Step count distribution (GPU from record.npy, G4 from extended hit array)
    record_path = os.path.join(os.path.dirname(os.environ.get('OPTICKS_EVTDIR', '')),
                               'record.npy')
    # Try the real user's tmp tree first; the literal MISSING_USER paths that
    # used to be the only option are kept as a fallback.
    users = []
    for user in (os.environ.get('USER'), 'MISSING_USER'):
        if user and user not in users:
            users.append(user)
    candidates = [record_path]
    for user in users:
        for exe_name in ('GPURaytrace', 'GPUPhotonSourceMinimal'):
            candidates.append(
                f'/tmp/{user}/opticks/GEOM/GEOM/{exe_name}/ALL0_no_opticks_event_name/A000/record.npy')
    gpu_steps = _load_gpu_step_counts(candidates)

    # G4: step count stored in row 3, col 3 (last float of the 4x4 array)
    g4_steps = None
    if g4.ndim == 3 and g4.shape[1] >= 4 and g4.shape[2] >= 4:
        g4_q3w = g4[:, 3, 3]
        if np.any(g4_q3w > 0):
            g4_steps = g4_q3w.astype(int)
            print(f" G4 step counts loaded from hit array row 3 col 3")

    if gpu_steps is not None or g4_steps is not None:
        fig, ax = plt.subplots(figsize=(8, 5))
        s_max = 0
        if gpu_steps is not None:
            s_max = max(s_max, np.percentile(gpu_steps, 99))
        if g4_steps is not None:
            s_max = max(s_max, np.percentile(g4_steps, 99))
        # Keep the axis non-degenerate when every step count is zero.
        s_bins = np.linspace(0, max(s_max * 1.1, 1.0), 30)

        if gpu_steps is not None and g4_steps is not None:
            plot_with_errors(ax, gpu_steps, g4_steps, s_bins,
                             f'GPU ({len(gpu_steps)})', f'G4 ({len(g4_steps)})',
                             'Steps to detection')
        else:
            # Exactly one side is available: draw it in that side's usual style.
            if gpu_steps is not None:
                steps, fmt, color, label = gpu_steps, 'o', 'dodgerblue', f'GPU ({len(gpu_steps)})'
            else:
                steps, fmt, color, label = g4_steps, 's', 'orangered', f'G4 ({len(g4_steps)})'
            h, edges = np.histogram(steps, bins=s_bins)
            centers = (edges[:-1] + edges[1:]) / 2
            ax.errorbar(centers, h, yerr=np.sqrt(np.maximum(h, 1)),
                        fmt=fmt, color=color, markersize=4, capsize=2,
                        label=label)
            ax.set_xlabel('Steps to detection')
            ax.set_ylabel('Counts')
            ax.legend()

        ax.set_title(f'Steps to Detection\n{header}')
        _save('step_count.png')

    # Print summary
    print(f"\nSummary: {header}")
    print(f" Wavelength: GPU mean={gpu_wl.mean():.1f}nm G4 mean={g4_wl.mean():.1f}nm")
    print(f" Time: GPU mean={gpu_t.mean():.2f}ns G4 mean={g4_t.mean():.2f}ns")
    print(f" WLS shifted: GPU {100*(gpu_wl>380).mean():.1f}% G4 {100*(g4_wl>380).mean():.1f}%")
    if gpu_steps is not None:
        print(f" GPU steps: mean={gpu_steps.mean():.0f} median={np.median(gpu_steps):.0f} max={gpu_steps.max()}")
    if g4_steps is not None:
        print(f" G4 steps: mean={g4_steps.mean():.0f} median={np.median(g4_steps):.0f} max={g4_steps.max()}")
    print(f"\nPlots saved to {outdir}/:")
    # Only list files that were actually written in this call.
    for f in saved:
        print(f" {f}")
def find_gpu_hits(base=None):
    """Find the most recent GPU hit.npy output.

    Parameters
    ----------
    base : str or Path, optional
        Directory searched recursively. Defaults to ``/tmp/$USER/opticks``
        (``MISSING_USER`` when ``USER`` is unset), the previously hard-coded
        location.

    Returns
    -------
    str or None
        Path of the ``hit.npy`` with the newest modification time, or ``None``
        when the directory is missing or contains no readable ``hit.npy``.
    """
    if base is None:
        base = Path(f"/tmp/{os.environ.get('USER','MISSING_USER')}/opticks")
    base = Path(base)
    if not base.is_dir():
        return None

    newest = None
    newest_mtime = None
    for candidate in base.rglob("hit.npy"):
        try:
            mtime = candidate.stat().st_mtime
        except OSError:
            # File removed while scanning, or a dangling symlink: not a usable
            # result, and it must not abort the whole search.
            continue
        # Strict '>' keeps the first-seen entry on ties.
        if newest_mtime is None or mtime > newest_mtime:
            newest, newest_mtime = candidate, mtime
    return str(newest) if newest is not None else None
result.stderr.strip().split('\n')[-10:]: + print(f" {line}") + return 0 + + # Find hit output + hit_path = find_gpu_hits() + if hit_path and os.path.exists(hit_path): + hits = np.load(hit_path) + print(f"GPU: {len(hits)} hits (from {hit_path})") + return len(hits) + else: + print("GPU: no hit.npy found") + return 0 + +def compare_hits(g4_path, gpu_path): + """Compare G4 and GPU hit arrays.""" + if not os.path.exists(g4_path): + print(f"G4 hits not found: {g4_path}") + return + if not gpu_path or not os.path.exists(gpu_path): + print(f"GPU hits not found") + return + + g4 = np.load(g4_path) + gpu = np.load(gpu_path) + + print(f"\n{'='*60}") + print(f"HIT COMPARISON") + print(f"{'='*60}") + print(f" G4 hits: {len(g4)}") + print(f" GPU hits: {len(gpu)}") + + if len(g4) > 0 and len(gpu) > 0: + diff = len(gpu) - len(g4) + pct = 100 * diff / len(g4) if len(g4) > 0 else 0 + sign = "+" if diff > 0 else "" + print(f" Diff: {sign}{diff} ({sign}{pct:.1f}%)") + + # Position distributions + if len(g4) > 0: + g4_pos = g4[:, 0, :3] + print(f"\n G4 hit positions:") + print(f" x: [{g4_pos[:,0].min():.1f}, {g4_pos[:,0].max():.1f}] mm") + print(f" y: [{g4_pos[:,1].min():.1f}, {g4_pos[:,1].max():.1f}] mm") + print(f" z: [{g4_pos[:,2].min():.1f}, {g4_pos[:,2].max():.1f}] mm") + + if len(gpu) > 0: + gpu_pos = gpu[:, 0, :3] + print(f"\n GPU hit positions:") + print(f" x: [{gpu_pos[:,0].min():.1f}, {gpu_pos[:,0].max():.1f}] mm") + print(f" y: [{gpu_pos[:,1].min():.1f}, {gpu_pos[:,1].max():.1f}] mm") + print(f" z: [{gpu_pos[:,2].min():.1f}, {gpu_pos[:,2].max():.1f}] mm") + + # Wavelength distributions + if len(g4) > 0: + g4_wl = g4[:, 2, 3] + print(f"\n G4 wavelength: mean={g4_wl.mean():.1f} std={g4_wl.std():.1f} nm") + if len(gpu) > 0: + gpu_wl = gpu[:, 2, 3] + print(f" GPU wavelength: mean={gpu_wl.mean():.1f} std={gpu_wl.std():.1f} nm") + + # Time distributions + if len(g4) > 0: + g4_t = g4[:, 0, 3] + print(f"\n G4 time: mean={g4_t.mean():.2f} max={g4_t.max():.2f} ns") + if len(gpu) > 0: 
def main():
    """Run the G4 electron simulation, report its hits and compare with GPU hits.

    The G4 run writes ``g4_genstep_hits.npy`` in the working directory (that is
    the file this function reads back). The GPU side is not launched from here;
    the most recent ``hit.npy`` located by ``find_gpu_hits()`` is used for the
    comparison, and ``compare_hits`` reports when either side is missing.
    """
    parser = argparse.ArgumentParser(description="Compare GPU vs G4 electron genstep simulation")
    parser.add_argument("--gdml", default="apex.gdml", help="GDML geometry file")
    parser.add_argument("--energy", type=float, default=1.0, help="Electron energy in MeV")
    parser.add_argument("--nevents", type=int, default=10, help="Number of events")
    parser.add_argument("--seed", type=int, default=42, help="Random seed")
    parser.add_argument("--pos", default="0,0,100", help="Electron position x,y,z mm")
    parser.add_argument("--dir", default="0,0,1", help="Electron direction x,y,z")
    args = parser.parse_args()

    # Run G4 (the hit count it reports is already printed by run_g4)
    run_g4(args.gdml, args.energy, args.nevents, args.seed, args.pos, args.dir)

    g4_path = "g4_genstep_hits.npy"
    gpu_path = find_gpu_hits()

    if os.path.exists(g4_path):
        g4 = np.load(g4_path)
        print(f"\n{'='*60}")
        print(f"G4 RESULTS ({args.nevents} events, {args.energy} MeV electron)")
        print(f"{'='*60}")
        print(f" Total hits: {len(g4)}")
        # --nevents 0 is accepted by argparse; avoid dividing by it.
        if args.nevents > 0:
            print(f" Hits/event: {len(g4)/args.nevents:.1f}")
        if len(g4) > 0:
            g4_wl = g4[:, 2, 3]
            g4_pos = g4[:, 0, :3]
            print(f" Wavelength: mean={g4_wl.mean():.1f} nm")
            print(f" Hit y range: [{g4_pos[:,1].min():.1f}, {g4_pos[:,1].max():.1f}] mm")

    # Compare: gpu_path used to be computed and then dropped, so the
    # comparison promised by the module docstring never ran.
    compare_hits(g4_path, gpu_path)
def ks_test_2sample(a, b):
    """Two-sample Kolmogorov-Smirnov test (no scipy dependency).

    Parameters
    ----------
    a, b : array-like
        The two samples; flattened before use. Both must be non-empty.

    Returns
    -------
    (d_stat, p_val)
        ``d_stat`` is the largest absolute difference between the two empirical
        CDFs. ``p_val`` is the asymptotic p-value from the Kolmogorov
        distribution, clamped to [0, 1].

    Raises
    ------
    ValueError
        If either sample is empty (the empirical CDF is undefined).
    """
    a_sorted = np.sort(np.asarray(a).ravel())
    b_sorted = np.sort(np.asarray(b).ravel())
    na, nb = a_sorted.size, b_sorted.size
    if na == 0 or nb == 0:
        raise ValueError("ks_test_2sample requires two non-empty samples")

    # The supremum is attained at a sample point, so evaluating both CDFs at
    # every observed value is enough; the evaluation points need no ordering.
    all_vals = np.concatenate([a_sorted, b_sorted])
    cdf_a = np.searchsorted(a_sorted, all_vals, side='right') / na
    cdf_b = np.searchsorted(b_sorted, all_vals, side='right') / nb
    d_stat = np.max(np.abs(cdf_a - cdf_b))

    # Effective sample size with the usual small-sample correction.
    n_eff = np.sqrt(na * nb / (na + nb))
    lam = (n_eff + 0.12 + 0.11 / n_eff) * d_stat

    # Q(lam) = 2 * sum_{j>=1} (-1)^(j-1) * exp(-2 j^2 lam^2).
    # Using only the j=1 term overestimates the p-value for moderate lam
    # (and exceeds 1 for lam < ~0.59), so sum the alternating series instead.
    # Below lam = 0.2 the value differs from 1 by less than 1e-12.
    if lam < 0.2:
        p_val = 1.0
    else:
        j = np.arange(1, 101)
        signs = np.where(j % 2 == 1, 1.0, -1.0)
        p_val = 2.0 * float(np.sum(signs * np.exp(-2.0 * (j * lam) ** 2)))
    p_val = max(0.0, min(1.0, p_val))
    return d_stat, p_val
def print_wavelength_comparison(gpu_wl, g4_wl):
    """Print summary statistics, percentiles and a KS test for two wavelength samples.

    Each table row shows the GPU value, the G4 value and their difference
    (G4 minus GPU). The KS verdict uses a 0.01 p-value threshold.
    """
    print_header("WAVELENGTH DISTRIBUTION COMPARISON")

    def emit_row(label, gpu_value, g4_value):
        # One table line: label, GPU, G4, G4-GPU.
        print(f" {label:<25s} {gpu_value:12.2f} {g4_value:12.2f} {g4_value-gpu_value:12.2f}")

    print(f"\n {'Statistic':<25s} {'GPU':>12s} {'G4':>12s} {'Diff':>12s}")
    print(f" {'-'*25} {'-'*12} {'-'*12} {'-'*12}")

    reducers = (
        ("Mean (nm)", np.mean),
        ("Std (nm)", np.std),
        ("Median (nm)", np.median),
        ("Min (nm)", np.min),
        ("Max (nm)", np.max),
    )
    for label, reduce_fn in reducers:
        emit_row(label, reduce_fn(gpu_wl), reduce_fn(g4_wl))

    # Percentile rows, separated from the moments by a blank line
    print()
    for q in (5, 25, 75, 95):
        emit_row('P%d (nm)' % q, np.percentile(gpu_wl, q), np.percentile(g4_wl, q))

    # Two-sample KS test on the raw samples
    ks_d, ks_p = ks_test_2sample(gpu_wl, g4_wl)
    print(f"\n KS statistic: {ks_d:.6f}")
    print(f" KS p-value: {ks_p:.2e}")
    verdict = (" ** Wavelength distributions are SIGNIFICANTLY DIFFERENT **"
               if ks_p < 0.01
               else " Wavelength distributions are statistically compatible")
    print(verdict)
    print()
def print_gpu_outcomes(photon):
    """Print how many photons ended with each flag, most frequent first.

    The flag is the low 16 bits of the first 32-bit word of row 3 of every
    photon, read by reinterpreting that row as uint32. Flags missing from
    FLAG_ENUM are shown as a hexadecimal value.
    """
    print_header("GPU PHOTON OUTCOMES (all photons)")

    last_row_bits = photon[:, 3, :].view(np.uint32)
    flag = last_row_bits[:, 0] & 0xFFFF
    total = len(flag)

    unique_flags, occurrences = np.unique(flag, return_counts=True)
    by_frequency = np.argsort(-occurrences)

    print(f"\n {'Flag':<22s} {'Count':>8s} {'%':>7s}")
    print(f" {'-'*22} {'-'*8} {'-'*7}")
    for value, count in zip(unique_flags[by_frequency], occurrences[by_frequency]):
        label = FLAG_ENUM.get(value, f"0x{value:04x}")
        print(f" {label:<22s} {count:8d} {100*count/total:6.2f}%")
    print()
def print_energy_conservation(gpu_wl, g4_wl, input_wl):
    """Report hits whose wavelength is shorter than the input wavelength.

    Prints the number of such hits for GPU and G4. When there are none on
    either side a pass line is printed; otherwise the minimum and mean of the
    offending wavelengths are printed for each side that has any.
    """
    print_header("ENERGY CONSERVATION CHECK")

    gpu_mask = gpu_wl < input_wl
    g4_mask = g4_wl < input_wl
    gpu_viol = np.sum(gpu_mask)
    g4_viol = np.sum(g4_mask)

    print(f" Input wavelength: {input_wl:.0f} nm")
    print(f" GPU hits with wl < input: {gpu_viol} / {len(gpu_wl)}")
    print(f" G4 hits with wl < input: {g4_viol} / {len(g4_wl)}")

    if gpu_viol == 0 and g4_viol == 0:
        print(" ALL PASS: energy conservation satisfied")
        print()
        return

    # At least one side violates; describe each offending side in turn.
    if gpu_viol > 0:
        offenders = gpu_wl[gpu_mask]
        print(f" GPU violations: min={offenders.min():.1f}nm, mean={offenders.mean():.1f}nm")
    if g4_viol > 0:
        offenders = g4_wl[g4_mask]
        print(f" G4 violations: min={offenders.min():.1f}nm, mean={offenders.mean():.1f}nm")
    print()
os.path.exists(args.g4_hits): + print(f"Error: {args.g4_hits} not found") + sys.exit(1) + + gpu_photon = np.load(photon_path) + gpu_hits = np.load(hit_path) if os.path.exists(hit_path) else np.zeros((0, 4, 4), dtype=np.float32) + g4_hits = np.load(args.g4_hits) + n_photons = len(gpu_photon) + + print(f"\n GPU event path: {gpu_path}") + print(f" G4 hits file: {args.g4_hits}") + + # Hit summary + print_hit_summary(gpu_hits, g4_hits, n_photons, args.input_wavelength) + + if len(gpu_hits) == 0 or len(g4_hits) == 0: + print(" Cannot compare — one side has zero hits.") + return + + gpu_wl = gpu_hits[:, 2, 3] + g4_wl = g4_hits[:, 2, 3] + gpu_t = gpu_hits[:, 0, 3] + g4_t = g4_hits[:, 0, 3] + + # GPU outcomes + print_gpu_outcomes(gpu_photon) + + # Energy conservation + print_energy_conservation(gpu_wl, g4_wl, args.input_wavelength) + + # Wavelength comparison + print_wavelength_comparison(gpu_wl, g4_wl) + print_fine_histogram(gpu_wl, g4_wl, bin_width=args.bin_width) + + # Time comparison + print_time_comparison(gpu_t, g4_t) + + # Spatial + print_position_comparison(gpu_hits, g4_hits) + + +if __name__ == "__main__": + main() diff --git a/qudarap/CMakeLists.txt b/qudarap/CMakeLists.txt index 1529e1999..8a7228499 100644 --- a/qudarap/CMakeLists.txt +++ b/qudarap/CMakeLists.txt @@ -51,6 +51,8 @@ set(SOURCES QScint.cc QScint.cu + QWls.cc + QCerenkovIntegral.cc QCerenkov.cc QCerenkov.cu @@ -120,6 +122,9 @@ SET(HEADERS QScint.hh qscint.h + QWls.hh + qwls.h + QCerenkovIntegral.hh QCerenkov.hh qcerenkov.h diff --git a/qudarap/QSim.cc b/qudarap/QSim.cc index bf8f77156..d3ed3879a 100644 --- a/qudarap/QSim.cc +++ b/qudarap/QSim.cc @@ -3,73 +3,71 @@ #include "SLOG.hh" -#include "ssys.h" -#include "sstamp.h" -#include "spath.h" #include "SProf.hh" +#include "spath.h" +#include "sstamp.h" +#include "ssys.h" #include "SComp.h" +#include "SEvent.hh" +#include "SEventConfig.hh" #include "SEvt.hh" #include "SSim.hh" +#include "salloc.h" #include "scuda.h" #include "squad.h" -#include 
"salloc.h" -#include "SEvent.hh" -#include "SEventConfig.hh" -//#include "SCSGOptiX.h" +// #include "SCSGOptiX.h" #include "SSimulator.h" #include "SGenstep.h" #include "sslice.h" #include "NP.hh" -#include "QUDA_CHECK.h" #include "QU.hh" +#include "QUDA_CHECK.h" +#include "qdebug.h" #include "qrng.h" #include "qsim.h" -#include "qdebug.h" #include "QBase.hh" -#include "QEvt.hh" -#include "QRng.hh" -#include "QTex.hh" -#include "QScint.hh" -#include "QCerenkov.hh" #include "QBnd.hh" -#include "QProp.hh" -#include "QMultiFilm.hh" +#include "QCerenkov.hh" +#include "QDebug.hh" #include "QEvt.hh" +#include "QMultiFilm.hh" #include "QOptical.hh" -#include "QSimLaunch.hh" -#include "QDebug.hh" #include "QPMT.hh" +#include "QProp.hh" +#include "QRng.hh" +#include "QScint.hh" +#include "QSimLaunch.hh" +#include "QTex.hh" +#include "QWls.hh" #include "QSim.hh" const plog::Severity QSim::LEVEL = SLOG::EnvLevel("QSim", "DEBUG"); -const bool QSim::REQUIRE_PMT = ssys::getenvbool(_QSim__REQUIRE_PMT); -const int QSim::SAVE_IGS_EVENTID = ssys::getenvint(_QSim__SAVE_IGS_EVENTID,-1) ; -const char* QSim::SAVE_IGS_PATH = ssys::getenvvar(_QSim__SAVE_IGS_PATH, "$TMP/.opticks/igs.npy"); -const bool QSim::CONCAT = ssys::getenvbool(_QSim__CONCAT); -const bool QSim::ALLOC = ssys::getenvbool(_QSim__ALLOC); +const bool QSim::REQUIRE_PMT = ssys::getenvbool(_QSim__REQUIRE_PMT); +const int QSim::SAVE_IGS_EVENTID = ssys::getenvint(_QSim__SAVE_IGS_EVENTID, -1); +const char *QSim::SAVE_IGS_PATH = ssys::getenvvar(_QSim__SAVE_IGS_PATH, "$TMP/.opticks/igs.npy"); +const bool QSim::CONCAT = ssys::getenvbool(_QSim__CONCAT); +const bool QSim::ALLOC = ssys::getenvbool(_QSim__ALLOC); - - -QSim* QSim::INSTANCE = nullptr ; -QSim* QSim::Get(){ return INSTANCE ; } - -QSim* QSim::Create() +QSim *QSim::INSTANCE = nullptr; +QSim *QSim::Get() { - LOG_IF(fatal, INSTANCE != nullptr) << " a QSim INSTANCE already exists " ; - assert( INSTANCE == nullptr ) ; - return new QSim ; + return INSTANCE; } - - +QSim 
*QSim::Create() +{ + LOG_IF(fatal, INSTANCE != nullptr) << " a QSim INSTANCE already exists "; + assert(INSTANCE == nullptr); + return new QSim; +} /** QSim::UploadComponents @@ -115,137 +113,135 @@ This structure is used to allow separate testing. **/ -void QSim::UploadComponents( const SSim* ssim ) +void QSim::UploadComponents(const SSim *ssim) { - LOG(LEVEL) << "[ ssim " << ssim ; - if(getenv("QSim__UploadComponents_SIGINT")) std::raise(SIGINT); - - LOG(LEVEL) << "[ new QBase" ; - QBase* base = new QBase ; - LOG(LEVEL) << "] new QBase : latency here of about 0.3s from first device access, if latency of >1s need to start nvidia-persistenced " ; + LOG(LEVEL) << "[ ssim " << ssim; + if (getenv("QSim__UploadComponents_SIGINT")) + std::raise(SIGINT); + + LOG(LEVEL) << "[ new QBase"; + QBase *base = new QBase; + LOG(LEVEL) << "] new QBase : latency here of about 0.3s from first device access, if latency of >1s need to start " + "nvidia-persistenced "; LOG(LEVEL) << base->desc(); - - unsigned skipahead_event_offset = SEventConfig::EventSkipahead() ; - LOG(LEVEL) << "[ new QRng skipahead_event_offset : " << skipahead_event_offset << " " << SEventConfig::kEventSkipahead ; - QRng* rng = new QRng(skipahead_event_offset) ; // loads and uploads RNG - LOG(LEVEL) << "] new QRng " << rng->desc() ; + unsigned skipahead_event_offset = SEventConfig::EventSkipahead(); + LOG(LEVEL) << "[ new QRng skipahead_event_offset : " << skipahead_event_offset << " " + << SEventConfig::kEventSkipahead; + QRng *rng = new QRng(skipahead_event_offset); // loads and uploads RNG + LOG(LEVEL) << "] new QRng " << rng->desc(); LOG(LEVEL) << rng->desc(); - const NP* optical = ssim->get(snam::OPTICAL); - const NP* bnd = ssim->get(snam::BND); + const NP *optical = ssim->get(snam::OPTICAL); + const NP *bnd = ssim->get(snam::BND); - if( optical == nullptr && bnd == nullptr ) + if (optical == nullptr && bnd == nullptr) { - LOG(error) << " optical and bnd null snam::OPTICAL " << snam::OPTICAL << " snam::BND " 
<< snam::BND ; + LOG(error) << " optical and bnd null snam::OPTICAL " << snam::OPTICAL << " snam::BND " << snam::BND; } else { - // note that QOptical and QBnd are tightly coupled, perhaps add constraints to tie them together - QOptical* qopt = new QOptical(optical); + // note that QOptical and QBnd are tightly coupled, perhaps add constraints to tie them together + QOptical *qopt = new QOptical(optical); LOG(LEVEL) << qopt->desc(); - QBnd* qbnd = new QBnd(bnd); // boundary texture with standard domain, used for standard fast property lookup + QBnd *qbnd = new QBnd(bnd); // boundary texture with standard domain, used for standard fast property lookup LOG(LEVEL) << qbnd->desc(); } - QDebug* debug_ = new QDebug ; - LOG(LEVEL) << debug_->desc() ; + QDebug *debug_ = new QDebug; + LOG(LEVEL) << debug_->desc(); - const NP* propcom = ssim->get(snam::PROPCOM); - if( propcom ) + const NP *propcom = ssim->get(snam::PROPCOM); + if (propcom) { - LOG(LEVEL) << "[ QProp " ; - QProp* prop = new QProp(propcom) ; + LOG(LEVEL) << "[ QProp "; + QProp *prop = new QProp(propcom); // property interpolation with per-property domains, eg used for Cerenkov RINDEX sampling - LOG(LEVEL) << "] QProp " ; + LOG(LEVEL) << "] QProp "; LOG(LEVEL) << prop->desc(); } else { - LOG(LEVEL) << " propcom null, snam::PROPCOM " << snam::PROPCOM ; + LOG(LEVEL) << " propcom null, snam::PROPCOM " << snam::PROPCOM; } - - const NP* icdf = ssim->get(snam::ICDF); - if( icdf == nullptr ) + const NP *icdf = ssim->get(snam::ICDF); + if (icdf == nullptr) { - LOG(error) << " icdf null, snam::ICDF " << snam::ICDF ; + LOG(error) << " icdf null, snam::ICDF " << snam::ICDF; } else { - unsigned hd_factor = 20u ; // 0,10,20 - QScint* scint = new QScint( icdf, hd_factor); // custom high-definition inverse CDF for scintillation generation + unsigned hd_factor = 20u; // 0,10,20 + QScint *scint = new QScint(icdf, hd_factor); // custom high-definition inverse CDF for scintillation generation LOG(LEVEL) << scint->desc(); } + 
const NP *wls_icdf = ssim->get(snam::WLS_ICDF); + const NP *wls_mat_map = ssim->get(snam::WLS_MAT_MAP); + if (wls_icdf == nullptr || wls_mat_map == nullptr) + { + LOG(LEVEL) << " wls_icdf or wls_mat_map null — no WLS materials in geometry "; + } + else + { + const NP *wls_tc = ssim->get(snam::WLS_TIME_CONSTANTS); + if (wls_tc) + { + unsigned hd_factor = 20u; + QWls *qwls_ = new QWls(wls_icdf, wls_mat_map, wls_tc, hd_factor); + LOG(LEVEL) << qwls_->desc(); + } + else + { + LOG(error) << " wls_icdf and wls_mat_map present but wls_time_constants missing "; + } + } // TODO: make this more like the others : acting on the available inputs rather than the mode - bool is_simtrace = SEventConfig::IsRGModeSimtrace() ; - if(is_simtrace == false ) + bool is_simtrace = SEventConfig::IsRGModeSimtrace(); + if (is_simtrace == false) { - QCerenkov* cerenkov = new QCerenkov ; + QCerenkov *cerenkov = new QCerenkov; LOG(LEVEL) << cerenkov->desc(); } else { - LOG(LEVEL) << " skip QCerenkov for simtrace running " ; + LOG(LEVEL) << " skip QCerenkov for simtrace running "; } + const NPFold *spmt_f = ssim->get_spmt_f(); + QPMT *qpmt = spmt_f ? new QPMT(spmt_f) : nullptr; + bool has_PMT = spmt_f != nullptr && qpmt != nullptr; + bool MISSING_PMT = REQUIRE_PMT == true && has_PMT == false; + LOG_IF(fatal, MISSING_PMT) << " MISSING_PMT " << " has_PMT " << (has_PMT ? "YES" : "NO ") << " REQUIRE_PMT " + << (REQUIRE_PMT ? "YES" : "NO ") << " MISSING_PMT " << (MISSING_PMT ? "YES" : "NO ") + << " spmt_f " << (spmt_f ? "YES" : "NO ") << " qpmt " << (qpmt ? "YES" : "NO "); - const NPFold* spmt_f = ssim->get_spmt_f() ; - QPMT* qpmt = spmt_f ? new QPMT(spmt_f) : nullptr ; + assert(MISSING_PMT == false); + if (MISSING_PMT) + std::raise(SIGINT); - bool has_PMT = spmt_f != nullptr && qpmt != nullptr ; - bool MISSING_PMT = REQUIRE_PMT == true && has_PMT == false ; + LOG(LEVEL) << QPMT::Desc() << std::endl + << " spmt_f " << (spmt_f ? "YES" : "NO ") << " qpmt " << (qpmt ? 
"YES" : "NO "); - LOG_IF(fatal, MISSING_PMT ) - << " MISSING_PMT " - << " has_PMT " << ( has_PMT ? "YES" : "NO " ) - << " REQUIRE_PMT " << ( REQUIRE_PMT ? "YES" : "NO " ) - << " MISSING_PMT " << ( MISSING_PMT ? "YES" : "NO " ) - << " spmt_f " << ( spmt_f ? "YES" : "NO " ) - << " qpmt " << ( qpmt ? "YES" : "NO " ) - ; - - assert(MISSING_PMT == false) ; - if(MISSING_PMT) std::raise(SIGINT); - - - - LOG(LEVEL) - << QPMT::Desc() - << std::endl - << " spmt_f " << ( spmt_f ? "YES" : "NO " ) - << " qpmt " << ( qpmt ? "YES" : "NO " ) - ; - - - - const NP* multifilm = ssim->get_extra(snam::MULTIFILM); - if(multifilm == nullptr) + const NP *multifilm = ssim->get_extra(snam::MULTIFILM); + if (multifilm == nullptr) { - LOG(LEVEL) << " multifilm null, snam::MULTIFILM " << snam::MULTIFILM ; + LOG(LEVEL) << " multifilm null, snam::MULTIFILM " << snam::MULTIFILM; } else { - QMultiFilm* mul = new QMultiFilm( multifilm ); + QMultiFilm *mul = new QMultiFilm(multifilm); LOG(LEVEL) << mul->desc(); } - LOG(LEVEL) << "] ssim " << ssim ; - - - + LOG(LEVEL) << "] ssim " << ssim; } - - - - - /** QSim:::QSim ------------- @@ -261,29 +257,15 @@ singleton components. **/ QSim::QSim() - : - base(QBase::Get()), - qev(new QEvt), - sev(qev->sev), - rng(QRng::Get()), - scint(QScint::Get()), - cerenkov(QCerenkov::Get()), - bnd(QBnd::Get()), - debug_(QDebug::Get()), - prop(QProp::Get()), - pmt(QPMT::Get()), - multifilm(QMultiFilm::Get()), - sim(nullptr), - d_sim(nullptr), - dbg(debug_ ? debug_->dbg : nullptr), - d_dbg(debug_ ? debug_->d_dbg : nullptr), - cx(nullptr) + : base(QBase::Get()), qev(new QEvt), sev(qev->sev), rng(QRng::Get()), scint(QScint::Get()), qwls(QWls::Get()), + cerenkov(QCerenkov::Get()), bnd(QBnd::Get()), debug_(QDebug::Get()), prop(QProp::Get()), + pmt(QPMT::Get()), multifilm(QMultiFilm::Get()), sim(nullptr), d_sim(nullptr), + dbg(debug_ ? debug_->dbg : nullptr), d_dbg(debug_ ? 
debug_->d_dbg : nullptr), cx(nullptr) { - LOG(LEVEL) << desc() ; + LOG(LEVEL) << desc(); init(); } - /** QSim::init ------------ @@ -303,51 +285,43 @@ place (qsim.h) to add GPU side functionality. **/ - void QSim::init() { - sim = new qsim ; - sim->base = base ? base->d_base : nullptr ; - sim->evt = qev ? qev->getDevicePtr() : nullptr ; - //sim->rng_state = rng ? rng->qr->uploaded_states : nullptr ; - sim->rng = rng ? rng->d_qr : nullptr ; + sim = new qsim; + sim->base = base ? base->d_base : nullptr; + sim->evt = qev ? qev->getDevicePtr() : nullptr; + // sim->rng_state = rng ? rng->qr->uploaded_states : nullptr ; + sim->rng = rng ? rng->d_qr : nullptr; + + sim->bnd = bnd ? bnd->d_qb : nullptr; + sim->multifilm = multifilm ? multifilm->d_multifilm : nullptr; + sim->cerenkov = cerenkov ? cerenkov->d_cerenkov : nullptr; + sim->scint = scint ? scint->d_scint : nullptr; + sim->wls = qwls ? qwls->d_wls : nullptr; + sim->pmt = pmt ? pmt->d_pmt : nullptr; + + bool has_PMT = pmt != nullptr && sim->pmt != nullptr; + bool REQUIRE_PMT = ssys::getenvbool(_QSim__REQUIRE_PMT); + bool MISSING_PMT = REQUIRE_PMT == true && has_PMT == false; - sim->bnd = bnd ? bnd->d_qb : nullptr ; - sim->multifilm = multifilm ? multifilm->d_multifilm : nullptr ; - sim->cerenkov = cerenkov ? cerenkov->d_cerenkov : nullptr ; - sim->scint = scint ? scint->d_scint : nullptr ; - sim->pmt = pmt ? pmt->d_pmt : nullptr ; + LOG(LEVEL) << " MISSING_PMT " << (MISSING_PMT ? "YES" : "NO ") << " has_PMT " << (has_PMT ? "YES" : "NO ") + << " QSim::pmt " << (pmt ? "YES" : "NO ") << " QSim::pmt->d_pmt " << (sim->pmt ? "YES" : "NO ") << " [" + << _QSim__REQUIRE_PMT << "] " << (REQUIRE_PMT ? "YES" : "NO "); + LOG_IF(fatal, MISSING_PMT) << " MISSING_PMT ABORT " << " MISSING_PMT " << (MISSING_PMT ? "YES" : "NO ") + << " has_PMT " << (has_PMT ? "YES" : "NO ") << " QSim::pmt " << (pmt ? "YES" : "NO ") + << " QSim::pmt->d_pmt " << (sim->pmt ? "YES" : "NO ") << " [" << _QSim__REQUIRE_PMT + << "] " << (REQUIRE_PMT ? 
"YES" : "NO "); - bool has_PMT = pmt != nullptr && sim->pmt != nullptr ; - bool REQUIRE_PMT = ssys::getenvbool(_QSim__REQUIRE_PMT); - bool MISSING_PMT = REQUIRE_PMT == true && has_PMT == false ; - - LOG(LEVEL) - << " MISSING_PMT " << ( MISSING_PMT ? "YES" : "NO " ) - << " has_PMT " << ( has_PMT ? "YES" : "NO " ) - << " QSim::pmt " << ( pmt ? "YES" : "NO " ) - << " QSim::pmt->d_pmt " << ( sim->pmt ? "YES" : "NO " ) - << " [" << _QSim__REQUIRE_PMT << "] " << ( REQUIRE_PMT ? "YES" : "NO " ) - ; - - LOG_IF(fatal, MISSING_PMT ) - << " MISSING_PMT ABORT " - << " MISSING_PMT " << ( MISSING_PMT ? "YES" : "NO " ) - << " has_PMT " << ( has_PMT ? "YES" : "NO " ) - << " QSim::pmt " << ( pmt ? "YES" : "NO " ) - << " QSim::pmt->d_pmt " << ( sim->pmt ? "YES" : "NO " ) - << " [" << _QSim__REQUIRE_PMT << "] " << ( REQUIRE_PMT ? "YES" : "NO " ) - ; - - assert(MISSING_PMT == false) ; - if(MISSING_PMT) std::raise(SIGINT); - - d_sim = QU::UploadArray(sim, 1, "QSim::init.sim" ); - - INSTANCE = this ; - LOG(LEVEL) << desc() ; - LOG(LEVEL) << descComponents() ; + assert(MISSING_PMT == false); + if (MISSING_PMT) + std::raise(SIGINT); + + d_sim = QU::UploadArray(sim, 1, "QSim::init.sim"); + + INSTANCE = this; + LOG(LEVEL) << desc(); + LOG(LEVEL) << descComponents(); } /** @@ -357,12 +331,11 @@ QSim::setLauncher Formerly used SCSGOptiX **/ -void QSim::setLauncher(SSimulator* cx_ ) +void QSim::setLauncher(SSimulator *cx_) { - cx = cx_ ; + cx = cx_; } - /** QSim::post_launch -------------------- @@ -378,7 +351,6 @@ void QSim::post_launch() } **/ - /** QSim::simulate --------------- @@ -427,190 +399,159 @@ bool QSim::KEEP_SUBFOLD = ssys::getenvbool(QSim__simulate_KEEP_SUBFOLD); double QSim::simulate(int eventID, bool reset_) { - SProf::SetTag(eventID, "A%0.3d_" ) ; + SProf::SetTag(eventID, "A%0.3d_"); - assert( SEventConfig::IsRGModeSimulate() ); + assert(SEventConfig::IsRGModeSimulate()); - //cudaStream_t stream ; cudaStreamCreate(&stream); - cudaStream_t stream = 0 ; + // cudaStream_t stream 
; cudaStreamCreate(&stream); + cudaStream_t stream = 0; + int64_t tot_ph = 0; - int64_t tot_ph = 0 ; + double tot_dt = 0.; - double tot_dt = 0. ; - - int64_t tot_idt = 0 ; - int64_t tot_gdt = 0 ; + int64_t tot_idt = 0; + int64_t tot_gdt = 0; int64_t t_HEAD = SProf::Add("QSim__simulate_HEAD"); - LOG_IF(info, SEvt::LIFECYCLE) << "[ eventID " << eventID ; - if( qev == nullptr ) return -1. ; - + LOG_IF(info, SEvt::LIFECYCLE) << "[ eventID " << eventID; + if (qev == nullptr) + return -1.; - sev->beginOfEvent(eventID); // set SEvt index and tees up frame gensteps for simtrace and input photon simulate running + sev->beginOfEvent( + eventID); // set SEvt index and tees up frame gensteps for simtrace and input photon simulate running - NP* igs = sev->makeGenstepArrayFromVector(); + NP *igs = sev->makeGenstepArrayFromVector(); MaybeSaveIGS(eventID, igs); - std::vector igs_slice ; - int64_t tot_ph_0 = SGenstep::GetGenstepSlices( igs_slice, igs, SEventConfig::MaxSlot() ); + std::vector igs_slice; + int64_t tot_ph_0 = SGenstep::GetGenstepSlices(igs_slice, igs, SEventConfig::MaxSlot()); - //bool xxl = tot_ph_0 > SGenstep::MAX_SLOT_PER_SLICE ; - bool xxl = tot_ph_0 > 100*M ; + // bool xxl = tot_ph_0 > SGenstep::MAX_SLOT_PER_SLICE ; + bool xxl = tot_ph_0 > 100 * M; int num_slice = igs_slice.size(); - LOG(xxl ? info : LEVEL) - << " eventID " << std::setw(6) << eventID - << " igs " << ( igs ? igs->sstr() : "-" ) - << " tot_ph_0 " << tot_ph_0 - << " tot_ph_0/M " << tot_ph_0/M - << " xxl " << ( xxl ? "YES" : "NO " ) - << " MaxSlot " << SEventConfig::MaxSlot() - << " MaxSlot/M " << SEventConfig::MaxSlot()/M - << " sslice::Desc(igs_slice)\n" - << sslice::Desc(igs_slice) - << " num_slice " << num_slice - ; - + LOG(xxl ? info : LEVEL) << " eventID " << std::setw(6) << eventID << " igs " << (igs ? igs->sstr() : "-") + << " tot_ph_0 " << tot_ph_0 << " tot_ph_0/M " << tot_ph_0 / M << " xxl " + << (xxl ? 
"YES" : "NO ") << " MaxSlot " << SEventConfig::MaxSlot() << " MaxSlot/M " + << SEventConfig::MaxSlot() / M << " sslice::Desc(igs_slice)\n" + << sslice::Desc(igs_slice) << " num_slice " << num_slice; int64_t t_LBEG = SProf::Add("QSim__simulate_LBEG"); - for(int i=0 ; i < num_slice ; i++) + for (int i = 0; i < num_slice; i++) { SProf::Add("QSim__simulate_PRUP"); - const sslice& sl = igs_slice[i] ; + const sslice &sl = igs_slice[i]; - LOG(LEVEL) << sl.idx_desc(i) ; + LOG(LEVEL) << sl.idx_desc(i); - int rc = qev->setGenstepUpload_NP(igs, &sl ) ; - LOG_IF(error, rc != 0) << " QEvt::setGenstep ERROR : have qev but no gensteps collected : will skip cx.simulate " ; - - LOG_IF(info, ALLOC) - << " [" << _QSim__ALLOC << "] " - << " i " << std::setw(5) << i - << " SEventConfig::ALLOC " << ( SEventConfig::ALLOC ? "YES" : "NO " ) - << ( SEventConfig::ALLOC ? SEventConfig::ALLOC->desc() : "-" ) - ; + int rc = qev->setGenstepUpload_NP(igs, &sl); + LOG_IF(error, rc != 0) + << " QEvt::setGenstep ERROR : have qev but no gensteps collected : will skip cx.simulate "; + LOG_IF(info, ALLOC) << " [" << _QSim__ALLOC << "] " << " i " << std::setw(5) << i << " SEventConfig::ALLOC " + << (SEventConfig::ALLOC ? "YES" : "NO ") + << (SEventConfig::ALLOC ? SEventConfig::ALLOC->desc() : "-"); SProf::Add("QSim__simulate_PREL"); - sev->t_PreLaunch = sstamp::Now() ; + sev->t_PreLaunch = sstamp::Now(); - double dt = rc == 0 && cx != nullptr ? cx->simulate_launch() : -1. ; //SSimulator protocol + double dt = rc == 0 && cx != nullptr ? cx->simulate_launch() : -1.; // SSimulator protocol - sev->t_PostLaunch = sstamp::Now() ; - sev->t_Launch = dt ; + sev->t_PostLaunch = sstamp::Now(); + sev->t_Launch = dt; - tot_idt += ( sev->t_PostLaunch - sev->t_PreLaunch ) ; - tot_dt += dt ; - tot_ph += sl.ph_count ; + tot_idt += (sev->t_PostLaunch - sev->t_PreLaunch); + tot_dt += dt; + tot_ph += sl.ph_count; - LOG( xxl ? info : LEVEL ) - << " eventID " << eventID - << " xxl " << ( xxl ? 
"YES" : "NO " ) - << " i " << std::setw(4) << i - << " dt " << std::setw(11) << std::fixed << std::setprecision(6) << dt - << " slice " << sl.idx_desc(i) - ; + LOG(xxl ? info : LEVEL) << " eventID " << eventID << " xxl " << (xxl ? "YES" : "NO ") << " i " << std::setw(4) + << i << " dt " << std::setw(11) << std::fixed << std::setprecision(6) << dt << " slice " + << sl.idx_desc(i); int64_t t_POST = SProf::Add("QSim__simulate_POST"); - sev->gather(); // gather into *fold* just added to *topfold* + sev->gather(); // gather into *fold* just added to *topfold* int64_t t_DOWN = SProf::Add("QSim__simulate_DOWN"); - tot_gdt += ( t_DOWN - t_POST ) ; + tot_gdt += (t_DOWN - t_POST); } - - size_t max_slot_M = SEventConfig::MaxSlot()/M; - std::string anno = SProf::Annotation("slice",num_slice, "max_slot_M", max_slot_M); + size_t max_slot_M = SEventConfig::MaxSlot() / M; + std::string anno = SProf::Annotation("slice", num_slice, "max_slot_M", max_slot_M); int64_t t_LEND = SProf::Add("QSim__simulate_LEND", anno.c_str()); - std::stringstream ss ; - std::ostream* out = CONCAT ? &ss : nullptr ; + std::stringstream ss; + std::ostream *out = CONCAT ? &ss : nullptr; int concat_rc = sev->topfold->concat(out); - LOG_IF(info, CONCAT) << ss.str() ; - LOG_IF(fatal, concat_rc != 0) << " sev->topfold->concat FAILED " ; + LOG_IF(info, CONCAT) << ss.str(); + LOG_IF(fatal, concat_rc != 0) << " sev->topfold->concat FAILED "; assert(concat_rc == 0); bool has_hlm = sev->topfold->has_key(SComp::HITLITEMERGED_); - bool has_hm = sev->topfold->has_key(SComp::HITMERGED_); - bool do_final_merge = num_slice > 1 && ( has_hlm || has_hm ) ; - LOG(LEVEL) - << " num_slice " << num_slice - << " has_hm " << ( has_hm ? "YES" : "NO " ) - << " has_hlm " << ( has_hlm ? "YES" : "NO " ) - << " do_final_merge " << ( do_final_merge ? 
"YES" : "NO " ) - ; - if(do_final_merge) simulate_final_merge(tot_ph, stream); + bool has_hm = sev->topfold->has_key(SComp::HITMERGED_); + bool do_final_merge = num_slice > 1 && (has_hlm || has_hm); + LOG(LEVEL) << " num_slice " << num_slice << " has_hm " << (has_hm ? "YES" : "NO ") << " has_hlm " + << (has_hlm ? "YES" : "NO ") << " do_final_merge " << (do_final_merge ? "YES" : "NO "); + if (do_final_merge) + simulate_final_merge(tot_ph, stream); - - if(!KEEP_SUBFOLD) sev->topfold->clear_subfold(); + if (!KEEP_SUBFOLD) + sev->topfold->clear_subfold(); int64_t t_PCAT = SProf::Add("QSim__simulate_PCAT"); - int tot_ht = sev->getNumHit() ; // NB from fold, so requires hits array gathering to be configured to get non-zero - std::string counts = sev->getCounts(); // collect counts before reset - - LOG_IF(info, SEvt::MINIMAL) - << " eventID " << eventID - << " tot_dt " << std::setw(11) << std::fixed << std::setprecision(6) << tot_dt - << " tot_ph " << std::setw(10) << tot_ph - << " tot_ph/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_ph)/float(M) - << " tot_ht " << std::setw(10) << tot_ht - << " tot_ht/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_ht)/float(M) - << " tot_ht/tot_ph " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_ht)/float(tot_ph) - << " reset_ " << ( reset_ ? 
"YES" : "NO " ) - ; + int tot_ht = sev->getNumHit(); // NB from fold, so requires hits array gathering to be configured to get non-zero + std::string counts = sev->getCounts(); // collect counts before reset + LOG_IF(info, SEvt::MINIMAL) << " eventID " << eventID << " tot_dt " << std::setw(11) << std::fixed + << std::setprecision(6) << tot_dt << " tot_ph " << std::setw(10) << tot_ph + << " tot_ph/M " << std::setw(10) << std::fixed << std::setprecision(6) + << float(tot_ph) / float(M) << " tot_ht " << std::setw(10) << tot_ht << " tot_ht/M " + << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_ht) / float(M) + << " tot_ht/tot_ph " << std::setw(10) << std::fixed << std::setprecision(6) + << float(tot_ht) / float(tot_ph) << " reset_ " << (reset_ ? "YES" : "NO "); - assert( tot_ph == tot_ph_0 ); + assert(tot_ph == tot_ph_0); - int64_t t_BRES = SProf::Add("QSim__simulate_BRES", counts.c_str() ); - if(reset_) reset(eventID) ; + int64_t t_BRES = SProf::Add("QSim__simulate_BRES", counts.c_str()); + if (reset_) + reset(eventID); - int64_t t_TAIL = SProf::Add("QSim__simulate_TAIL"); + int64_t t_TAIL = SProf::Add("QSim__simulate_TAIL"); SProf::Write(); // per-event write, so have something in case of crash - LOG_IF(info, SEvt::MINTIME) << "\n" - << SEvt::SEvt__MINTIME - << "\n" - << " (TAIL - HEAD)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float( t_TAIL - t_HEAD )/M - << " (head to tail of QSim::simulate method) " - << "\n" - << " (LEND - LBEG)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float( t_LEND - t_LBEG )/M - << " (multilaunch loop begin to end) " - << "\n" - << " (PCAT - LEND)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float( t_PCAT - t_LEND )/M - << " (topfold concat and clear subfold) " + LOG_IF(info, SEvt::MINTIME) << "\n" - << " (TAIL - BRES)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float( t_TAIL - t_BRES )/M - << " (QSim::reset which saves hits) " - << "\n" - << " 
tot_idt/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_idt)/M - << " (sum of kernel execution int64_t stamp differences in microseconds)" - << "\n" - << " tot_dt " << std::setw(10) << std::fixed << std::setprecision(6) << tot_dt - << " int(tot_dt*M) " << std::setw(10) << int64_t(tot_dt*M) - << " (sum of kernel execution double chrono stamp differences in seconds, and scaled to ms) " - << "\n" - << " tot_gdt/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_gdt)/M - << " (sum of SEvt::gather int64_t stamp differences in microseconds)" - << "\n" - ; - - return tot_dt ; + << SEvt::SEvt__MINTIME << "\n" + << " (TAIL - HEAD)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(t_TAIL - t_HEAD) / M + << " (head to tail of QSim::simulate method) " << "\n" + << " (LEND - LBEG)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(t_LEND - t_LBEG) / M + << " (multilaunch loop begin to end) " << "\n" + << " (PCAT - LEND)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(t_PCAT - t_LEND) / M + << " (topfold concat and clear subfold) " << "\n" + << " (TAIL - BRES)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(t_TAIL - t_BRES) / M + << " (QSim::reset which saves hits) " << "\n" + << " tot_idt/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_idt) / M + << " (sum of kernel execution int64_t stamp differences in microseconds)" << "\n" + << " tot_dt " << std::setw(10) << std::fixed << std::setprecision(6) << tot_dt << " int(tot_dt*M) " + << std::setw(10) << int64_t(tot_dt * M) + << " (sum of kernel execution double chrono stamp differences in seconds, and scaled to ms) " << "\n" + << " tot_gdt/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_gdt) / M + << " (sum of SEvt::gather int64_t stamp differences in microseconds)" << "\n"; + + return tot_dt; } - /** QSim::simulate_final_merge --------------------------- @@ -633,58 
+574,49 @@ TODO: use QEvt::FinalMerge_async once that makes sense void QSim::simulate_final_merge(int64_t tot_ph, cudaStream_t stream) { bool has_hlm = sev->topfold->has_key(SComp::HITLITEMERGED_); - bool has_hm = sev->topfold->has_key(SComp::HITMERGED_); + bool has_hm = sev->topfold->has_key(SComp::HITMERGED_); - if( has_hlm ) + if (has_hlm) { - const NP* hlm = sev->topfold->get(SComp::HITLITEMERGED_); - NP* fin = QEvt::FinalMerge(hlm, stream); + const NP *hlm = sev->topfold->get(SComp::HITLITEMERGED_); + NP *fin = QEvt::FinalMerge(hlm, stream); - float hlm_frac = float(hlm->num_items())/float(tot_ph) ; - float fin_frac = float(fin->num_items())/float(hlm->num_items()) ; + float hlm_frac = float(hlm->num_items()) / float(tot_ph); + float fin_frac = float(fin->num_items()) / float(hlm->num_items()); - std::stringstream ss ; - ss - << " tot_ph " << tot_ph - << " hlm " << ( hlm ? hlm->sstr() : "-" ) - << " fin " << ( fin ? fin->sstr() : "-" ) - << " hlm/tot " << std::setw(7) << std::fixed << std::setprecision(4) << hlm_frac - << " fin/hlm " << std::setw(7) << std::fixed << std::setprecision(4) << fin_frac - ; + std::stringstream ss; + ss << " tot_ph " << tot_ph << " hlm " << (hlm ? hlm->sstr() : "-") << " fin " << (fin ? 
fin->sstr() : "-") + << " hlm/tot " << std::setw(7) << std::fixed << std::setprecision(4) << hlm_frac << " fin/hlm " + << std::setw(7) << std::fixed << std::setprecision(4) << fin_frac; std::string note = ss.str(); - fin->set_meta("QSim__simulate_final_merge", note ); + fin->set_meta("QSim__simulate_final_merge", note); - sev->topfold->set(SComp::HITLITEMERGED_, fin ); + sev->topfold->set(SComp::HITLITEMERGED_, fin); - LOG(info) << note ; + LOG(info) << note; } - if( has_hm ) + if (has_hm) { - const NP* hm = sev->topfold->get(SComp::HITMERGED_); - NP* fi = QEvt::FinalMerge(hm, stream); + const NP *hm = sev->topfold->get(SComp::HITMERGED_); + NP *fi = QEvt::FinalMerge(hm, stream); - float hm_frac = float(hm->num_items())/float(tot_ph) ; - float fi_frac = float(fi->num_items())/float(hm->num_items()) ; + float hm_frac = float(hm->num_items()) / float(tot_ph); + float fi_frac = float(fi->num_items()) / float(hm->num_items()); - std::stringstream ss ; - ss - << " tot_ph " << tot_ph - << " hm " << ( hm ? hm->sstr() : "-" ) - << " fi " << ( fi ? fi->sstr() : "-" ) - << " hm/tot " << std::setw(7) << std::fixed << std::setprecision(4) << hm_frac - << " fi/hm " << std::setw(7) << std::fixed << std::setprecision(4) << fi_frac - ; + std::stringstream ss; + ss << " tot_ph " << tot_ph << " hm " << (hm ? hm->sstr() : "-") << " fi " << (fi ? 
fi->sstr() : "-") + << " hm/tot " << std::setw(7) << std::fixed << std::setprecision(4) << hm_frac << " fi/hm " << std::setw(7) + << std::fixed << std::setprecision(4) << fi_frac; std::string note = ss.str(); - fi->set_meta("QSim__simulate_final_merge", note ); + fi->set_meta("QSim__simulate_final_merge", note); - sev->topfold->set(SComp::HITMERGED_, fi ); - LOG(info) << note ; + sev->topfold->set(SComp::HITMERGED_, fi); + LOG(info) << note; } } - /** QSim::simulate ---------------- @@ -707,36 +639,30 @@ Thus is used from language crossing stack:: **/ - -NP* QSim::simulate(const NP* gs, int eventID ) +NP *QSim::simulate(const NP *gs, int eventID) { bool eventID_expected = eventID > -1; - if(!eventID_expected) std::cerr << "QSim::simulate gs lacks needed eventID metadata [" << eventID << "]\n" ; + if (!eventID_expected) + std::cerr << "QSim::simulate gs lacks needed eventID metadata [" << eventID << "]\n"; assert(eventID_expected); - assert( sev == SEvt::Get_EGPU() ); + assert(sev == SEvt::Get_EGPU()); sev->addGenstep(gs); - bool reset_ = false ; + bool reset_ = false; double tot_dt = simulate(eventID, reset_); - const NP* _ht = sev->getHit(); - NP* ht = _ht ? _ht->copy() : nullptr ; // copy global hits from SEvt before reset + const NP *_ht = sev->getHit(); + NP *ht = _ht ? _ht->copy() : nullptr; // copy global hits from SEvt before reset ht->set_meta("QSim__simulate_tot_dt", tot_dt); - LOG(info) - << " eventID " << std::setw(6) << eventID - << " gs " << ( gs ? gs->sstr() : "-" ) - << " ht " << ( ht ? ht->sstr() : "-" ) - << " tot_dt " << std::fixed << std::setw(10) << std::setprecision(6) << tot_dt - ; + LOG(info) << " eventID " << std::setw(6) << eventID << " gs " << (gs ? gs->sstr() : "-") << " ht " + << (ht ? 
ht->sstr() : "-") << " tot_dt " << std::fixed << std::setw(10) << std::setprecision(6) << tot_dt; reset(eventID); - return ht ; + return ht; } - - /** QSim::MaybeSaveIGS -------------------- @@ -765,27 +691,21 @@ Try manually reducing slots to see if memory limits are the cause:: **/ -void QSim::MaybeSaveIGS(int eventID, NP* igs) // static +void QSim::MaybeSaveIGS(int eventID, NP *igs) // static { - bool igs_null = igs == nullptr ; - const char* igs_path = SAVE_IGS_PATH ? spath::Resolve(SAVE_IGS_PATH) : nullptr ; - bool save_igs = igs && SAVE_IGS_EVENTID == eventID && igs_path ; - LOG(LEVEL) - << " eventID " << eventID - << " igs " << ( igs ? igs->sstr() : "-" ) - << " igs_null " << ( igs_null ? "YES" : "NO " ) - << " [" << _QSim__SAVE_IGS_EVENTID << "] " << SAVE_IGS_EVENTID - << " [" << _QSim__SAVE_IGS_PATH << "] " << ( SAVE_IGS_PATH ? SAVE_IGS_PATH : "-" ) - << " igs_path [" << ( igs_path ? igs_path : "-" ) << "]" - << " save_igs " << ( save_igs ? "YES" : "NO " ) - ; - - if(!save_igs) return ; + bool igs_null = igs == nullptr; + const char *igs_path = SAVE_IGS_PATH ? spath::Resolve(SAVE_IGS_PATH) : nullptr; + bool save_igs = igs && SAVE_IGS_EVENTID == eventID && igs_path; + LOG(LEVEL) << " eventID " << eventID << " igs " << (igs ? igs->sstr() : "-") << " igs_null " + << (igs_null ? "YES" : "NO ") << " [" << _QSim__SAVE_IGS_EVENTID << "] " << SAVE_IGS_EVENTID << " [" + << _QSim__SAVE_IGS_PATH << "] " << (SAVE_IGS_PATH ? SAVE_IGS_PATH : "-") << " igs_path [" + << (igs_path ? igs_path : "-") << "]" << " save_igs " << (save_igs ? "YES" : "NO "); + + if (!save_igs) + return; igs->save(igs_path); } - - /** QSim::getPhotonSlotOffset --------------------------- @@ -810,14 +730,11 @@ or equal the number of states uploaded. 
**/ - - unsigned long long QSim::get_photon_slot_offset() const { - return qev->get_photon_slot_offset() ; + return qev->get_photon_slot_offset(); } - /** QSim::reset ------------ @@ -838,12 +755,10 @@ void QSim::reset(int eventID) SProf::Add("QSim__reset_HEAD"); qev->clear(); sev->endOfEvent(eventID); - LOG_IF(info, SEvt::LIFECYCLE) << "] eventID " << eventID ; + LOG_IF(info, SEvt::LIFECYCLE) << "] eventID " << eventID; SProf::Add("QSim__reset_TAIL"); } - - /** QSim::simtrace --------------- @@ -853,30 +768,26 @@ Collected genstep are uploaded and the CSGOptiX kernel is launched to generate a **/ - double QSim::simtrace(int eventID) { - assert( SEventConfig::IsRGModeSimtrace() ); - + assert(SEventConfig::IsRGModeSimtrace()); sev->beginOfEvent(eventID); - NP* igs = sev->makeGenstepArrayFromVector(); + NP *igs = sev->makeGenstepArrayFromVector(); - LOG_IF(fatal, igs==nullptr) - << " igs NULL " - << " sev.descGenstepArrayFromVector " << sev->descGenstepArrayFromVector() - ; + LOG_IF(fatal, igs == nullptr) << " igs NULL " << " sev.descGenstepArrayFromVector " + << sev->descGenstepArrayFromVector(); assert(igs); - int rc = qev->setGenstepUpload_NP(igs) ; + int rc = qev->setGenstepUpload_NP(igs); - LOG_IF(error, rc != 0) << " QEvt::setGenstep ERROR : no gensteps collected : will skip cx.simtrace " ; + LOG_IF(error, rc != 0) << " QEvt::setGenstep ERROR : no gensteps collected : will skip cx.simtrace "; - sev->t_PreLaunch = sstamp::Now() ; - double dt = rc == 0 && cx != nullptr ? cx->simtrace_launch() : -1. ; - sev->t_PostLaunch = sstamp::Now() ; - sev->t_Launch = dt ; + sev->t_PreLaunch = sstamp::Now(); + double dt = rc == 0 && cx != nullptr ? 
cx->simtrace_launch() : -1.; + sev->t_PostLaunch = sstamp::Now(); + sev->t_Launch = dt; // see ~/o/notes/issues/cxt_min_simtrace_revival.rst sev->gather(); @@ -886,94 +797,80 @@ double QSim::simtrace(int eventID) sev->endOfEvent(eventID); - return dt ; + return dt; } - -qsim* QSim::getDevicePtr() const +qsim *QSim::getDevicePtr() const { - return d_sim ; + return d_sim; } - char QSim::getScintTexFilterMode() const { - return scint->tex->getFilterMode() ; + return scint->tex->getFilterMode(); } std::string QSim::desc() const { - std::stringstream ss ; - ss << "QSim::desc" - << std::endl - << " this 0x" << std::hex << std::uint64_t(this) << std::dec - << " INSTANCE 0x" << std::hex << std::uint64_t(INSTANCE) << std::dec - << " QEvt.hh:qev 0x" << std::hex << std::uint64_t(qev) << std::dec - << " qsim.h:sim 0x" << std::hex << std::uint64_t(sim) << std::dec - ; + std::stringstream ss; + ss << "QSim::desc" << std::endl + << " this 0x" << std::hex << std::uint64_t(this) << std::dec << " INSTANCE 0x" << std::hex + << std::uint64_t(INSTANCE) << std::dec << " QEvt.hh:qev 0x" << std::hex << std::uint64_t(qev) << std::dec + << " qsim.h:sim 0x" << std::hex << std::uint64_t(sim) << std::dec; std::string s = ss.str(); - return s ; + return s; } std::string QSim::descFull() const { - std::stringstream ss ; - ss - << std::endl - << "QSim::descFull" - << std::endl - << " this 0x" << std::hex << std::uint64_t(this) << std::dec - << " INSTANCE 0x" << std::hex << std::uint64_t(INSTANCE) << std::dec - << " QEvt.hh:qev 0x" << std::hex << std::uint64_t(qev) << std::dec - << " qsim.h:sim 0x" << std::hex << std::uint64_t(sim) << std::dec - << " qsim.h:d_sim 0x" << std::hex << std::uint64_t(d_sim) << std::dec - //<< " sim->rng_state 0x" << std::hex << std::uint64_t(sim->rng_state) << std::dec // tending to SEGV on some systems - << " sim->base 0x" << std::hex << std::uint64_t(sim->base) << std::dec - << " sim->bnd 0x" << std::hex << std::uint64_t(sim->bnd) << std::dec - << " sim->scint 0x" << 
std::hex << std::uint64_t(sim->scint) << std::dec - << " sim->cerenkov 0x" << std::hex << std::uint64_t(sim->cerenkov) << std::dec - ; + std::stringstream ss; + ss << std::endl + << "QSim::descFull" << std::endl + << " this 0x" << std::hex << std::uint64_t(this) << std::dec << " INSTANCE 0x" << std::hex + << std::uint64_t(INSTANCE) << std::dec << " QEvt.hh:qev 0x" << std::hex << std::uint64_t(qev) << std::dec + << " qsim.h:sim 0x" << std::hex << std::uint64_t(sim) << std::dec << " qsim.h:d_sim 0x" << std::hex + << std::uint64_t(d_sim) + << std::dec + //<< " sim->rng_state 0x" << std::hex << std::uint64_t(sim->rng_state) << std::dec // tending to SEGV on some + // systems + << " sim->base 0x" << std::hex << std::uint64_t(sim->base) << std::dec << " sim->bnd 0x" << std::hex + << std::uint64_t(sim->bnd) << std::dec << " sim->scint 0x" << std::hex << std::uint64_t(sim->scint) << std::dec + << " sim->cerenkov 0x" << std::hex << std::uint64_t(sim->cerenkov) << std::dec; std::string s = ss.str(); - return s ; + return s; } std::string QSim::descComponents() const { - std::stringstream ss ; + std::stringstream ss; ss << std::endl - << "QSim::descComponents" - << std::endl - << " (QBase)base " << ( base ? "YES" : "NO " ) << std::endl - << " (QEvt)qev " << ( qev ? "YES" : "NO " ) << std::endl - << " (SEvt)sev " << ( sev ? "YES" : "NO " ) << std::endl - << " (QRng)rng " << ( rng ? "YES" : "NO " ) << std::endl - << " (QScint)scint " << ( scint ? "YES" : "NO " ) << std::endl - << " (QCerenkov)cerenkov " << ( cerenkov ? "YES" : "NO " ) << std::endl - << " (QBnd)bnd " << ( bnd ? "YES" : "NO " ) << std::endl - << " (QOptical)optical " << ( optical ? "YES" : "NO " ) << std::endl - << " (QDebug)debug_ " << ( debug_ ? "YES" : "NO " ) << std::endl - << " (QProp)prop " << ( prop ? "YES" : "NO " ) << std::endl - << " (QPMT)pmt " << ( pmt ? "YES" : "NO " ) << std::endl - << " (QMultiFilm)multifilm " << ( multifilm ? "YES" : "NO " ) << std::endl - << " (qsim)sim " << ( sim ? 
"YES" : "NO " ) << std::endl - << " (qsim)d_sim " << ( d_sim ? "YES" : "NO " ) << std::endl - << " (qdebug)dbg " << ( dbg ? "YES" : "NO " ) << std::endl - << " (qdebug)d_dbg " << ( d_dbg ? "YES" : "NO " ) << std::endl - ; + << "QSim::descComponents" << std::endl + << " (QBase)base " << (base ? "YES" : "NO ") << std::endl + << " (QEvt)qev " << (qev ? "YES" : "NO ") << std::endl + << " (SEvt)sev " << (sev ? "YES" : "NO ") << std::endl + << " (QRng)rng " << (rng ? "YES" : "NO ") << std::endl + << " (QScint)scint " << (scint ? "YES" : "NO ") << std::endl + << " (QCerenkov)cerenkov " << (cerenkov ? "YES" : "NO ") << std::endl + << " (QBnd)bnd " << (bnd ? "YES" : "NO ") << std::endl + << " (QOptical)optical " << (optical ? "YES" : "NO ") << std::endl + << " (QDebug)debug_ " << (debug_ ? "YES" : "NO ") << std::endl + << " (QProp)prop " << (prop ? "YES" : "NO ") << std::endl + << " (QPMT)pmt " << (pmt ? "YES" : "NO ") << std::endl + << " (QMultiFilm)multifilm " << (multifilm ? "YES" : "NO ") << std::endl + << " (qsim)sim " << (sim ? "YES" : "NO ") << std::endl + << " (qsim)d_sim " << (d_sim ? "YES" : "NO ") << std::endl + << " (qdebug)dbg " << (dbg ? "YES" : "NO ") << std::endl + << " (qdebug)d_dbg " << (d_dbg ? 
"YES" : "NO ") << std::endl; std::string s = ss.str(); - return s ; + return s; } - - - - -void QSim::configureLaunch(unsigned width, unsigned height ) +void QSim::configureLaunch(unsigned width, unsigned height) { QU::ConfigureLaunch(numBlocks, threadsPerBlock, width, height); } -void QSim::configureLaunch2D(unsigned width, unsigned height ) +void QSim::configureLaunch2D(unsigned width, unsigned height) { QU::ConfigureLaunch2D(numBlocks, threadsPerBlock, width, height); } @@ -988,20 +885,11 @@ void QSim::configureLaunch1D(unsigned num, unsigned threads_per_block) QU::ConfigureLaunch1D(numBlocks, threadsPerBlock, num, threads_per_block); } - std::string QSim::descLaunch() const { return QU::DescLaunch(numBlocks, threadsPerBlock); } - - - - - - - - /** QSim::rng_sequence mass production with multiple launches... -------------------------------------------------------------- @@ -1009,9 +897,8 @@ QSim::rng_sequence mass production with multiple launches... The output files are split too:: epsilon:opticks blyth$ np.py *.npy - a : TRngBufTest_0.npy : (10000, 16, 16) : 8f9b27c9416a0121574730baa742b5c9 : 20210715-1227 - epsilon:opticks blyth$ du -h TRngBufTest_0.npy - 20M TRngBufTest_0.npy + a : TRngBufTest_0.npy : (10000, 16, 16) : +8f9b27c9416a0121574730baa742b5c9 : 20210715-1227 epsilon:opticks blyth$ du -h TRngBufTest_0.npy 20M TRngBufTest_0.npy In [6]: (16*16*4*2*10000)/1e6 Out[6]: 20.48 @@ -1024,10 +911,9 @@ Upping to 1M would be 100x 20M = 2000M 2GB **/ - template -extern void QSim_rng_sequence( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, T* seq, unsigned ni, unsigned nj, unsigned id_offset ); - +extern void QSim_rng_sequence(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, T *seq, unsigned ni, unsigned nj, + unsigned id_offset); /** QSim::rng_sequence generate randoms in single CUDA launch @@ -1047,25 +933,22 @@ skipahead : used curand skipahead offsets depending on sim->evt->index and OPTIC **/ -template -void QSim::rng_sequence( T* seq, unsigned 
ni_tranche, unsigned nv, unsigned id_offset ) +template void QSim::rng_sequence(T *seq, unsigned ni_tranche, unsigned nv, unsigned id_offset) { - configureLaunch(ni_tranche, 1 ); + configureLaunch(ni_tranche, 1); - unsigned num_rng = ni_tranche*nv ; + unsigned num_rng = ni_tranche * nv; - const char* label = "QSim::rng_sequence:num_rng" ; + const char *label = "QSim::rng_sequence:num_rng"; - T* d_seq = QU::device_alloc(num_rng, label ); + T *d_seq = QU::device_alloc(num_rng, label); - QSim_rng_sequence( numBlocks, threadsPerBlock, d_sim, d_seq, ni_tranche, nv, id_offset ); + QSim_rng_sequence(numBlocks, threadsPerBlock, d_sim, d_seq, ni_tranche, nv, id_offset); - QU::copy_device_to_host_and_free( seq, d_seq, num_rng, label ); + QU::copy_device_to_host_and_free(seq, d_seq, num_rng, label); } - - -const char* QSim::PREFIX = "rng_sequence" ; +const char *QSim::PREFIX = "rng_sequence"; /** QSim::rng_sequence @@ -1085,77 +968,47 @@ Default *dir* is $TMP/QSimTest/rng_sequence leading to npy paths like:: **/ template -void QSim::rng_sequence( const char* dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size ) +void QSim::rng_sequence(const char *dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size) { - assert( ni >= ni_tranche_size && ni % ni_tranche_size == 0 ); // total size *ni* must be integral multiple of *ni_tranche_size* - unsigned num_tranche = ni/ni_tranche_size ; - unsigned nv = nj*nk ; - - unsigned size = ni_tranche_size*nv ; // number of randoms to be generated in each launch - std::string reldir = QU::rng_sequence_reldir(PREFIX, ni, nj, nk, ni_tranche_size ) ; - - LOG(info) - << " ni " << ni - << " ni_tranche_size " << ni_tranche_size - << " num_tranche " << num_tranche - << " reldir " << reldir.c_str() - << " nj " << nj - << " nk " << nk - << " nv(nj*nk) " << nv - << " size(ni_tranche_size*nv) " << size - << " typecode " << QU::typecode() - ; + assert(ni >= ni_tranche_size && + ni % ni_tranche_size == 0); // total size *ni* 
must be integral multiple of *ni_tranche_size* + unsigned num_tranche = ni / ni_tranche_size; + unsigned nv = nj * nk; + + unsigned size = ni_tranche_size * nv; // number of randoms to be generated in each launch + std::string reldir = QU::rng_sequence_reldir(PREFIX, ni, nj, nk, ni_tranche_size); + LOG(info) << " ni " << ni << " ni_tranche_size " << ni_tranche_size << " num_tranche " << num_tranche << " reldir " + << reldir.c_str() << " nj " << nj << " nk " << nk << " nv(nj*nk) " << nv << " size(ni_tranche_size*nv) " + << size << " typecode " << QU::typecode(); // NB seq array memory gets reused for each launch and saved to different paths - NP* seq = NP::Make(ni_tranche_size, nj, nk) ; - T* seq_values = seq->values(); + NP *seq = NP::Make(ni_tranche_size, nj, nk); + T *seq_values = seq->values(); NP::INT seq_nv = seq->num_values(); + LOG(info) << " seq " << (seq ? seq->sstr() : "-") << " seq_values " << seq_values << " seq_nv " << seq_nv + << " seq_values[0] " << seq_values[0] << " seq_values[seq_nv-1] " << seq_values[seq_nv - 1]; - LOG(info) - << " seq " << ( seq ? 
seq->sstr() : "-" ) - << " seq_values " << seq_values - << " seq_nv " << seq_nv - << " seq_values[0] " << seq_values[0] - << " seq_values[seq_nv-1] " << seq_values[seq_nv-1] - ; - - - - for(unsigned t=0 ; t < num_tranche ; t++) + for (unsigned t = 0; t < num_tranche; t++) { // *id_offset* controls which rng_state/RNG to use - unsigned id_offset = ni_tranche_size*t ; - std::string name = QU::rng_sequence_name(PREFIX, ni_tranche_size, nj, nk, id_offset ) ; + unsigned id_offset = ni_tranche_size * t; + std::string name = QU::rng_sequence_name(PREFIX, ni_tranche_size, nj, nk, id_offset); - std::cout - << std::setw(3) << t - << std::setw(10) << id_offset - << std::setw(100) << name.c_str() - << std::endl - ; + std::cout << std::setw(3) << t << std::setw(10) << id_offset << std::setw(100) << name.c_str() << std::endl; - rng_sequence( seq_values, ni_tranche_size, nv, id_offset ); + rng_sequence(seq_values, ni_tranche_size, nv, id_offset); - const char* path = spath::Resolve(dir, reldir.c_str(), name.c_str() ); + const char *path = spath::Resolve(dir, reldir.c_str(), name.c_str()); seq->save(path); } } - - -template void QSim::rng_sequence( const char* dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size ); -template void QSim::rng_sequence( const char* dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size ); - - - - - - - - - +template void QSim::rng_sequence(const char *dir, unsigned ni, unsigned nj, unsigned nk, + unsigned ni_tranche_size); +template void QSim::rng_sequence(const char *dir, unsigned ni, unsigned nj, unsigned nk, + unsigned ni_tranche_size); /** QSim::scint_wavelength @@ -1167,95 +1020,87 @@ the typical values of 10 or 20 which depend on the buffer creation. 
**/ -extern void QSim_scint_wavelength( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, float* wavelength, unsigned num_wavelength ); +extern void QSim_scint_wavelength(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, float *wavelength, + unsigned num_wavelength); -NP* QSim::scint_wavelength(unsigned num_wavelength, unsigned& hd_factor ) +NP *QSim::scint_wavelength(unsigned num_wavelength, unsigned &hd_factor) { bool qsim_disable_hd = ssys::getenvbool("QSIM_DISABLE_HD"); - hd_factor = qsim_disable_hd ? 0u : scint->tex->getHDFactor() ; + hd_factor = qsim_disable_hd ? 0u : scint->tex->getHDFactor(); // HMM: perhaps get this from sim rather than occupying an argument slot - LOG(LEVEL) << "[" << " qsim_disable_hd " << qsim_disable_hd << " hd_factor " << hd_factor ; + LOG(LEVEL) << "[" << " qsim_disable_hd " << qsim_disable_hd << " hd_factor " << hd_factor; - configureLaunch(num_wavelength, 1 ); + configureLaunch(num_wavelength, 1); - float* d_wavelength = QU::device_alloc(num_wavelength, "QSim::scint_wavelength/num_wavelength"); + float *d_wavelength = QU::device_alloc(num_wavelength, "QSim::scint_wavelength/num_wavelength"); - QSim_scint_wavelength(numBlocks, threadsPerBlock, d_sim, d_wavelength, num_wavelength ); + QSim_scint_wavelength(numBlocks, threadsPerBlock, d_sim, d_wavelength, num_wavelength); - NP* w = NP::Make(num_wavelength) ; + NP *w = NP::Make(num_wavelength); - QU::copy_device_to_host_and_free( (float*)w->bytes(), d_wavelength, num_wavelength, "QSim::scint_wavelength" ); + QU::copy_device_to_host_and_free((float *)w->bytes(), d_wavelength, num_wavelength, + "QSim::scint_wavelength"); - LOG(LEVEL) << "]" ; + LOG(LEVEL) << "]"; - return w ; + return w; } +extern void QSim_RandGaussQ_shoot(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, float *v, unsigned num_v); -extern void QSim_RandGaussQ_shoot( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, float* v, unsigned num_v ); - -NP* QSim::RandGaussQ_shoot(unsigned num_v ) +NP 
*QSim::RandGaussQ_shoot(unsigned num_v) { - const char* label = "QSim::RandGaussQ_shoot/num" ; - configureLaunch(num_v, 1 ); + const char *label = "QSim::RandGaussQ_shoot/num"; + configureLaunch(num_v, 1); std::cout << label << " " << num_v << std::endl; - float* d_v = QU::device_alloc(num_v, label ); + float *d_v = QU::device_alloc(num_v, label); - QSim_RandGaussQ_shoot(numBlocks, threadsPerBlock, d_sim, d_v, num_v ); + QSim_RandGaussQ_shoot(numBlocks, threadsPerBlock, d_sim, d_v, num_v); cudaDeviceSynchronize(); - NP* v = NP::Make(num_v) ; - QU::copy_device_to_host_and_free( (float*)v->bytes(), d_v, num_v, label ); + NP *v = NP::Make(num_v); + QU::copy_device_to_host_and_free((float *)v->bytes(), d_v, num_v, label); - return v ; + return v; } - - - -void QSim::dump_wavelength( float* wavelength, unsigned num_wavelength, unsigned edgeitems ) +void QSim::dump_wavelength(float *wavelength, unsigned num_wavelength, unsigned edgeitems) { LOG(LEVEL); - for(unsigned i=0 ; i < num_wavelength ; i++) + for (unsigned i = 0; i < num_wavelength; i++) { - if( i < edgeitems || i > num_wavelength - edgeitems) + if (i < edgeitems || i > num_wavelength - edgeitems) { - std::cout - << std::setw(10) << i - << std::setw(10) << std::fixed << std::setprecision(3) << wavelength[i] - << std::endl - ; + std::cout << std::setw(10) << i << std::setw(10) << std::fixed << std::setprecision(3) << wavelength[i] + << std::endl; } } } +extern void QSim_dbg_gs_generate(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, qdebug *dbg, sphoton *photon, + unsigned num_photon, unsigned type); -extern void QSim_dbg_gs_generate(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, qdebug* dbg, sphoton* photon, unsigned num_photon, unsigned type ) ; - - -NP* QSim::dbg_gs_generate(unsigned num_photon, unsigned type ) +NP *QSim::dbg_gs_generate(unsigned num_photon, unsigned type) { - assert( type == SCINT_GENERATE || type == CERENKOV_GENERATE ); + assert(type == SCINT_GENERATE || type == CERENKOV_GENERATE); - 
configureLaunch( num_photon, 1 ); - sphoton* d_photon = QU::device_alloc(num_photon, "QSim::dbg_gs_generate:num_photon") ; + configureLaunch(num_photon, 1); + sphoton *d_photon = QU::device_alloc(num_photon, "QSim::dbg_gs_generate:num_photon"); QU::device_memset(d_photon, 0, num_photon); - QSim_dbg_gs_generate(numBlocks, threadsPerBlock, d_sim, d_dbg, d_photon, num_photon, type ); + QSim_dbg_gs_generate(numBlocks, threadsPerBlock, d_sim, d_dbg, d_photon, num_photon, type); - NP* p = NP::Make(num_photon, 4, 4); - const char* label = "QSim::dbg_gs_generate" ; + NP *p = NP::Make(num_photon, 4, 4); + const char *label = "QSim::dbg_gs_generate"; - QU::copy_device_to_host_and_free( (sphoton*)p->bytes(), d_photon, num_photon, label ); - return p ; + QU::copy_device_to_host_and_free((sphoton *)p->bytes(), d_photon, num_photon, label); + return p; } - - -extern void QSim_generate_photon(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim ) ; +extern void QSim_generate_photon(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim); /** QSim::generate_photon @@ -1263,92 +1108,71 @@ QSim::generate_photon **/ - void QSim::generate_photon() { - LOG(LEVEL) << "[" ; + LOG(LEVEL) << "["; - unsigned num_photon = qev->getNumPhoton() ; - LOG(info) << " num_photon " << num_photon ; + unsigned num_photon = qev->getNumPhoton(); + LOG(info) << " num_photon " << num_photon; - LOG_IF(fatal, num_photon == 0 ) - << " num_photon zero : MUST QEvt::setGenstep before QSim::generate_photon " - ; + LOG_IF(fatal, num_photon == 0) << " num_photon zero : MUST QEvt::setGenstep before QSim::generate_photon "; - assert( num_photon > 0 ); - assert( d_sim ); + assert(num_photon > 0); + assert(d_sim); - configureLaunch( num_photon, 1 ); + configureLaunch(num_photon, 1); - LOG(info) << "QSim_generate_photon... " ; + LOG(info) << "QSim_generate_photon... 
"; - QSim_generate_photon(numBlocks, threadsPerBlock, d_sim ); + QSim_generate_photon(numBlocks, threadsPerBlock, d_sim); - LOG(LEVEL) << "]" ; + LOG(LEVEL) << "]"; } +extern void QSim_fill_state_0(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, quad6 *state, unsigned num_state, + qdebug *dbg); - - - - -extern void QSim_fill_state_0(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad6* state, unsigned num_state, qdebug* dbg ); - -void QSim::fill_state_0(quad6* state, unsigned num_state) +void QSim::fill_state_0(quad6 *state, unsigned num_state) { - assert( d_sim ); - assert( d_dbg ); - - quad6* d_state = QU::device_alloc(num_state, "QSim::fill_state_0:num_state") ; + assert(d_sim); + assert(d_dbg); + quad6 *d_state = QU::device_alloc(num_state, "QSim::fill_state_0:num_state"); - unsigned threads_per_block = 32 ; - configureLaunch1D( num_state, threads_per_block ); + unsigned threads_per_block = 32; + configureLaunch1D(num_state, threads_per_block); - LOG(info) - << " num_state " << num_state - << " threads_per_block " << threads_per_block - << " descLaunch " << descLaunch() - ; + LOG(info) << " num_state " << num_state << " threads_per_block " << threads_per_block << " descLaunch " + << descLaunch(); - QSim_fill_state_0(numBlocks, threadsPerBlock, d_sim, d_state, num_state, d_dbg ); + QSim_fill_state_0(numBlocks, threadsPerBlock, d_sim, d_state, num_state, d_dbg); - const char* label = "QSim::fill_state_0" ; - QU::copy_device_to_host_and_free( state, d_state, num_state, label ); + const char *label = "QSim::fill_state_0"; + QU::copy_device_to_host_and_free(state, d_state, num_state, label); } +extern void QSim_fill_state_1(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, sstate *state, unsigned num_state, + qdebug *dbg); -extern void QSim_fill_state_1(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, sstate* state, unsigned num_state, qdebug* dbg ); - -void QSim::fill_state_1(sstate* state, unsigned num_state) +void QSim::fill_state_1(sstate *state, unsigned 
num_state) { - assert( d_sim ); - assert( d_dbg ); + assert(d_sim); + assert(d_dbg); - sstate* d_state = QU::device_alloc(num_state, "QSim::fill_state_1:num_state") ; + sstate *d_state = QU::device_alloc(num_state, "QSim::fill_state_1:num_state"); - unsigned threads_per_block = 64 ; - configureLaunch1D( num_state, threads_per_block ); + unsigned threads_per_block = 64; + configureLaunch1D(num_state, threads_per_block); - LOG(info) - << " num_state " << num_state - << " threads_per_block " << threads_per_block - << " descLaunch " << descLaunch() - ; + LOG(info) << " num_state " << num_state << " threads_per_block " << threads_per_block << " descLaunch " + << descLaunch(); - QSim_fill_state_1(numBlocks, threadsPerBlock, d_sim, d_state, num_state, d_dbg ); + QSim_fill_state_1(numBlocks, threadsPerBlock, d_sim, d_state, num_state, d_dbg); - const char* label = "QSim::fill_state_1" ; - QU::copy_device_to_host_and_free( state, d_state, num_state, label ); + const char *label = "QSim::fill_state_1"; + QU::copy_device_to_host_and_free(state, d_state, num_state, label); } - - - - - - - /** extern QSim_quad_launch -------------------------- @@ -1357,43 +1181,39 @@ This function is implemented in QSim.cu and it used by *quad_launch_generate* **/ -extern void QSim_quad_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad* q, unsigned num_quad, qdebug* dbg, unsigned type ); - - +extern void QSim_quad_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, quad *q, unsigned num_quad, qdebug *dbg, + unsigned type); -NP* QSim::quad_launch_generate(unsigned num_quad, unsigned type ) +NP *QSim::quad_launch_generate(unsigned num_quad, unsigned type) { - assert( d_sim ); - assert( d_dbg ); + assert(d_sim); + assert(d_dbg); - const char* label = "QSim::quad_launch_generate:num_quad" ; + const char *label = "QSim::quad_launch_generate:num_quad"; - quad* d_q = QU::device_alloc(num_quad, label ) ; + quad *d_q = QU::device_alloc(num_quad, label); - unsigned threads_per_block = 512 
; - configureLaunch1D( num_quad, threads_per_block ); + unsigned threads_per_block = 512; + configureLaunch1D(num_quad, threads_per_block); - QSim_quad_launch(numBlocks, threadsPerBlock, d_sim, d_q, num_quad, d_dbg, type ); + QSim_quad_launch(numBlocks, threadsPerBlock, d_sim, d_q, num_quad, d_dbg, type); - NP* q = NP::Make( num_quad, 4 ); - quad* qq = (quad*)q->bytes(); + NP *q = NP::Make(num_quad, 4); + quad *qq = (quad *)q->bytes(); - QU::copy_device_to_host_and_free( qq, d_q, num_quad, label ); + QU::copy_device_to_host_and_free(qq, d_q, num_quad, label); - if( type == QGEN_SMEAR_NORMAL_SIGMA_ALPHA || type == QGEN_SMEAR_NORMAL_POLISH ) + if (type == QGEN_SMEAR_NORMAL_SIGMA_ALPHA || type == QGEN_SMEAR_NORMAL_POLISH) { - q->set_meta("normal", scuda::serialize(dbg->normal) ); - q->set_meta("direction", scuda::serialize(dbg->direction) ); - q->set_meta("value", dbg->value ); - q->set_meta("valuename", type == QGEN_SMEAR_NORMAL_SIGMA_ALPHA ? "sigma_alpha" : "polish" ); + q->set_meta("normal", scuda::serialize(dbg->normal)); + q->set_meta("direction", scuda::serialize(dbg->direction)); + q->set_meta("value", dbg->value); + q->set_meta("valuename", type == QGEN_SMEAR_NORMAL_SIGMA_ALPHA ? "sigma_alpha" : "polish"); } - return q ; + return q; } - - - /** extern QSim_photon_launch -------------------------- @@ -1402,8 +1222,8 @@ This function is implemented in QSim.cu and it used by BOTH *photon_launch_gener **/ -extern void QSim_photon_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, sphoton* photon, unsigned num_photon, qdebug* dbg, unsigned type ); - +extern void QSim_photon_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, sphoton *photon, unsigned num_photon, + qdebug *dbg, unsigned type); /** QSim::photon_launch_generate @@ -1414,32 +1234,29 @@ then downloads the generated photons into the host array. 
Contrast with *photon_ **/ -NP* QSim::photon_launch_generate(unsigned num_photon, unsigned type ) +NP *QSim::photon_launch_generate(unsigned num_photon, unsigned type) { - assert( d_sim ); - assert( d_dbg ); + assert(d_sim); + assert(d_dbg); - const char* label = "QSim::photon_launch_generate:num_photon" ; + const char *label = "QSim::photon_launch_generate:num_photon"; - sphoton* d_photon = QU::device_alloc(num_photon, label ) ; + sphoton *d_photon = QU::device_alloc(num_photon, label); QU::device_memset(d_photon, 0, num_photon); - unsigned threads_per_block = 512 ; - configureLaunch1D( num_photon, threads_per_block ); + unsigned threads_per_block = 512; + configureLaunch1D(num_photon, threads_per_block); - QSim_photon_launch(numBlocks, threadsPerBlock, d_sim, d_photon, num_photon, d_dbg, type ); + QSim_photon_launch(numBlocks, threadsPerBlock, d_sim, d_photon, num_photon, d_dbg, type); - NP* p = NP::Make(num_photon, 4, 4); - sphoton* photon = (sphoton*)p->bytes() ; + NP *p = NP::Make(num_photon, 4, 4); + sphoton *photon = (sphoton *)p->bytes(); - QU::copy_device_to_host_and_free( photon, d_photon, num_photon, label ); + QU::copy_device_to_host_and_free(photon, d_photon, num_photon, label); - return p ; + return p; } - - - /** QSim::photon_launch_mutate --------------------------- @@ -1448,45 +1265,35 @@ This uploads the photon array provided, mutates it and then downloads the change **/ -void QSim::photon_launch_mutate(sphoton* photon, unsigned num_photon, unsigned type ) +void QSim::photon_launch_mutate(sphoton *photon, unsigned num_photon, unsigned type) { - assert( d_sim ); - assert( d_dbg ); + assert(d_sim); + assert(d_dbg); - const char* label_0 = "QSim::photon_launch_mutate/d_photon" ; - sphoton* d_photon = QU::UploadArray(photon, num_photon, label_0 ); + const char *label_0 = "QSim::photon_launch_mutate/d_photon"; + sphoton *d_photon = QU::UploadArray(photon, num_photon, label_0); - unsigned DEBUG_NUM_PHOTON = 
ssys::getenvunsigned(_QSim__photon_launch_mutate_DEBUG_NUM_PHOTON, 0 ); - bool DEBUG_NUM_PHOTON_valid = DEBUG_NUM_PHOTON > 0 && DEBUG_NUM_PHOTON <= num_photon ; - unsigned u_num_photon = DEBUG_NUM_PHOTON_valid ? DEBUG_NUM_PHOTON : num_photon ; - bool SKIP_LAUNCH = ssys::getenvbool(_QSim__photon_launch_mutate_SKIP_LAUNCH) ; + unsigned DEBUG_NUM_PHOTON = ssys::getenvunsigned(_QSim__photon_launch_mutate_DEBUG_NUM_PHOTON, 0); + bool DEBUG_NUM_PHOTON_valid = DEBUG_NUM_PHOTON > 0 && DEBUG_NUM_PHOTON <= num_photon; + unsigned u_num_photon = DEBUG_NUM_PHOTON_valid ? DEBUG_NUM_PHOTON : num_photon; + bool SKIP_LAUNCH = ssys::getenvbool(_QSim__photon_launch_mutate_SKIP_LAUNCH); - LOG_IF( error, DEBUG_NUM_PHOTON_valid || true ) - << _QSim__photon_launch_mutate_DEBUG_NUM_PHOTON - << " DEBUG_NUM_PHOTON " << DEBUG_NUM_PHOTON - << " num_photon " << num_photon - << " u_num_photon " << u_num_photon - << _QSim__photon_launch_mutate_SKIP_LAUNCH - << " " << ( SKIP_LAUNCH ? "YES" : "NO " ) - ; + LOG_IF(error, DEBUG_NUM_PHOTON_valid || true) + << _QSim__photon_launch_mutate_DEBUG_NUM_PHOTON << " DEBUG_NUM_PHOTON " << DEBUG_NUM_PHOTON << " num_photon " + << num_photon << " u_num_photon " << u_num_photon << _QSim__photon_launch_mutate_SKIP_LAUNCH << " " + << (SKIP_LAUNCH ? 
"YES" : "NO "); - - if( SKIP_LAUNCH == false ) + if (SKIP_LAUNCH == false) { - unsigned threads_per_block = 512 ; - configureLaunch1D( u_num_photon, threads_per_block ); - QSim_photon_launch(numBlocks, threadsPerBlock, d_sim, d_photon, u_num_photon, d_dbg, type ); + unsigned threads_per_block = 512; + configureLaunch1D(u_num_photon, threads_per_block); + QSim_photon_launch(numBlocks, threadsPerBlock, d_sim, d_photon, u_num_photon, d_dbg, type); } - - const char* label_1 = "QSim::photon_launch_mutate" ; - QU::copy_device_to_host_and_free( photon, d_photon, u_num_photon, label_1 ); + const char *label_1 = "QSim::photon_launch_mutate"; + QU::copy_device_to_host_and_free(photon, d_photon, u_num_photon, label_1); } - - - - /** QSim::UploadFakePRD (formerly "UploadMockPRD" ) ---------------------------------------------------- @@ -1494,33 +1301,28 @@ QSim::UploadFakePRD (formerly "UploadMockPRD" ) Caution this returns a device pointer. **/ -quad2* QSim::UploadFakePRD(const NP* ip, const NP* prd) // static +quad2 *QSim::UploadFakePRD(const NP *ip, const NP *prd) // static { assert(ip); - int num_ip = ip->shape[0] ; - assert( num_ip > 0 ); + int num_ip = ip->shape[0]; + assert(num_ip > 0); - assert( prd->has_shape( num_ip, -1, 2, 4 ) ); // TODO: evt->max_record checking - assert( prd->shape.size() == 4 && prd->shape[2] == 2 && prd->shape[3] == 4 ); - int num_prd = prd->shape[0]*prd->shape[1] ; + assert(prd->has_shape(num_ip, -1, 2, 4)); // TODO: evt->max_record checking + assert(prd->shape.size() == 4 && prd->shape[2] == 2 && prd->shape[3] == 4); + int num_prd = prd->shape[0] * prd->shape[1]; - LOG(LEVEL) - << "[" - << " num_ip " << num_ip - << " num_prd " << num_prd - << " prd " << prd->sstr() - ; + LOG(LEVEL) << "[" << " num_ip " << num_ip << " num_prd " << num_prd << " prd " << prd->sstr(); - const char* label = "QSim::UploadFakePRD/d_prd" ; - quad2* d_prd = QU::UploadArray( (quad2*)prd->bytes(), num_prd, label ); + const char *label = "QSim::UploadFakePRD/d_prd"; + 
quad2 *d_prd = QU::UploadArray((quad2 *)prd->bytes(), num_prd, label); // prd is non-standard so it is appropriate to adhoc upload here - return d_prd ; + return d_prd; } #if !defined(PRODUCTION) -extern void QSim_fake_propagate_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad2* prd ); +extern void QSim_fake_propagate_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, quad2 *prd); #endif /** @@ -1543,7 +1345,7 @@ using common QEvt functionality **/ -void QSim::fake_propagate( const NP* prd, unsigned type ) +void QSim::fake_propagate(const NP *prd, unsigned type) { #if defined(PRODUCTION) (void)prd; @@ -1551,126 +1353,105 @@ void QSim::fake_propagate( const NP* prd, unsigned type ) LOG(fatal) << "QSim::fake_propagate is disabled in PRODUCTION builds"; std::raise(SIGINT); #else - const NP* ip = sev->getInputPhoton(); - int num_ip = ip ? ip->shape[0] : 0 ; - assert( num_ip > 0 ); + const NP *ip = sev->getInputPhoton(); + int num_ip = ip ? ip->shape[0] : 0; + assert(num_ip > 0); - quad2* d_prd = UploadFakePRD(ip, prd) ; + quad2 *d_prd = UploadFakePRD(ip, prd); - NP* igs = sev->makeGenstepArrayFromVector(); + NP *igs = sev->makeGenstepArrayFromVector(); int rc = qev->setGenstepUpload_NP(igs); - assert( rc == 0 ); - if(rc!=0) std::raise(SIGINT); + assert(rc == 0); + if (rc != 0) + std::raise(SIGINT); - sev->add_array("prd0", prd ); + sev->add_array("prd0", prd); // NB SEvt::beginOfEvent calls SEvt/clear so this addition // must be after that to succeed in being added to SEvt saved arrays int num_photon = qev->getNumPhoton(); - bool consistent_num_photon = num_photon == num_ip ; + bool consistent_num_photon = num_photon == num_ip; LOG_IF(fatal, !consistent_num_photon) - << "[" - << " num_ip " << num_ip - << " QEvt::getNumPhoton " << num_photon - << " consistent_num_photon " << ( consistent_num_photon ? 
"YES" : "NO " ) - << " prd " << prd->sstr() - ; + << "[" << " num_ip " << num_ip << " QEvt::getNumPhoton " << num_photon << " consistent_num_photon " + << (consistent_num_photon ? "YES" : "NO ") << " prd " << prd->sstr(); assert(consistent_num_photon); - assert( qev->upload_count > 0 ); + assert(qev->upload_count > 0); - unsigned threads_per_block = 512 ; - configureLaunch1D( num_photon, threads_per_block ); + unsigned threads_per_block = 512; + configureLaunch1D(num_photon, threads_per_block); - QSim_fake_propagate_launch(numBlocks, threadsPerBlock, d_sim, d_prd ); + QSim_fake_propagate_launch(numBlocks, threadsPerBlock, d_sim, d_prd); cudaDeviceSynchronize(); - - LOG(LEVEL) << "]" ; + LOG(LEVEL) << "]"; #endif } +extern void QSim_boundary_lookup_all(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, quad *lookup, unsigned width, + unsigned height); - -extern void QSim_boundary_lookup_all( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, quad* lookup, unsigned width, unsigned height ); - -NP* QSim::boundary_lookup_all(unsigned width, unsigned height ) +NP *QSim::boundary_lookup_all(unsigned width, unsigned height) { - LOG(LEVEL) << "[" ; - assert( bnd ); - assert( width <= getBoundaryTexWidth() ); - assert( height <= getBoundaryTexHeight() ); + LOG(LEVEL) << "["; + assert(bnd); + assert(width <= getBoundaryTexWidth()); + assert(height <= getBoundaryTexHeight()); - unsigned num_lookup = width*height ; - LOG(LEVEL) - << " width " << width - << " height " << height - << " num_lookup " << num_lookup - ; + unsigned num_lookup = width * height; + LOG(LEVEL) << " width " << width << " height " << height << " num_lookup " << num_lookup; + configureLaunch(width, height); - configureLaunch(width, height ); + const char *label = "QSim::boundary_lookup_all:num_lookup"; - const char* label = "QSim::boundary_lookup_all:num_lookup" ; + quad *d_lookup = QU::device_alloc(num_lookup, label); + QSim_boundary_lookup_all(numBlocks, threadsPerBlock, d_sim, d_lookup, width, height); - 
quad* d_lookup = QU::device_alloc(num_lookup, label ) ; - QSim_boundary_lookup_all(numBlocks, threadsPerBlock, d_sim, d_lookup, width, height ); + assert(height % 8 == 0); + unsigned num_bnd = height / 8; - assert( height % 8 == 0 ); - unsigned num_bnd = height/8 ; + NP *l = NP::Make(num_bnd, 4, 2, width, 4); + QU::copy_device_to_host_and_free((quad *)l->bytes(), d_lookup, num_lookup, label); - NP* l = NP::Make( num_bnd, 4, 2, width, 4 ); - QU::copy_device_to_host_and_free( (quad*)l->bytes(), d_lookup, num_lookup, label ); - - LOG(LEVEL) << "]" ; - - return l ; + LOG(LEVEL) << "]"; + return l; } -extern void QSim_boundary_lookup_line( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, quad* lookup, float* domain, unsigned num_lookup, unsigned line, unsigned k ); +extern void QSim_boundary_lookup_line(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, quad *lookup, float *domain, + unsigned num_lookup, unsigned line, unsigned k); - -NP* QSim::boundary_lookup_line( float* domain, unsigned num_lookup, unsigned line, unsigned k ) +NP *QSim::boundary_lookup_line(float *domain, unsigned num_lookup, unsigned line, unsigned k) { - LOG(LEVEL) - << "[" - << " num_lookup " << num_lookup - << " line " << line - << " k " << k - ; - - configureLaunch(num_lookup, 1 ); + LOG(LEVEL) << "[" << " num_lookup " << num_lookup << " line " << line << " k " << k; - float* d_domain = QU::device_alloc(num_lookup, "QSim::boundary_lookup_line:num_lookup") ; + configureLaunch(num_lookup, 1); - QU::copy_host_to_device( d_domain, domain, num_lookup ); + float *d_domain = QU::device_alloc(num_lookup, "QSim::boundary_lookup_line:num_lookup"); - const char* label = "QSim::boundary_lookup_line:num_lookup" ; + QU::copy_host_to_device(d_domain, domain, num_lookup); - quad* d_lookup = QU::device_alloc(num_lookup, label ) ; + const char *label = "QSim::boundary_lookup_line:num_lookup"; - QSim_boundary_lookup_line(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, num_lookup, line, k ); + quad 
*d_lookup = QU::device_alloc(num_lookup, label); + QSim_boundary_lookup_line(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, num_lookup, line, k); - NP* l = NP::Make( num_lookup, 4 ); + NP *l = NP::Make(num_lookup, 4); - QU::copy_device_to_host_and_free( (quad*)l->bytes(), d_lookup, num_lookup, label ); + QU::copy_device_to_host_and_free((quad *)l->bytes(), d_lookup, num_lookup, label); - QU::device_free( d_domain ); + QU::device_free(d_domain); - LOG(LEVEL) << "]" ; + LOG(LEVEL) << "]"; - return l ; + return l; } - - - - /** QSim::prop_lookup -------------------- @@ -1681,59 +1462,43 @@ below *prop_lookup_onebyone* **/ - template -extern void QSim_prop_lookup( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, T* lookup, const T* domain, unsigned domain_width, unsigned* pids, unsigned num_pids ); +extern void QSim_prop_lookup(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, T *lookup, const T *domain, + unsigned domain_width, unsigned *pids, unsigned num_pids); template -void QSim::prop_lookup( T* lookup, const T* domain, unsigned domain_width, const std::vector& pids ) +void QSim::prop_lookup(T *lookup, const T *domain, unsigned domain_width, const std::vector &pids) { - unsigned num_pids = pids.size() ; - unsigned num_lookup = num_pids*domain_width ; - LOG(LEVEL) - << "[" - << " num_pids " << num_pids - << " domain_width " << domain_width - << " num_lookup " << num_lookup - ; + unsigned num_pids = pids.size(); + unsigned num_lookup = num_pids * domain_width; + LOG(LEVEL) << "[" << " num_pids " << num_pids << " domain_width " << domain_width << " num_lookup " << num_lookup; - configureLaunch(domain_width, num_pids ); + configureLaunch(domain_width, num_pids); - unsigned* d_pids = QU::device_alloc(num_pids, "QSim::prop_lookup:num_pids") ; - T* d_domain = QU::device_alloc(domain_width, "QSim::prop_lookup:domain_width") ; - T* d_lookup = QU::device_alloc(num_lookup , "QSim::prop_lookup:num_lookup") ; + unsigned *d_pids = QU::device_alloc(num_pids, 
"QSim::prop_lookup:num_pids"); + T *d_domain = QU::device_alloc(domain_width, "QSim::prop_lookup:domain_width"); + T *d_lookup = QU::device_alloc(num_lookup, "QSim::prop_lookup:num_lookup"); - QU::copy_host_to_device( d_domain, domain, domain_width ); - QU::copy_host_to_device( d_pids, pids.data(), num_pids ); + QU::copy_host_to_device(d_domain, domain, domain_width); + QU::copy_host_to_device(d_pids, pids.data(), num_pids); - QSim_prop_lookup(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, domain_width, d_pids, num_pids ); + QSim_prop_lookup(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, domain_width, d_pids, num_pids); - QU::copy_device_to_host_and_free( lookup, d_lookup, num_lookup ); - QU::device_free( d_domain ); - QU::device_free( d_pids ); + QU::copy_device_to_host_and_free(lookup, d_lookup, num_lookup); + QU::device_free(d_domain); + QU::device_free(d_pids); - LOG(LEVEL) << "]" ; + LOG(LEVEL) << "]"; } - - /** Hmm doing lookups like this is a very common pattern, could do with a sub context to carry the pieces to simplify doing that. 
**/ template -extern void QSim_prop_lookup_one( - dim3 numBlocks, - dim3 threadsPerBlock, - qsim* sim, - T* lookup, - const T* domain, - unsigned domain_width, - unsigned num_pids, - unsigned pid, - unsigned ipid -); +extern void QSim_prop_lookup_one(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, T *lookup, const T *domain, + unsigned domain_width, unsigned num_pids, unsigned pid, unsigned ipid); /** QSim::prop_lookup_onebyone @@ -1749,203 +1514,155 @@ On device uses:: **/ template -void QSim::prop_lookup_onebyone( T* lookup, const T* domain, unsigned domain_width, const std::vector& pids ) +void QSim::prop_lookup_onebyone(T *lookup, const T *domain, unsigned domain_width, const std::vector &pids) { - unsigned num_pids = pids.size() ; - unsigned num_lookup = num_pids*domain_width ; - LOG(LEVEL) - << "[" - << " num_pids " << num_pids - << " domain_width " << domain_width - << " num_lookup " << num_lookup - ; + unsigned num_pids = pids.size(); + unsigned num_lookup = num_pids * domain_width; + LOG(LEVEL) << "[" << " num_pids " << num_pids << " domain_width " << domain_width << " num_lookup " << num_lookup; - configureLaunch(domain_width, 1 ); + configureLaunch(domain_width, 1); - T* d_domain = QU::device_alloc(domain_width, "QSim::prop_lookup_onebyone:domain_width") ; - QU::copy_host_to_device( d_domain, domain, domain_width ); + T *d_domain = QU::device_alloc(domain_width, "QSim::prop_lookup_onebyone:domain_width"); + QU::copy_host_to_device(d_domain, domain, domain_width); - const char* label = "QSim::prop_lookup_onebyone:num_lookup" ; + const char *label = "QSim::prop_lookup_onebyone:num_lookup"; - T* d_lookup = QU::device_alloc(num_lookup, label ) ; + T *d_lookup = QU::device_alloc(num_lookup, label); // separate launches for each pid - for(unsigned ipid=0 ; ipid < num_pids ; ipid++) + for (unsigned ipid = 0; ipid < num_pids; ipid++) { - unsigned pid = pids[ipid] ; - QSim_prop_lookup_one(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, domain_width, 
num_pids, pid, ipid ); + unsigned pid = pids[ipid]; + QSim_prop_lookup_one(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, domain_width, num_pids, pid, + ipid); } - QU::copy_device_to_host_and_free( lookup, d_lookup, num_lookup, label ); + QU::copy_device_to_host_and_free(lookup, d_lookup, num_lookup, label); - QU::device_free( d_domain ); + QU::device_free(d_domain); - LOG(LEVEL) << "]" ; + LOG(LEVEL) << "]"; } +template void QSim::prop_lookup_onebyone(float *, const float *, unsigned, const std::vector &); +template void QSim::prop_lookup_onebyone(double *, const double *, unsigned, const std::vector &); -template void QSim::prop_lookup_onebyone( float*, const float* , unsigned, const std::vector& ); -template void QSim::prop_lookup_onebyone( double*, const double* , unsigned, const std::vector& ); - - +extern void QSim_multifilm_lookup_all(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, quad2 *sample, quad2 *result, + unsigned width, unsigned height); - - - -extern void QSim_multifilm_lookup_all( dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad2* sample, quad2* result, unsigned width, unsigned height ); - -void QSim::multifilm_lookup_all( quad2 * sample , quad2 * result , unsigned width, unsigned height ) +void QSim::multifilm_lookup_all(quad2 *sample, quad2 *result, unsigned width, unsigned height) { - LOG(LEVEL) << "[" ; - unsigned num_lookup = width*height ; - unsigned size = num_lookup ; + LOG(LEVEL) << "["; + unsigned num_lookup = width * height; + unsigned size = num_lookup; - LOG(LEVEL) - << " width " << width - << " height " << height - << " num_lookup " << num_lookup - << " size "<(size, "QSim::multifilm_lookup_all:size" ) ; + // const float * c_sample = sample; + quad2 *d_sample = QU::device_alloc(size, "QSim::multifilm_lookup_all:size"); - const char* label = "QSim::multifilm_lookup_all:size" ; + const char *label = "QSim::multifilm_lookup_all:size"; - quad2* d_result = QU::device_alloc(size, label ) ; - LOG(LEVEL) - <<" 
copy_host_to_device( d_sample, sample , size) before"; - QU::copy_host_to_device( d_sample, sample , size); - LOG(LEVEL) - <<" copy_host_to_device( d_sample, sample , size) after"; + quad2 *d_result = QU::device_alloc(size, label); + LOG(LEVEL) << " copy_host_to_device( d_sample, sample , size) before"; + QU::copy_host_to_device(d_sample, sample, size); + LOG(LEVEL) << " copy_host_to_device( d_sample, sample , size) after"; - QSim_multifilm_lookup_all(numBlocks, threadsPerBlock, d_sim, d_sample, d_result, width, height ); - QU::copy_device_to_host_and_free( result , d_result , size, label ); + QSim_multifilm_lookup_all(numBlocks, threadsPerBlock, d_sim, d_sample, d_result, width, height); + QU::copy_device_to_host_and_free(result, d_result, size, label); QU::device_free(d_sample); cudaDeviceSynchronize(); - LOG(LEVEL) << "]" ; + LOG(LEVEL) << "]"; } - - - unsigned QSim::getBoundaryTexWidth() const { - return bnd->tex->width ; + return bnd->tex->width; } unsigned QSim::getBoundaryTexHeight() const { - return bnd->tex->height ; + return bnd->tex->height; } -const NP* QSim::getBoundaryTexSrc() const +const NP *QSim::getBoundaryTexSrc() const { - return bnd->src ; + return bnd->src; } -void QSim::dump_photon( quad4* photon, unsigned num_photon, const char* opt_, unsigned edgeitems ) +void QSim::dump_photon(quad4 *photon, unsigned num_photon, const char *opt_, unsigned edgeitems) { LOG(LEVEL); - std::string opt = opt_ ; + std::string opt = opt_; - bool f0 = opt.find("f0") != std::string::npos ; - bool f1 = opt.find("f1") != std::string::npos ; - bool f2 = opt.find("f2") != std::string::npos ; - bool f3 = opt.find("f3") != std::string::npos ; + bool f0 = opt.find("f0") != std::string::npos; + bool f1 = opt.find("f1") != std::string::npos; + bool f2 = opt.find("f2") != std::string::npos; + bool f3 = opt.find("f3") != std::string::npos; - bool i0 = opt.find("i0") != std::string::npos ; - bool i1 = opt.find("i1") != std::string::npos ; - bool i2 = opt.find("i2") != 
std::string::npos ; - bool i3 = opt.find("i3") != std::string::npos ; + bool i0 = opt.find("i0") != std::string::npos; + bool i1 = opt.find("i1") != std::string::npos; + bool i2 = opt.find("i2") != std::string::npos; + bool i3 = opt.find("i3") != std::string::npos; - int wi = 7 ; - int pr = 2 ; + int wi = 7; + int pr = 2; - for(unsigned i=0 ; i < num_photon ; i++) + for (unsigned i = 0; i < num_photon; i++) { - if( i < edgeitems || i > num_photon - edgeitems) + if (i < edgeitems || i > num_photon - edgeitems) { - const quad4& p = photon[i] ; - - std::cout - << std::setw(wi) << i - ; - - if(f0) std::cout - << " f0 " - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q0.f.x - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q0.f.y - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q0.f.z - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q0.f.w - ; - - if(f1) std::cout - << " f1 " - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q1.f.x - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q1.f.y - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q1.f.z - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q1.f.w - ; - - if(f2) std::cout - << " f2 " - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q2.f.x - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q2.f.y - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q2.f.z - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q2.f.w - ; - - if(f3) std::cout - << " f3 " - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q3.f.x - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q3.f.y - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q3.f.z - << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q3.f.w - ; - - if(i0) std::cout - << " i0 " - << std::setw(wi) << p.q0.i.x - << std::setw(wi) << p.q0.i.y - << std::setw(wi) << p.q0.i.z - << 
std::setw(wi) << p.q0.i.w - ; - - if(i1) std::cout - << " i1 " - << std::setw(wi) << p.q1.i.x - << std::setw(wi) << p.q1.i.y - << std::setw(wi) << p.q1.i.z - << std::setw(wi) << p.q1.i.w - ; - - if(i2) std::cout - << " i2 " - << std::setw(wi) << p.q2.i.x - << std::setw(wi) << p.q2.i.y - << std::setw(wi) << p.q2.i.z - << std::setw(wi) << p.q2.i.w - ; - - if(i3) std::cout - << " i3 " - << std::setw(wi) << p.q3.i.x - << std::setw(wi) << p.q3.i.y - << std::setw(wi) << p.q3.i.z - << std::setw(wi) << p.q3.i.w - ; - - std::cout - << std::endl - ; + const quad4 &p = photon[i]; + + std::cout << std::setw(wi) << i; + + if (f0) + std::cout << " f0 " << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q0.f.x << std::setw(wi) + << std::fixed << std::setprecision(pr) << p.q0.f.y << std::setw(wi) << std::fixed + << std::setprecision(pr) << p.q0.f.z << std::setw(wi) << std::fixed << std::setprecision(pr) + << p.q0.f.w; + + if (f1) + std::cout << " f1 " << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q1.f.x << std::setw(wi) + << std::fixed << std::setprecision(pr) << p.q1.f.y << std::setw(wi) << std::fixed + << std::setprecision(pr) << p.q1.f.z << std::setw(wi) << std::fixed << std::setprecision(pr) + << p.q1.f.w; + + if (f2) + std::cout << " f2 " << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q2.f.x << std::setw(wi) + << std::fixed << std::setprecision(pr) << p.q2.f.y << std::setw(wi) << std::fixed + << std::setprecision(pr) << p.q2.f.z << std::setw(wi) << std::fixed << std::setprecision(pr) + << p.q2.f.w; + + if (f3) + std::cout << " f3 " << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q3.f.x << std::setw(wi) + << std::fixed << std::setprecision(pr) << p.q3.f.y << std::setw(wi) << std::fixed + << std::setprecision(pr) << p.q3.f.z << std::setw(wi) << std::fixed << std::setprecision(pr) + << p.q3.f.w; + + if (i0) + std::cout << " i0 " << std::setw(wi) << p.q0.i.x << std::setw(wi) << p.q0.i.y << std::setw(wi) + << p.q0.i.z << 
std::setw(wi) << p.q0.i.w; + + if (i1) + std::cout << " i1 " << std::setw(wi) << p.q1.i.x << std::setw(wi) << p.q1.i.y << std::setw(wi) + << p.q1.i.z << std::setw(wi) << p.q1.i.w; + + if (i2) + std::cout << " i2 " << std::setw(wi) << p.q2.i.x << std::setw(wi) << p.q2.i.y << std::setw(wi) + << p.q2.i.z << std::setw(wi) << p.q2.i.w; + + if (i3) + std::cout << " i3 " << std::setw(wi) << p.q3.i.x << std::setw(wi) << p.q3.i.y << std::setw(wi) + << p.q3.i.z << std::setw(wi) << p.q3.i.w; + + std::cout << std::endl; } } } - /** QSim::Desc ------------ @@ -1960,10 +1677,10 @@ Dump flags with:: ssys_test **/ -std::string QSim::Desc(char delim) // static +std::string QSim::Desc(char delim) // static { - std::stringstream ss ; - ss << ( delim == ',' ? "" : "QSim::Desc\n" ) + std::stringstream ss; + ss << (delim == ',' ? "" : "QSim::Desc\n") #ifdef CONFIG_Debug << "CONFIG_Debug" #else @@ -2041,17 +1758,12 @@ std::string QSim::Desc(char delim) // static #else << "NOT-RNG_PHILITEOX" #endif - << delim - ; - std::string str = ss.str() ; - return str ; + << delim; + std::string str = ss.str(); + return str; } - - -std::string QSim::Switches() // static +std::string QSim::Switches() // static { return Desc(','); } - - diff --git a/qudarap/QSim.hh b/qudarap/QSim.hh index 5d9c38471..16e0f81a6 100644 --- a/qudarap/QSim.hh +++ b/qudarap/QSim.hh @@ -1,8 +1,8 @@ #pragma once +#include #include #include -#include #include "QUDARAP_API_EXPORT.hh" #include "plog/Severity.h" @@ -23,194 +23,182 @@ HMM : MOST OF THIS API IS FOR TESTING ONLY : TODO: Move lots to QSimTest perhap **/ -struct NP ; -struct SSim ; -struct SEvt ; +struct NP; +struct SSim; +struct SEvt; -template struct QTex ; -template struct QBuf ; -template struct QProp ; -template struct QPMT ; +template struct QTex; +template struct QBuf; +template struct QProp; +template struct QPMT; -struct qsim ; +struct qsim; -struct QBase ; -struct QEvt ; -struct QRng ; -struct QScint ; -struct QCerenkov ; -struct QBnd ; +struct QBase; +struct 
QEvt; +struct QRng; +struct QScint; +struct QWls; +struct QCerenkov; +struct QBnd; struct QMultiFilm; -struct QOptical ; -struct QEvt ; -struct QDebug ; +struct QOptical; +struct QEvt; +struct QDebug; -struct qdebug ; -struct sstate ; +struct qdebug; +struct sstate; -struct quad4 ; -struct quad2 ; -struct sphoton ; -union quad ; +struct quad4; +struct quad2; +struct sphoton; +union quad; -struct SSimulator ; +struct SSimulator; struct QUDARAP_API QSim { - static constexpr const int64_t M = 1000000 ; - static constexpr const int64_t G = 1000000000 ; - - static const plog::Severity LEVEL ; - static const char* PREFIX ; - static QSim* INSTANCE ; - static QSim* Get(); - static QSim* Create(); + static constexpr const int64_t M = 1000000; + static constexpr const int64_t G = 1000000000; - static void UploadComponents(const SSim* ssim); + static const plog::Severity LEVEL; + static const char *PREFIX; + static QSim *INSTANCE; + static QSim *Get(); + static QSim *Create(); - const QBase* base ; - QEvt* qev ; - SEvt* sev ; + static void UploadComponents(const SSim *ssim); - const QRng* rng ; - const QScint* scint ; - const QCerenkov* cerenkov ; - const QBnd* bnd ; - const QOptical* optical ; - const QDebug* debug_ ; + const QBase *base; + QEvt *qev; + SEvt *sev; - const QProp* prop ; - const QPMT* pmt ; - const QMultiFilm* multifilm ; + const QRng *rng; + const QScint *scint; + const QWls *qwls; + const QCerenkov *cerenkov; + const QBnd *bnd; + const QOptical *optical; + const QDebug *debug_; - qsim* sim ; - qsim* d_sim ; + const QProp *prop; + const QPMT *pmt; + const QMultiFilm *multifilm; - qdebug* dbg ; - qdebug* d_dbg ; + qsim *sim; + qsim *d_sim; - SSimulator* cx ; + qdebug *dbg; + qdebug *d_dbg; + SSimulator *cx; - dim3 numBlocks ; - dim3 threadsPerBlock ; + dim3 numBlocks; + dim3 threadsPerBlock; -private: + private: QSim(); void init(); - static constexpr const char* _QSim__REQUIRE_PMT = "QSim__REQUIRE_PMT" ; - static const bool REQUIRE_PMT; - - static constexpr 
const char* _QSim__SAVE_IGS_EVENTID = "QSim__SAVE_IGS_EVENTID" ; - static const int SAVE_IGS_EVENTID ; - - static constexpr const char* _QSim__SAVE_IGS_PATH = "QSim__SAVE_IGS_PATH" ; - static const char* SAVE_IGS_PATH ; + static constexpr const char *_QSim__REQUIRE_PMT = "QSim__REQUIRE_PMT"; + static const bool REQUIRE_PMT; - static constexpr const char* _QSim__CONCAT = "QSim__CONCAT" ; - static const bool CONCAT ; + static constexpr const char *_QSim__SAVE_IGS_EVENTID = "QSim__SAVE_IGS_EVENTID"; + static const int SAVE_IGS_EVENTID; - static constexpr const char* _QSim__ALLOC = "QSim__ALLOC" ; - static const bool ALLOC ; + static constexpr const char *_QSim__SAVE_IGS_PATH = "QSim__SAVE_IGS_PATH"; + static const char *SAVE_IGS_PATH; + static constexpr const char *_QSim__CONCAT = "QSim__CONCAT"; + static const bool CONCAT; -public: - void setLauncher(SSimulator* cx_ ); + static constexpr const char *_QSim__ALLOC = "QSim__ALLOC"; + static const bool ALLOC; - static constexpr const char* QSim__simulate_KEEP_SUBFOLD = "QSim__simulate_KEEP_SUBFOLD" ; - static bool KEEP_SUBFOLD ; + public: + void setLauncher(SSimulator *cx_); - double simulate(int eventID, bool reset_ ); // via cx launch - void simulate_final_merge(int64_t tot_ph, cudaStream_t stream); + static constexpr const char *QSim__simulate_KEEP_SUBFOLD = "QSim__simulate_KEEP_SUBFOLD"; + static bool KEEP_SUBFOLD; + double simulate(int eventID, bool reset_); // via cx launch + void simulate_final_merge(int64_t tot_ph, cudaStream_t stream); + NP *simulate(const NP *gs, int eventID); // higher level API for use from CSGOptiXService.h - NP* simulate(const NP* gs, int eventID ); // higher level API for use from CSGOptiXService.h + static void MaybeSaveIGS(int eventID, NP *igs); - static void MaybeSaveIGS(int eventID, NP* igs); + unsigned long long get_photon_slot_offset() const; - unsigned long long get_photon_slot_offset() const ; - - void reset( int eventID); + void reset(int eventID); double simtrace(int eventID); - 
- qsim* getDevicePtr() const ; - std::string desc() const ; - std::string descFull() const ; - std::string descComponents() const ; - + qsim *getDevicePtr() const; + std::string desc() const; + std::string descFull() const; + std::string descComponents() const; // TODO: relocate non-essential methods into tests or elsewhere - char getScintTexFilterMode() const ; + char getScintTexFilterMode() const; void configureLaunch16(); - void configureLaunch( unsigned width, unsigned height ); - void configureLaunch2D( unsigned width, unsigned height ); + void configureLaunch(unsigned width, unsigned height); + void configureLaunch2D(unsigned width, unsigned height); void configureLaunch1D(unsigned num, unsigned threads_per_block); - std::string descLaunch() const ; - + std::string descLaunch() const; - template - void rng_sequence( dim3 numblocks, dim3 threadsPerBlock, qsim* d_sim, T* d_seq, unsigned ni_tranche, unsigned nv, unsigned ioffset ); + template + void rng_sequence(dim3 numblocks, dim3 threadsPerBlock, qsim *d_sim, T *d_seq, unsigned ni_tranche, unsigned nv, + unsigned ioffset); - template - void rng_sequence( T* seq, unsigned ni, unsigned nj, unsigned ioffset ); + template void rng_sequence(T *seq, unsigned ni, unsigned nj, unsigned ioffset); - template - void rng_sequence( const char* dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size ); + template + void rng_sequence(const char *dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size); + NP *scint_wavelength(unsigned num_wavelength, unsigned &hd_factor); - NP* scint_wavelength( unsigned num_wavelength, unsigned& hd_factor ); - - NP* RandGaussQ_shoot(unsigned num_v ); - + NP *RandGaussQ_shoot(unsigned num_v); // NP* cerenkov_wavelength_rejection_sampled( unsigned num_wavelength ); - void dump_wavelength( float* wavelength, unsigned num_wavelength, unsigned edgeitems=10 ); - - - NP* dbg_gs_generate(unsigned num_photon, unsigned type ); + void dump_wavelength(float *wavelength, 
unsigned num_wavelength, unsigned edgeitems = 10); + NP *dbg_gs_generate(unsigned num_photon, unsigned type); - void dump_photon( quad4* photon, unsigned num_photon, const char* opt="f0,f1,f2,i3", unsigned egdeitems=10 ); + void dump_photon(quad4 *photon, unsigned num_photon, const char *opt = "f0,f1,f2,i3", unsigned egdeitems = 10); void generate_photon(); - void fill_state_0(quad6* state, unsigned num_state); - void fill_state_1(sstate* state, unsigned num_state); + void fill_state_0(quad6 *state, unsigned num_state); + void fill_state_1(sstate *state, unsigned num_state); - NP* quad_launch_generate(unsigned num_quad, unsigned type ); - NP* photon_launch_generate(unsigned num_photon, unsigned type ); + NP *quad_launch_generate(unsigned num_quad, unsigned type); + NP *photon_launch_generate(unsigned num_photon, unsigned type); - static constexpr const char* _QSim__photon_launch_mutate_DEBUG_NUM_PHOTON = "QSim__photon_launch_mutate_DEBUG_NUM_PHOTON" ; - static constexpr const char* _QSim__photon_launch_mutate_SKIP_LAUNCH = "QSim__photon_launch_mutate_SKIP_LAUNCH" ; - void photon_launch_mutate( sphoton* photon, unsigned num_photon, unsigned type ); + static constexpr const char *_QSim__photon_launch_mutate_DEBUG_NUM_PHOTON = + "QSim__photon_launch_mutate_DEBUG_NUM_PHOTON"; + static constexpr const char *_QSim__photon_launch_mutate_SKIP_LAUNCH = "QSim__photon_launch_mutate_SKIP_LAUNCH"; + void photon_launch_mutate(sphoton *photon, unsigned num_photon, unsigned type); + static quad2 *UploadFakePRD(const NP *ip, const NP *prd); + void fake_propagate(const NP *prd, unsigned type); - static quad2* UploadFakePRD(const NP* ip, const NP* prd); - void fake_propagate(const NP* prd, unsigned type ); + unsigned getBoundaryTexWidth() const; + unsigned getBoundaryTexHeight() const; + const NP *getBoundaryTexSrc() const; - unsigned getBoundaryTexWidth() const ; - unsigned getBoundaryTexHeight() const ; - const NP* getBoundaryTexSrc() const ; + NP *boundary_lookup_all(unsigned 
width, unsigned height); + NP *boundary_lookup_line(float *domain, unsigned num_lookup, unsigned line, unsigned k); - NP* boundary_lookup_all( unsigned width, unsigned height ) ; - NP* boundary_lookup_line( float* domain, unsigned num_lookup, unsigned line, unsigned k ) ; + template + void prop_lookup(T *lookup, const T *domain, unsigned domain_width, const std::vector &pids); + template + void prop_lookup_onebyone(T *lookup, const T *domain, unsigned domain_width, const std::vector &pids); - template - void prop_lookup( T* lookup, const T* domain, unsigned domain_width, const std::vector& pids ) ; + void multifilm_lookup_all(quad2 *sample, quad2 *result, unsigned width, unsigned height); - template - void prop_lookup_onebyone( T* lookup, const T* domain, unsigned domain_width, const std::vector& pids ) ; - - void multifilm_lookup_all( quad2* sample , quad2* result , unsigned width, unsigned height ); - - static std::string Desc(char delim='\n'); + static std::string Desc(char delim = '\n'); static std::string Switches(); }; - - diff --git a/qudarap/QU.cc b/qudarap/QU.cc index 97aacf985..82fde9e2c 100644 --- a/qudarap/QU.cc +++ b/qudarap/QU.cc @@ -3,9 +3,9 @@ #include "NP.hh" #include "SLOG.hh" -#include "spath.h" -#include "sdirectory.h" #include "scuda.h" +#include "sdirectory.h" +#include "spath.h" #include "squad.h" #include "ssys.h" @@ -17,100 +17,91 @@ #include "sphoton.h" #include "sphotonlite.h" -#include "sevent.h" -#include "salloc.h" #include "SEventConfig.hh" +#include "salloc.h" +#include "sevent.h" -#include "QUDA_CHECK.h" #include "QU.hh" +#include "QUDA_CHECK.h" #include "curand_kernel.h" #include "qrng.h" #include "qsim.h" #include "qbase.h" -#include "qprop.h" -#include "qpmt.h" -#include "qdebug.h" -#include "qscint.h" #include "qcerenkov.h" #include "qcurandwrap.h" -#include "scurandref.h" +#include "qdebug.h" #include "qmultifilm.h" +#include "qpmt.h" +#include "qprop.h" +#include "qscint.h" +#include "qwls.h" +#include "scurandref.h" - -const 
plog::Severity QU::LEVEL = SLOG::EnvLevel("QU", "DEBUG") ; +const plog::Severity QU::LEVEL = SLOG::EnvLevel("QU", "DEBUG"); bool QU::MEMCHECK = ssys::getenvbool(_MEMCHECK); -salloc* QU::alloc = nullptr ; - +salloc *QU::alloc = nullptr; -void QU::alloc_add(const char* label, uint64_t num_items, uint64_t sizeof_item ) // static +void QU::alloc_add(const char *label, uint64_t num_items, uint64_t sizeof_item) // static { - if(!alloc) alloc = SEventConfig::ALLOC ; - if(alloc ) alloc->add(label, num_items, sizeof_item ); + if (!alloc) + alloc = SEventConfig::ALLOC; + if (alloc) + alloc->add(label, num_items, sizeof_item); } - -template -char QU::typecode() +template char QU::typecode() { - char c = '?' ; - switch(sizeof(T)) + char c = '?'; + switch (sizeof(T)) { - case 4: c = 'f' ; break ; - case 8: c = 'd' ; break ; + case 4: + c = 'f'; + break; + case 8: + c = 'd'; + break; } - return c ; + return c; } -template char QU::typecode() ; -template char QU::typecode() ; - +template char QU::typecode(); +template char QU::typecode(); template -std::string QU::rng_sequence_name(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ioffset ) // static +std::string QU::rng_sequence_name(const char *prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ioffset) // static { - std::stringstream ss ; - ss << prefix - << "_" << QU::typecode() - << "_ni" << ni - << "_nj" << nj - << "_nk" << nk - << "_ioffset" << std::setw(6) << std::setfill('0') << ioffset - << ".npy" - ; + std::stringstream ss; + ss << prefix << "_" << QU::typecode() << "_ni" << ni << "_nj" << nj << "_nk" << nk << "_ioffset" << std::setw(6) + << std::setfill('0') << ioffset << ".npy"; std::string name = ss.str(); - return name ; + return name; } -template std::string QU::rng_sequence_name(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ioffset ) ; -template std::string QU::rng_sequence_name(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ioffset ) ; - - 
+template std::string QU::rng_sequence_name(const char *prefix, unsigned ni, unsigned nj, unsigned nk, + unsigned ioffset); +template std::string QU::rng_sequence_name(const char *prefix, unsigned ni, unsigned nj, unsigned nk, + unsigned ioffset); template -std::string QU::rng_sequence_reldir(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size ) // static +std::string QU::rng_sequence_reldir(const char *prefix, unsigned ni, unsigned nj, unsigned nk, + unsigned ni_tranche_size) // static { - std::stringstream ss ; - ss << prefix - << "_" << QU::typecode() - << "_ni" << ni - << "_nj" << nj - << "_nk" << nk - << "_tranche" << ni_tranche_size - ; + std::stringstream ss; + ss << prefix << "_" << QU::typecode() << "_ni" << ni << "_nj" << nj << "_nk" << nk << "_tranche" + << ni_tranche_size; std::string reldir = ss.str(); - return reldir ; + return reldir; } -template std::string QU::rng_sequence_reldir(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size ) ; -template std::string QU::rng_sequence_reldir(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size ) ; - - - +template std::string QU::rng_sequence_reldir(const char *prefix, unsigned ni, unsigned nj, unsigned nk, + unsigned ni_tranche_size); +template std::string QU::rng_sequence_reldir(const char *prefix, unsigned ni, unsigned nj, unsigned nk, + unsigned ni_tranche_size); /** QU::UploadArray @@ -120,61 +111,52 @@ Allocate on device and copy from host to device **/ -template -T* QU::UploadArray(const T* array, unsigned num_items, const char* label ) // static +template T *QU::UploadArray(const T *array, unsigned num_items, const char *label) // static { - size_t size = num_items*sizeof(T) ; - - LOG(LEVEL) - << " num_items " << num_items - << " size " << size - << " label " << ( label ? 
label : "-" ) - ; + size_t size = num_items * sizeof(T); - LOG_IF(info, MEMCHECK) - << " num_items " << num_items - << " size " << size - << " label " << ( label ? label : "-" ) - ; + LOG(LEVEL) << " num_items " << num_items << " size " << size << " label " << (label ? label : "-"); + LOG_IF(info, MEMCHECK) << " num_items " << num_items << " size " << size << " label " << (label ? label : "-"); - alloc_add( label, num_items, sizeof(T) ) ; + alloc_add(label, num_items, sizeof(T)); - T* d_array = nullptr ; - QUDA_CHECK( cudaMalloc(reinterpret_cast( &d_array ), size )); - QUDA_CHECK( cudaMemcpy(reinterpret_cast( d_array ), array, size, cudaMemcpyHostToDevice )); - return d_array ; + T *d_array = nullptr; + QUDA_CHECK(cudaMalloc(reinterpret_cast(&d_array), size)); + QUDA_CHECK(cudaMemcpy(reinterpret_cast(d_array), array, size, cudaMemcpyHostToDevice)); + return d_array; } - // IF NEED THESE FROM REMOVE PKG WILL NEED TO QUDARAP_API -template float* QU::UploadArray(const float* array, unsigned num_items, const char* label ) ; -template double* QU::UploadArray(const double* array, unsigned num_items, const char* label) ; -template unsigned* QU::UploadArray(const unsigned* array, unsigned num_items, const char* label) ; -template int* QU::UploadArray(const int* array, unsigned num_items, const char* label) ; -template quad4* QU::UploadArray(const quad4* array, unsigned num_items, const char* label) ; -template sphoton* QU::UploadArray(const sphoton* array, unsigned num_items, const char* label) ; -template sphotonlite* QU::UploadArray(const sphotonlite* array, unsigned num_items, const char* label) ; -template quad2* QU::UploadArray(const quad2* array, unsigned num_items, const char* label) ; -template XORWOW* QU::UploadArray(const XORWOW* array, unsigned num_items, const char* label) ; -template Philox* QU::UploadArray(const Philox* array, unsigned num_items, const char* label) ; -template qcurandwrap* QU::UploadArray>(const qcurandwrap* array, unsigned num_items, const 
char* label) ; -template scurandref* QU::UploadArray>(const scurandref* array, unsigned num_items, const char* label) ; -template qsim* QU::UploadArray(const qsim* array, unsigned num_items, const char* label) ; -template qprop* QU::UploadArray>(const qprop* array, unsigned num_items, const char* label) ; -template qprop* QU::UploadArray>(const qprop* array, unsigned num_items, const char* label) ; -template qpmt* QU::UploadArray>(const qpmt* array, unsigned num_items, const char* label) ; -template qpmt* QU::UploadArray>(const qpmt* array, unsigned num_items, const char* label) ; -template qmultifilm* QU::UploadArray(const qmultifilm* array, unsigned num_items, const char* label) ; -template qrng* QU::UploadArray>(const qrng* array, unsigned num_items, const char* label) ; -template qbnd* QU::UploadArray(const qbnd* array, unsigned num_items, const char* label) ; -template sevent* QU::UploadArray(const sevent* array, unsigned num_items, const char* label) ; -template qdebug* QU::UploadArray(const qdebug* array, unsigned num_items, const char* label) ; -template qscint* QU::UploadArray(const qscint* array, unsigned num_items, const char* label) ; -template qcerenkov* QU::UploadArray(const qcerenkov* array, unsigned num_items, const char* label) ; -template qbase* QU::UploadArray(const qbase* array, unsigned num_items, const char* label) ; - - +template float *QU::UploadArray(const float *array, unsigned num_items, const char *label); +template double *QU::UploadArray(const double *array, unsigned num_items, const char *label); +template unsigned *QU::UploadArray(const unsigned *array, unsigned num_items, const char *label); +template int *QU::UploadArray(const int *array, unsigned num_items, const char *label); +template quad4 *QU::UploadArray(const quad4 *array, unsigned num_items, const char *label); +template sphoton *QU::UploadArray(const sphoton *array, unsigned num_items, const char *label); +template sphotonlite *QU::UploadArray(const sphotonlite *array, 
unsigned num_items, const char *label); +template quad2 *QU::UploadArray(const quad2 *array, unsigned num_items, const char *label); +template XORWOW *QU::UploadArray(const XORWOW *array, unsigned num_items, const char *label); +template Philox *QU::UploadArray(const Philox *array, unsigned num_items, const char *label); +template qcurandwrap *QU::UploadArray>(const qcurandwrap *array, unsigned num_items, + const char *label); +template scurandref *QU::UploadArray>(const scurandref *array, unsigned num_items, + const char *label); +template qsim *QU::UploadArray(const qsim *array, unsigned num_items, const char *label); +template qprop *QU::UploadArray>(const qprop *array, unsigned num_items, const char *label); +template qprop *QU::UploadArray>(const qprop *array, unsigned num_items, + const char *label); +template qpmt *QU::UploadArray>(const qpmt *array, unsigned num_items, const char *label); +template qpmt *QU::UploadArray>(const qpmt *array, unsigned num_items, const char *label); +template qmultifilm *QU::UploadArray(const qmultifilm *array, unsigned num_items, const char *label); +template qrng *QU::UploadArray>(const qrng *array, unsigned num_items, const char *label); +template qbnd *QU::UploadArray(const qbnd *array, unsigned num_items, const char *label); +template sevent *QU::UploadArray(const sevent *array, unsigned num_items, const char *label); +template qdebug *QU::UploadArray(const qdebug *array, unsigned num_items, const char *label); +template qscint *QU::UploadArray(const qscint *array, unsigned num_items, const char *label); +template qwls *QU::UploadArray(const qwls *array, unsigned num_items, const char *label); +template qcerenkov *QU::UploadArray(const qcerenkov *array, unsigned num_items, const char *label); +template qbase *QU::UploadArray(const qbase *array, unsigned num_items, const char *label); /** QU::DownloadArray @@ -184,65 +166,57 @@ Allocate on host and copy from device to host **/ -template -T* QU::DownloadArray(const T* 
d_array, unsigned num_items ) // static +template T *QU::DownloadArray(const T *d_array, unsigned num_items) // static { - T* array = new T[num_items] ; - QUDA_CHECK( cudaMemcpy( array, d_array, sizeof(T)*num_items, cudaMemcpyDeviceToHost )); - return array ; + T *array = new T[num_items]; + QUDA_CHECK(cudaMemcpy(array, d_array, sizeof(T) * num_items, cudaMemcpyDeviceToHost)); + return array; } - -template float* QU::DownloadArray(const float* d_array, unsigned num_items) ; -template unsigned* QU::DownloadArray(const unsigned* d_array, unsigned num_items) ; -template int* QU::DownloadArray(const int* d_array, unsigned num_items) ; -template quad4* QU::DownloadArray(const quad4* d_array, unsigned num_items) ; -template quad2* QU::DownloadArray(const quad2* d_array, unsigned num_items) ; -template XORWOW* QU::DownloadArray(const XORWOW* d_array, unsigned num_items) ; -template Philox* QU::DownloadArray(const Philox* d_array, unsigned num_items) ; -template qprop* QU::DownloadArray>(const qprop* d_array, unsigned num_items) ; -template qprop* QU::DownloadArray>(const qprop* d_array, unsigned num_items) ; - - -template -void QU::Download(std::vector& vec, const T* d_array, unsigned num_items) // static +template float *QU::DownloadArray(const float *d_array, unsigned num_items); +template unsigned *QU::DownloadArray(const unsigned *d_array, unsigned num_items); +template int *QU::DownloadArray(const int *d_array, unsigned num_items); +template quad4 *QU::DownloadArray(const quad4 *d_array, unsigned num_items); +template quad2 *QU::DownloadArray(const quad2 *d_array, unsigned num_items); +template XORWOW *QU::DownloadArray(const XORWOW *d_array, unsigned num_items); +template Philox *QU::DownloadArray(const Philox *d_array, unsigned num_items); +template qprop *QU::DownloadArray>(const qprop *d_array, unsigned num_items); +template qprop *QU::DownloadArray>(const qprop *d_array, unsigned num_items); + +template void QU::Download(std::vector &vec, const T *d_array, 
unsigned num_items) // static { - vec.resize( num_items); - QUDA_CHECK( cudaMemcpy( static_cast( vec.data() ), d_array, num_items*sizeof(T), cudaMemcpyDeviceToHost)); + vec.resize(num_items); + QUDA_CHECK(cudaMemcpy(static_cast(vec.data()), d_array, num_items * sizeof(T), cudaMemcpyDeviceToHost)); } +template QUDARAP_API void QU::Download(std::vector &vec, const float *d_array, unsigned num_items); +template QUDARAP_API void QU::Download(std::vector &vec, const unsigned *d_array, + unsigned num_items); +template QUDARAP_API void QU::Download(std::vector &vec, const int *d_array, unsigned num_items); +template QUDARAP_API void QU::Download(std::vector &vec, const uchar4 *d_array, unsigned num_items); +template QUDARAP_API void QU::Download(std::vector &vec, const float4 *d_array, unsigned num_items); +template QUDARAP_API void QU::Download(std::vector &vec, const quad4 *d_array, unsigned num_items); -template QUDARAP_API void QU::Download( std::vector& vec, const float* d_array, unsigned num_items); -template QUDARAP_API void QU::Download(std::vector& vec, const unsigned* d_array, unsigned num_items); -template QUDARAP_API void QU::Download( std::vector& vec, const int* d_array, unsigned num_items); -template QUDARAP_API void QU::Download( std::vector& vec, const uchar4* d_array, unsigned num_items); -template QUDARAP_API void QU::Download( std::vector& vec, const float4* d_array, unsigned num_items); -template QUDARAP_API void QU::Download( std::vector& vec, const quad4* d_array, unsigned num_items); - - - -template -void QU::device_free_and_alloc(T** dd, unsigned num_items ) // dd: pointer-to-device-pointer +template void QU::device_free_and_alloc(T **dd, unsigned num_items) // dd: pointer-to-device-pointer { - size_t size = num_items*sizeof(T) ; - LOG_IF(info, MEMCHECK) << " size " << size << " num_items " << num_items ; + size_t size = num_items * sizeof(T); + LOG_IF(info, MEMCHECK) << " size " << size << " num_items " << num_items; - QUDA_CHECK( cudaFree( 
reinterpret_cast( *dd ) ) ); - QUDA_CHECK( cudaMalloc(reinterpret_cast( dd ), size )); - assert( *dd ); + QUDA_CHECK(cudaFree(reinterpret_cast(*dd))); + QUDA_CHECK(cudaMalloc(reinterpret_cast(dd), size)); + assert(*dd); } +template QUDARAP_API void QU::device_free_and_alloc(float **dd, unsigned num_items); +template QUDARAP_API void QU::device_free_and_alloc(double **dd, unsigned num_items); +template QUDARAP_API void QU::device_free_and_alloc(unsigned **dd, unsigned num_items); +template QUDARAP_API void QU::device_free_and_alloc(int **dd, unsigned num_items); +template QUDARAP_API void QU::device_free_and_alloc(quad **dd, unsigned num_items); +template QUDARAP_API void QU::device_free_and_alloc(uchar4 **dd, unsigned num_items); +template QUDARAP_API void QU::device_free_and_alloc(float4 **dd, unsigned num_items); +template QUDARAP_API void QU::device_free_and_alloc(quad4 **dd, unsigned num_items); -template QUDARAP_API void QU::device_free_and_alloc(float** dd, unsigned num_items) ; -template QUDARAP_API void QU::device_free_and_alloc(double** dd, unsigned num_items) ; -template QUDARAP_API void QU::device_free_and_alloc(unsigned** dd, unsigned num_items) ; -template QUDARAP_API void QU::device_free_and_alloc(int** dd, unsigned num_items) ; -template QUDARAP_API void QU::device_free_and_alloc(quad** dd, unsigned num_items) ; -template QUDARAP_API void QU::device_free_and_alloc(uchar4** dd, unsigned num_items) ; -template QUDARAP_API void QU::device_free_and_alloc(float4** dd, unsigned num_items) ; -template QUDARAP_API void QU::device_free_and_alloc(quad4** dd, unsigned num_items) ; - -const char* QU::_cudaMalloc_OOM_NOTES = R"( ; +const char *QU::_cudaMalloc_OOM_NOTES = R"( ; QU::_cudaMalloc_OOM_NOTES ========================== @@ -255,239 +229,190 @@ One million is typically reasonable for debugging:: export OPTICKS_MAX_SLOT=M1 -)" ; - - +)"; - -void QU::_cudaMalloc( void** p2p, size_t size, const char* label ) +void QU::_cudaMalloc(void **p2p, size_t size, 
const char *label) { - cudaError_t err = cudaMalloc(p2p, size ) ; - if( err != cudaSuccess ) + cudaError_t err = cudaMalloc(p2p, size); + if (err != cudaSuccess) { - const char* out = spath::Resolve("$DefaultOutputDir") ; - salloc* estimate = SEventConfig::AllocEstimate(); + const char *out = spath::Resolve("$DefaultOutputDir"); + salloc *estimate = SEventConfig::AllocEstimate(); std::stringstream ss; - ss << "CUDA call (" << label << " ) failed with error: '" - << cudaGetErrorString( err ) - << "' (" __FILE__ << ":" << __LINE__ << ")" - << "\n\n" - << "[SEventConfig::DescEventMode (use of DebugHeavy/DebugLite EventMode with high stats is typical cause of OOM errors)\n" + ss << "CUDA call (" << label << " ) failed with error: '" << cudaGetErrorString(err) << "' (" __FILE__ << ":" + << __LINE__ << ")" << "\n\n" + << "[SEventConfig::DescEventMode (use of DebugHeavy/DebugLite EventMode with high stats is typical cause of " + "OOM errors)\n" << SEventConfig::DescEventMode() - << "]SEventConfig::DescEventMode (use of DebugHeavy/DebugLite EventMode with high stats is typical cause of OOM errors)\n" + << "]SEventConfig::DescEventMode (use of DebugHeavy/DebugLite EventMode with high stats is typical cause of " + "OOM errors)\n" << "\n\n" << "[alloc.desc\n" - << ( alloc ? alloc->desc() : "no-alloc" ) - << "]alloc.desc\n" + << (alloc ? alloc->desc() : "no-alloc") << "]alloc.desc\n" << "\n" << "[NOTES\n" - << _cudaMalloc_OOM_NOTES - << "]NOTES\n" + << _cudaMalloc_OOM_NOTES << "]NOTES\n" << "\n\n" << "[SEventConfig::AllocEstimate\n" - << ( estimate ? estimate->desc() : "no-estimate" ) - << "]SEventConfig::AllocEstimate\n" - << "save salloc record to [" << out << "]\n" ; - ; + << (estimate ? 
estimate->desc() : "no-estimate") << "]SEventConfig::AllocEstimate\n" + << "save salloc record to [" << out << "]\n"; + ; std::string msg = ss.str(); - LOG(error) << msg ; + LOG(error) << msg; - sdirectory::MakeDirs(out,0); - alloc->save(out) ; + sdirectory::MakeDirs(out, 0); + alloc->save(out); - throw QUDA_Exception( msg.c_str() ); + throw QUDA_Exception(msg.c_str()); } } - -template -T* QU::device_alloc( unsigned num_items, const char* label ) +template T *QU::device_alloc(unsigned num_items, const char *label) { - size_t size = num_items*sizeof(T) ; + size_t size = num_items * sizeof(T); - LOG(LEVEL) - << " num_items " << std::setw(10) << num_items - << " size " << std::setw(10) << size - << " label " << std::setw(15) << label - ; + LOG(LEVEL) << " num_items " << std::setw(10) << num_items << " size " << std::setw(10) << size << " label " + << std::setw(15) << label; - LOG_IF(info, MEMCHECK) - << " num_items " << std::setw(10) << num_items - << " size " << std::setw(10) << size - << " label " << std::setw(15) << label - ; + LOG_IF(info, MEMCHECK) << " num_items " << std::setw(10) << num_items << " size " << std::setw(10) << size + << " label " << std::setw(15) << label; + alloc_add(label, num_items, sizeof(T)); - alloc_add( label, num_items, sizeof(T) ) ; + T *d; + _cudaMalloc(reinterpret_cast(&d), size, label); - T* d ; - _cudaMalloc( reinterpret_cast( &d ), size, label ); - - return d ; + return d; } -template QUDARAP_API float* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API double* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API unsigned* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API int* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API uchar4* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API float4* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API quad* 
QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API quad2* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API quad4* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API quad6* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API sevent* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API qdebug* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API sstate* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API XORWOW* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API Philox* QU::device_alloc(unsigned num_items, const char* label) ; +template QUDARAP_API float *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API double *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API unsigned *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API int *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API uchar4 *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API float4 *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API quad *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API quad2 *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API quad4 *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API quad6 *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API sevent *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API qdebug *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API sstate *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API XORWOW *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API Philox 
*QU::device_alloc(unsigned num_items, const char *label); #ifndef PRODUCTION -template QUDARAP_API srec* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API sseq* QU::device_alloc(unsigned num_items, const char* label) ; +template QUDARAP_API srec *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API sseq *QU::device_alloc(unsigned num_items, const char *label); #endif -template QUDARAP_API sphoton* QU::device_alloc(unsigned num_items, const char* label) ; -template QUDARAP_API sphotonlite* QU::device_alloc(unsigned num_items, const char* label) ; - +template QUDARAP_API sphoton *QU::device_alloc(unsigned num_items, const char *label); +template QUDARAP_API sphotonlite *QU::device_alloc(unsigned num_items, const char *label); -template -T* QU::device_alloc_zero(unsigned num_items, const char* label) +template T *QU::device_alloc_zero(unsigned num_items, const char *label) { - size_t size = num_items*sizeof(T) ; + size_t size = num_items * sizeof(T); - LOG(LEVEL) - << " num_items " << std::setw(10) << num_items - << " sizeof(T) " << std::setw(10) << sizeof(T) - << " size " << std::setw(10) << size - << " label " << std::setw(15) << label - ; + LOG(LEVEL) << " num_items " << std::setw(10) << num_items << " sizeof(T) " << std::setw(10) << sizeof(T) << " size " + << std::setw(10) << size << " label " << std::setw(15) << label; - LOG_IF(info, MEMCHECK) - << " num_items " << std::setw(10) << num_items - << " sizeof(T) " << std::setw(10) << sizeof(T) - << " size " << std::setw(10) << size - << " label " << std::setw(15) << label - ; + LOG_IF(info, MEMCHECK) << " num_items " << std::setw(10) << num_items << " sizeof(T) " << std::setw(10) << sizeof(T) + << " size " << std::setw(10) << size << " label " << std::setw(15) << label; + alloc_add(label, num_items, sizeof(T)); - alloc_add( label, num_items, sizeof(T) ) ; + T *d; + _cudaMalloc(reinterpret_cast(&d), size, label); - T* d ; - _cudaMalloc( reinterpret_cast( &d ), 
size, label ); + int value = 0; + QUDA_CHECK(cudaMemset(d, value, size)); - int value = 0 ; - QUDA_CHECK( cudaMemset(d, value, size )); - - return d ; + return d; } -template QUDARAP_API sphoton* QU::device_alloc_zero(unsigned num_items, const char* label) ; -template QUDARAP_API sphotonlite* QU::device_alloc_zero(unsigned num_items, const char* label) ; -template QUDARAP_API quad2* QU::device_alloc_zero( unsigned num_items, const char* label) ; -template QUDARAP_API XORWOW* QU::device_alloc_zero( unsigned num_items, const char* label) ; -template QUDARAP_API Philox* QU::device_alloc_zero( unsigned num_items, const char* label) ; +template QUDARAP_API sphoton *QU::device_alloc_zero(unsigned num_items, const char *label); +template QUDARAP_API sphotonlite *QU::device_alloc_zero(unsigned num_items, const char *label); +template QUDARAP_API quad2 *QU::device_alloc_zero(unsigned num_items, const char *label); +template QUDARAP_API XORWOW *QU::device_alloc_zero(unsigned num_items, const char *label); +template QUDARAP_API Philox *QU::device_alloc_zero(unsigned num_items, const char *label); #ifndef PRODUCTION -template QUDARAP_API srec* QU::device_alloc_zero( unsigned num_items, const char* label) ; -template QUDARAP_API sseq* QU::device_alloc_zero( unsigned num_items, const char* label) ; -template QUDARAP_API stag* QU::device_alloc_zero( unsigned num_items, const char* label) ; -template QUDARAP_API sflat* QU::device_alloc_zero( unsigned num_items, const char* label) ; +template QUDARAP_API srec *QU::device_alloc_zero(unsigned num_items, const char *label); +template QUDARAP_API sseq *QU::device_alloc_zero(unsigned num_items, const char *label); +template QUDARAP_API stag *QU::device_alloc_zero(unsigned num_items, const char *label); +template QUDARAP_API sflat *QU::device_alloc_zero(unsigned num_items, const char *label); #endif - - - -template -void QU::device_memset( T* d, int value, unsigned num_items ) +template void QU::device_memset(T *d, int value, unsigned 
num_items) { - size_t size = num_items*sizeof(T) ; + size_t size = num_items * sizeof(T); - LOG_IF(info, MEMCHECK) - << " num_items " << std::setw(10) << num_items - << " sizeof(T) " << std::setw(10) << sizeof(T) - << " size " << std::setw(10) << size - ; + LOG_IF(info, MEMCHECK) << " num_items " << std::setw(10) << num_items << " sizeof(T) " << std::setw(10) << sizeof(T) + << " size " << std::setw(10) << size; - QUDA_CHECK( cudaMemset(d, value, size )); + QUDA_CHECK(cudaMemset(d, value, size)); } -template QUDARAP_API void QU::device_memset(int*, int, unsigned ) ; -template QUDARAP_API void QU::device_memset(quad4*, int, unsigned ) ; -template QUDARAP_API void QU::device_memset(quad6*, int, unsigned ) ; -template QUDARAP_API void QU::device_memset(sphoton*, int, unsigned ) ; -template QUDARAP_API void QU::device_memset(sphotonlite*, int, unsigned ) ; - - - - +template QUDARAP_API void QU::device_memset(int *, int, unsigned); +template QUDARAP_API void QU::device_memset(quad4 *, int, unsigned); +template QUDARAP_API void QU::device_memset(quad6 *, int, unsigned); +template QUDARAP_API void QU::device_memset(sphoton *, int, unsigned); +template QUDARAP_API void QU::device_memset(sphotonlite *, int, unsigned); - - - - - -template -void QU::device_free( T* d) +template void QU::device_free(T *d) { - LOG_IF(info, MEMCHECK) ; + LOG_IF(info, MEMCHECK); // HMM: could use salloc to find the label ? 
- QUDA_CHECK( cudaFree(d) ); + QUDA_CHECK(cudaFree(d)); } -template QUDARAP_API void QU::device_free(float*) ; -template QUDARAP_API void QU::device_free(double*) ; -template QUDARAP_API void QU::device_free(unsigned*) ; -template QUDARAP_API void QU::device_free(quad2*) ; -template QUDARAP_API void QU::device_free(quad4*) ; -template QUDARAP_API void QU::device_free(sphoton*) ; -template QUDARAP_API void QU::device_free(sphotonlite*) ; -template QUDARAP_API void QU::device_free(uchar4*) ; -template QUDARAP_API void QU::device_free(XORWOW*) ; -template QUDARAP_API void QU::device_free(Philox*) ; - - -template -int QU::copy_device_to_host( T* h, T* d, unsigned num_items) +template QUDARAP_API void QU::device_free(float *); +template QUDARAP_API void QU::device_free(double *); +template QUDARAP_API void QU::device_free(unsigned *); +template QUDARAP_API void QU::device_free(quad2 *); +template QUDARAP_API void QU::device_free(quad4 *); +template QUDARAP_API void QU::device_free(sphoton *); +template QUDARAP_API void QU::device_free(sphotonlite *); +template QUDARAP_API void QU::device_free(uchar4 *); +template QUDARAP_API void QU::device_free(XORWOW *); +template QUDARAP_API void QU::device_free(Philox *); + +template int QU::copy_device_to_host(T *h, T *d, unsigned num_items) { - if( d == nullptr ) std::cerr - << "QU::copy_device_to_host" - << " ERROR : device pointer is null " - << std::endl - ; + if (d == nullptr) + std::cerr << "QU::copy_device_to_host" << " ERROR : device pointer is null " << std::endl; - if( d == nullptr ) return 1 ; + if (d == nullptr) + return 1; - size_t size = num_items*sizeof(T) ; - QUDA_CHECK( cudaMemcpy(reinterpret_cast( h ), d , size, cudaMemcpyDeviceToHost )); + size_t size = num_items * sizeof(T); + QUDA_CHECK(cudaMemcpy(reinterpret_cast(h), d, size, cudaMemcpyDeviceToHost)); - return 0 ; + return 0; } - -template int QU::copy_device_to_host( int* h, int* d, unsigned num_items); -template int QU::copy_device_to_host( float* h, float* 
d, unsigned num_items); -template int QU::copy_device_to_host( double* h, double* d, unsigned num_items); -template int QU::copy_device_to_host( quad* h, quad* d, unsigned num_items); -template int QU::copy_device_to_host( quad2* h, quad2* d, unsigned num_items); -template int QU::copy_device_to_host( quad4* h, quad4* d, unsigned num_items); -template int QU::copy_device_to_host( sphoton* h, sphoton* d, unsigned num_items); -template int QU::copy_device_to_host( sphotonlite* h, sphotonlite* d, unsigned num_items); -template int QU::copy_device_to_host( quad6* h, quad6* d, unsigned num_items); -template int QU::copy_device_to_host( sstate* h, sstate* d, unsigned num_items); -template int QU::copy_device_to_host( XORWOW* h, XORWOW* d, unsigned num_items); -template int QU::copy_device_to_host( Philox* h, Philox* d, unsigned num_items); +template int QU::copy_device_to_host(int *h, int *d, unsigned num_items); +template int QU::copy_device_to_host(float *h, float *d, unsigned num_items); +template int QU::copy_device_to_host(double *h, double *d, unsigned num_items); +template int QU::copy_device_to_host(quad *h, quad *d, unsigned num_items); +template int QU::copy_device_to_host(quad2 *h, quad2 *d, unsigned num_items); +template int QU::copy_device_to_host(quad4 *h, quad4 *d, unsigned num_items); +template int QU::copy_device_to_host(sphoton *h, sphoton *d, unsigned num_items); +template int QU::copy_device_to_host(sphotonlite *h, sphotonlite *d, unsigned num_items); +template int QU::copy_device_to_host(quad6 *h, quad6 *d, unsigned num_items); +template int QU::copy_device_to_host(sstate *h, sstate *d, unsigned num_items); +template int QU::copy_device_to_host(XORWOW *h, XORWOW *d, unsigned num_items); +template int QU::copy_device_to_host(Philox *h, Philox *d, unsigned num_items); #ifndef PRODUCTION -template int QU::copy_device_to_host( srec* h, srec* d, unsigned num_items); -template int QU::copy_device_to_host( sseq* h, sseq* d, unsigned num_items); -template 
int QU::copy_device_to_host( stag* h, stag* d, unsigned num_items); -template int QU::copy_device_to_host( sflat* h, sflat* d, unsigned num_items); +template int QU::copy_device_to_host(srec *h, srec *d, unsigned num_items); +template int QU::copy_device_to_host(sseq *h, sseq *d, unsigned num_items); +template int QU::copy_device_to_host(stag *h, stag *d, unsigned num_items); +template int QU::copy_device_to_host(sflat *h, sflat *d, unsigned num_items); #endif - /** QU::copy_device_to_host_and_free ---------------------------------- @@ -534,60 +459,43 @@ results into the output array. **/ -template -void QU::copy_device_to_host_and_free( T* h, T* d, unsigned num_items, const char* label) +template void QU::copy_device_to_host_and_free(T *h, T *d, unsigned num_items, const char *label) { - size_t size = num_items*sizeof(T) ; - LOG(LEVEL) - << "copy " << num_items - << " sizeof(T) " << sizeof(T) - << " label " << ( label ? label : "-" ) - ; + size_t size = num_items * sizeof(T); + LOG(LEVEL) << "copy " << num_items << " sizeof(T) " << sizeof(T) << " label " << (label ? 
label : "-"); - QUDA_CHECK( cudaMemcpy(reinterpret_cast( h ), d , size, cudaMemcpyDeviceToHost )); - QUDA_CHECK( cudaFree(d) ); + QUDA_CHECK(cudaMemcpy(reinterpret_cast(h), d, size, cudaMemcpyDeviceToHost)); + QUDA_CHECK(cudaFree(d)); } - -template void QU::copy_device_to_host_and_free( float* h, float* d, unsigned num_items, const char* label ); -template void QU::copy_device_to_host_and_free( double* h, double* d, unsigned num_items, const char* label); -template void QU::copy_device_to_host_and_free( quad* h, quad* d, unsigned num_items, const char* label); -template void QU::copy_device_to_host_and_free( quad2* h, quad2* d, unsigned num_items, const char* label); -template void QU::copy_device_to_host_and_free( quad4* h, quad4* d, unsigned num_items, const char* label); -template void QU::copy_device_to_host_and_free( sphoton* h, sphoton* d, unsigned num_items, const char* label); -template void QU::copy_device_to_host_and_free( sphotonlite* h, sphotonlite* d, unsigned num_items, const char* label); -template void QU::copy_device_to_host_and_free( quad6* h, quad6* d, unsigned num_items, const char* label); -template void QU::copy_device_to_host_and_free( sstate* h, sstate* d, unsigned num_items, const char* label); - - - - - - - - - - - - -template -void QU::copy_host_to_device( T* d, const T* h, unsigned num_items) +template void QU::copy_device_to_host_and_free(float *h, float *d, unsigned num_items, const char *label); +template void QU::copy_device_to_host_and_free(double *h, double *d, unsigned num_items, const char *label); +template void QU::copy_device_to_host_and_free(quad *h, quad *d, unsigned num_items, const char *label); +template void QU::copy_device_to_host_and_free(quad2 *h, quad2 *d, unsigned num_items, const char *label); +template void QU::copy_device_to_host_and_free(quad4 *h, quad4 *d, unsigned num_items, const char *label); +template void QU::copy_device_to_host_and_free(sphoton *h, sphoton *d, unsigned num_items, const char *label); 
+template void QU::copy_device_to_host_and_free(sphotonlite *h, sphotonlite *d, unsigned num_items, + const char *label); +template void QU::copy_device_to_host_and_free(quad6 *h, quad6 *d, unsigned num_items, const char *label); +template void QU::copy_device_to_host_and_free(sstate *h, sstate *d, unsigned num_items, const char *label); + +template void QU::copy_host_to_device(T *d, const T *h, unsigned num_items) { - size_t size = num_items*sizeof(T) ; - QUDA_CHECK( cudaMemcpy(reinterpret_cast( d ), h , size, cudaMemcpyHostToDevice )); + size_t size = num_items * sizeof(T); + QUDA_CHECK(cudaMemcpy(reinterpret_cast(d), h, size, cudaMemcpyHostToDevice)); } -template void QU::copy_host_to_device( float* d, const float* h, unsigned num_items); -template void QU::copy_host_to_device( double* d, const double* h, unsigned num_items); -template void QU::copy_host_to_device( unsigned* d, const unsigned* h, unsigned num_items); -template void QU::copy_host_to_device( sevent* d, const sevent* h, unsigned num_items); -template void QU::copy_host_to_device( quad4* d, const quad4* h, unsigned num_items); -template void QU::copy_host_to_device( sphoton* d, const sphoton* h, unsigned num_items); -template void QU::copy_host_to_device( sphotonlite* d, const sphotonlite* h, unsigned num_items); -template void QU::copy_host_to_device( quad6* d, const quad6* h, unsigned num_items); -template void QU::copy_host_to_device( quad2* d, const quad2* h, unsigned num_items); -template void QU::copy_host_to_device( XORWOW* d, const XORWOW* h, unsigned num_items); -template void QU::copy_host_to_device( Philox* d, const Philox* h, unsigned num_items); +template void QU::copy_host_to_device(float *d, const float *h, unsigned num_items); +template void QU::copy_host_to_device(double *d, const double *h, unsigned num_items); +template void QU::copy_host_to_device(unsigned *d, const unsigned *h, unsigned num_items); +template void QU::copy_host_to_device(sevent *d, const sevent *h, unsigned 
num_items); +template void QU::copy_host_to_device(quad4 *d, const quad4 *h, unsigned num_items); +template void QU::copy_host_to_device(sphoton *d, const sphoton *h, unsigned num_items); +template void QU::copy_host_to_device(sphotonlite *d, const sphotonlite *h, unsigned num_items); +template void QU::copy_host_to_device(quad6 *d, const quad6 *h, unsigned num_items); +template void QU::copy_host_to_device(quad2 *d, const quad2 *h, unsigned num_items); +template void QU::copy_host_to_device(XORWOW *d, const XORWOW *h, unsigned num_items); +template void QU::copy_host_to_device(Philox *d, const Philox *h, unsigned num_items); /** QU::NumItems @@ -598,52 +506,50 @@ using the size of the template type and the shape of the NP array. **/ -template -unsigned QU::NumItems( const NP* a ) +template unsigned QU::NumItems(const NP *a) { - unsigned num_items = 0 ; + unsigned num_items = 0; - if( sizeof(T) == sizeof(float)*6*4 ) // looks like quad6 + if (sizeof(T) == sizeof(float) * 6 * 4) // looks like quad6 { - if(a->shape.size() == 3 ) + if (a->shape.size() == 3) { - assert( a->has_shape( -1, 6, 4) ); - num_items = a->shape[0] ; + assert(a->has_shape(-1, 6, 4)); + num_items = a->shape[0]; } } - else if( sizeof(T) == sizeof(float)*4*4 ) // looks like quad4 + else if (sizeof(T) == sizeof(float) * 4 * 4) // looks like quad4 { - if(a->shape.size() == 3 ) + if (a->shape.size() == 3) { - assert( a->has_shape( -1, 4, 4) ); - num_items = a->shape[0] ; + assert(a->has_shape(-1, 4, 4)); + num_items = a->shape[0]; } - else if(a->shape.size() == 4 ) + else if (a->shape.size() == 4) { - assert( a->shape[2] == 2 && a->shape[3] == 4 ); - num_items = a->shape[0]*a->shape[1] ; + assert(a->shape[2] == 2 && a->shape[3] == 4); + num_items = a->shape[0] * a->shape[1]; } } - else if( sizeof(T) == sizeof(float)*4*2 ) // looks like quad2 + else if (sizeof(T) == sizeof(float) * 4 * 2) // looks like quad2 { - if(a->shape.size() == 3 ) + if (a->shape.size() == 3) { - assert( a->has_shape( -1, 2, 4) 
); - num_items = a->shape[0] ; + assert(a->has_shape(-1, 2, 4)); + num_items = a->shape[0]; } - else if(a->shape.size() == 4 ) + else if (a->shape.size() == 4) { - assert( a->shape[2] == 2 && a->shape[3] == 4 ); - num_items = a->shape[0]*a->shape[1] ; + assert(a->shape[2] == 2 && a->shape[3] == 4); + num_items = a->shape[0] * a->shape[1]; } } - return num_items ; + return num_items; } -template unsigned QU::NumItems(const NP* ); -template unsigned QU::NumItems(const NP* ); -template unsigned QU::NumItems(const NP* ); - +template unsigned QU::NumItems(const NP *); +template unsigned QU::NumItems(const NP *); +template unsigned QU::NumItems(const NP *); /** QU::copy_host_to_device @@ -658,29 +564,25 @@ Suggesting should generally use this via QEvt. **/ -template -unsigned QU::copy_host_to_device( T* d, const NP* a) +template unsigned QU::copy_host_to_device(T *d, const NP *a) { unsigned num_items = NumItems(a); - if( num_items == 0 ) + if (num_items == 0) { - LOG(fatal) << " failed to devine num_items for array " << a->sstr() << " with template type where sizeof(T) " << sizeof(T) ; + LOG(fatal) << " failed to devine num_items for array " << a->sstr() << " with template type where sizeof(T) " + << sizeof(T); } - if( num_items > 0 ) + if (num_items > 0) { - copy_host_to_device( d, (T*)a->bytes(), num_items ); + copy_host_to_device(d, (T *)a->bytes(), num_items); } - return num_items ; + return num_items; } -template unsigned QU::copy_host_to_device( quad2* , const NP* ); -template unsigned QU::copy_host_to_device( quad4* , const NP* ); -template unsigned QU::copy_host_to_device( quad6* , const NP* ); - - - - +template unsigned QU::copy_host_to_device(quad2 *, const NP *); +template unsigned QU::copy_host_to_device(quad4 *, const NP *); +template unsigned QU::copy_host_to_device(quad6 *, const NP *); /** QU::ConfigureLaunch @@ -691,79 +593,64 @@ QU::ConfigureLaunch **/ -void QU::ConfigureLaunch( dim3& numBlocks, dim3& threadsPerBlock, unsigned width, unsigned height ) 
// static +void QU::ConfigureLaunch(dim3 &numBlocks, dim3 &threadsPerBlock, unsigned width, unsigned height) // static { - threadsPerBlock.x = 512 ; - threadsPerBlock.y = 1 ; - threadsPerBlock.z = 1 ; + threadsPerBlock.x = 512; + threadsPerBlock.y = 1; + threadsPerBlock.z = 1; - numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ; - numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y ; - numBlocks.z = 1 ; + numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x; + numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y; + numBlocks.z = 1; // hmm this looks to not handle height other than 1 } -void QU::ConfigureLaunch1D( dim3& numBlocks, dim3& threadsPerBlock, unsigned num, unsigned threads_per_block ) // static +void QU::ConfigureLaunch1D(dim3 &numBlocks, dim3 &threadsPerBlock, unsigned num, unsigned threads_per_block) // static { - threadsPerBlock.x = threads_per_block ; - threadsPerBlock.y = 1 ; - threadsPerBlock.z = 1 ; + threadsPerBlock.x = threads_per_block; + threadsPerBlock.y = 1; + threadsPerBlock.z = 1; - numBlocks.x = (num + threadsPerBlock.x - 1) / threadsPerBlock.x ; - numBlocks.y = 1 ; - numBlocks.z = 1 ; + numBlocks.x = (num + threadsPerBlock.x - 1) / threadsPerBlock.x; + numBlocks.y = 1; + numBlocks.z = 1; } - - -void QU::ConfigureLaunch2D( dim3& numBlocks, dim3& threadsPerBlock, unsigned width, unsigned height ) // static +void QU::ConfigureLaunch2D(dim3 &numBlocks, dim3 &threadsPerBlock, unsigned width, unsigned height) // static { - threadsPerBlock.x = 16 ; - threadsPerBlock.y = 16 ; - threadsPerBlock.z = 1 ; + threadsPerBlock.x = 16; + threadsPerBlock.y = 16; + threadsPerBlock.z = 1; - numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ; - numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y ; - numBlocks.z = 1 ; + numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x; + numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y; + numBlocks.z = 
1; } - -void QU::ConfigureLaunch16( dim3& numBlocks, dim3& threadsPerBlock ) // static +void QU::ConfigureLaunch16(dim3 &numBlocks, dim3 &threadsPerBlock) // static { - threadsPerBlock.x = 16 ; - threadsPerBlock.y = 1 ; - threadsPerBlock.z = 1 ; + threadsPerBlock.x = 16; + threadsPerBlock.y = 1; + threadsPerBlock.z = 1; - numBlocks.x = 1 ; - numBlocks.y = 1 ; - numBlocks.z = 1 ; + numBlocks.x = 1; + numBlocks.y = 1; + numBlocks.z = 1; } - -std::string QU::Desc(const dim3& d, int w) // static +std::string QU::Desc(const dim3 &d, int w) // static { - std::stringstream ss ; - ss << "( " - << std::setw(w) << d.x - << " " - << std::setw(w) << d.y - << " " - << std::setw(w) << d.z - << ")" - ; + std::stringstream ss; + ss << "( " << std::setw(w) << d.x << " " << std::setw(w) << d.y << " " << std::setw(w) << d.z << ")"; std::string s = ss.str(); - return s ; + return s; } -std::string QU::DescLaunch( const dim3& numBlocks, const dim3& threadsPerBlock ) // static +std::string QU::DescLaunch(const dim3 &numBlocks, const dim3 &threadsPerBlock) // static { - std::stringstream ss ; - ss - << " numBlocks " << Desc(numBlocks,4) - << " threadsPerBlock " << Desc(threadsPerBlock, 4) - ; + std::stringstream ss; + ss << " numBlocks " << Desc(numBlocks, 4) << " threadsPerBlock " << Desc(threadsPerBlock, 4); std::string s = ss.str(); - return s ; + return s; } - diff --git a/qudarap/QWls.cc b/qudarap/QWls.cc new file mode 100644 index 000000000..e3888d7b5 --- /dev/null +++ b/qudarap/QWls.cc @@ -0,0 +1,131 @@ +#include +#include +#include + +#include "scuda.h" +#include "squad.h" + +#include "NP.hh" +#include "SLOG.hh" +#include "ssys.h" + +#include "QTex.hh" +#include "QU.hh" +#include "QUDA_CHECK.h" +#include "QWls.hh" + +#include "qwls.h" + +const plog::Severity QWls::LEVEL = SLOG::EnvLevel("QWls", "DEBUG"); + +const QWls *QWls::INSTANCE = nullptr; +const QWls *QWls::Get() +{ + return INSTANCE; +} + +/** +QWls::QWls +------------ + +1. Narrows ICDF from double to float if needed +2. 
Uploads ICDF into GPU texture +3. Creates qwls instance with device pointers and uploads it + +**/ + +QWls::QWls(const NP *wls_icdf, const NP *mat_map, const NP *time_constants, unsigned hd_factor) + : dsrc(wls_icdf->ebyte == 8 ? wls_icdf : nullptr), src(wls_icdf->ebyte == 4 ? wls_icdf : NP::MakeNarrow(dsrc)), + tex(MakeWlsTex(src, hd_factor)), + wls(MakeInstance(tex, mat_map, time_constants, hd_factor, time_constants->shape[0])), + d_wls(QU::UploadArray(wls, 1, "QWls::QWls/d_wls")) +{ + INSTANCE = this; +} + +/** +QWls::MakeWlsTex +------------------- + +Creates a 2D CUDA texture from the ICDF array. +Shape: (num_wls*3, 4096, 1) where 3 = HD layers per material. + +**/ + +QTex *QWls::MakeWlsTex(const NP *src, unsigned hd_factor) +{ + assert(src); + assert(src->shape.size() == 3); + + unsigned ni = src->shape[0]; // height: num_wls * 3 + unsigned nj = src->shape[1]; // width: 4096 + unsigned nk = src->shape[2]; // 1 + + assert(nk == 1); + assert(nj == 4096); + assert(ni % 3 == 0); // must be multiple of 3 (3 HD layers per material) + assert(src->uifc == 'f' && src->ebyte == 4); + + unsigned ny = ni; // height + unsigned nx = nj; // width + + bool normalizedCoords = true; + QTex *tx = new QTex(nx, ny, src->cvalues(), 'L', normalizedCoords, src); + + tx->setHDFactor(hd_factor); + tx->uploadMeta(); + + LOG(LEVEL) << " src " << src->desc() << " nx (width) " << nx << " ny (height) " << ny << " tx.HDFactor " + << tx->getHDFactor(); + + return tx; +} + +/** +QWls::MakeInstance +--------------------- + +Creates the host-side qwls struct populated with device pointers. +Uploads material_map and time_constants to device memory. 
+ +**/ + +qwls *QWls::MakeInstance(const QTex *tex, const NP *mat_map, const NP *time_constants, unsigned hd_factor, + unsigned num_wls) +{ + assert(mat_map); + assert(time_constants); + assert(mat_map->uifc == 'i' && mat_map->ebyte == 4); + assert(time_constants->uifc == 'f' && time_constants->ebyte == 4); + + qwls *w = new qwls; + w->wls_tex = tex->texObj; + w->hd_factor = hd_factor; + w->num_wls = num_wls; + w->tex_height = tex->height; + + // Upload material_map to device + unsigned num_mat = mat_map->shape[0]; + int *d_mat_map = nullptr; + size_t mat_map_size = num_mat * sizeof(int); + QUDA_CHECK(cudaMalloc(reinterpret_cast(&d_mat_map), mat_map_size)); + QUDA_CHECK(cudaMemcpy(d_mat_map, mat_map->cvalues(), mat_map_size, cudaMemcpyHostToDevice)); + w->material_map = d_mat_map; + + // Upload time_constants to device + float *d_tc = nullptr; + size_t tc_size = num_wls * sizeof(float); + QUDA_CHECK(cudaMalloc(reinterpret_cast(&d_tc), tc_size)); + QUDA_CHECK(cudaMemcpy(d_tc, time_constants->cvalues(), tc_size, cudaMemcpyHostToDevice)); + w->time_constants = d_tc; + + return w; +} + +std::string QWls::desc() const +{ + std::stringstream ss; + ss << "QWls" << " dsrc " << (dsrc ? dsrc->desc() : "-") << " src " << (src ? src->desc() : "-") << " tex " + << (tex ? tex->desc() : "-"); + return ss.str(); +} diff --git a/qudarap/QWls.hh b/qudarap/QWls.hh new file mode 100644 index 000000000..3134eba2b --- /dev/null +++ b/qudarap/QWls.hh @@ -0,0 +1,41 @@ +#pragma once + +#include "QUDARAP_API_EXPORT.hh" +#include "plog/Severity.h" +#include + +struct NP; +template struct QTex; +struct qwls; + +/** +QWls : Host-side WLS ICDF Texture Upload +============================================ + +Uploads the WLS inverse CDF array into a GPU texture and creates +the device-side qwls struct with material mapping and time constants. + +Follows the same pattern as QScint for scintillation ICDF textures. 
+ +**/ + +struct QUDARAP_API QWls +{ + static const plog::Severity LEVEL; + static const QWls *INSTANCE; + static const QWls *Get(); + + static QTex *MakeWlsTex(const NP *src, unsigned hd_factor); + static qwls *MakeInstance(const QTex *tex, const NP *mat_map, const NP *time_constants, unsigned hd_factor, + unsigned num_wls); + + const NP *dsrc; // original double-precision ICDF + const NP *src; // narrowed float ICDF + QTex *tex; // GPU texture + qwls *wls; // host-side instance (with device pointers) + qwls *d_wls; // device copy of qwls struct + + QWls(const NP *wls_icdf, const NP *mat_map, const NP *time_constants, unsigned hd_factor); + + std::string desc() const; +}; diff --git a/qudarap/qsim.h b/qudarap/qsim.h index f8a94d091..34fcc3a66 100644 --- a/qudarap/qsim.h +++ b/qudarap/qsim.h @@ -23,178 +23,183 @@ Canonical use is from CSGOptiX/CSGOptiX7.cu:simulate **/ #if defined(__CUDACC__) || defined(__CUDABE__) - #define QSIM_METHOD __device__ +#define QSIM_METHOD __device__ #else - #define QSIM_METHOD +#define QSIM_METHOD #endif #include "OpticksGenstep.h" #include "OpticksPhoton.h" +#include "sc4u.h" #include "sflow.h" +#include "sphoton.h" #include "sqat4.h" -#include "sc4u.h" #include "sxyz.h" -#include "sphoton.h" -#include "storch.h" #include "scarrier.h" #include "sevent.h" -#include "sstate.h" #include "smatsur.h" - +#include "sstate.h" +#include "storch.h" #ifndef PRODUCTION #include "srec.h" #include "sseq.h" #include "stag.h" #ifdef DEBUG_LOGF -#define KLUDGE_FASTMATH_LOGF(u) (u < 0.998f ? __logf(u) : __logf(u) - 0.46735790f*1e-7f ) +#define KLUDGE_FASTMATH_LOGF(u) (u < 0.998f ? 
__logf(u) : __logf(u) - 0.46735790f * 1e-7f) #endif #endif #include "sctx.h" -#include "qrng.h" #include "qbase.h" -#include "qprop.h" -#include "qmultifilm.h" #include "qbnd.h" -#include "qscint.h" #include "qcerenkov.h" +#include "qmultifilm.h" #include "qpmt.h" +#include "qprop.h" +#include "qrng.h" +#include "qscint.h" +#include "qwls.h" #include "tcomplex.h" - -struct qcerenkov ; +struct qcerenkov; struct qsim { - qbase* base ; - sevent* evt ; - qrng* rng ; - qbnd* bnd ; - qmultifilm* multifilm; - qcerenkov* cerenkov ; - qscint* scint ; - qpmt* pmt ; + qbase *base; + sevent *evt; + qrng *rng; + qbnd *bnd; + qmultifilm *multifilm; + qcerenkov *cerenkov; + qscint *scint; + qwls *wls; + qpmt *pmt; #if defined(__CUDACC__) || defined(__CUDABE__) #else qsim(); // instanciated on CPU (see QSim::init_sim) and copied to device so no ctor in device code #endif - QSIM_METHOD void generate_photon_dummy( sphoton& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ; + QSIM_METHOD void generate_photon_dummy(sphoton &p, RNG &rng, const quad6 &gs, unsigned long long photon_id, + unsigned genstep_id) const; QSIM_METHOD static float3 uniform_sphere(const float u0, const float u1); - QSIM_METHOD static float RandGaussQ_shoot( RNG& rng, float mean, float stdDev ); - QSIM_METHOD static void SmearNormal_SigmaAlpha( RNG& rng, float3* smeared_normal, const float3* direction, const float3* normal, float sigma_alpha, const sctx& ctx ); - QSIM_METHOD static void SmearNormal_Polish( RNG& rng, float3* smeared_normal, const float3* direction, const float3* normal, float polish , const sctx& ctx ); + QSIM_METHOD static float RandGaussQ_shoot(RNG &rng, float mean, float stdDev); + QSIM_METHOD static void SmearNormal_SigmaAlpha(RNG &rng, float3 *smeared_normal, const float3 *direction, + const float3 *normal, float sigma_alpha, const sctx &ctx); + QSIM_METHOD static void SmearNormal_Polish(RNG &rng, float3 *smeared_normal, const float3 *direction, + const 
float3 *normal, float polish, const sctx &ctx); -#if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA) - QSIM_METHOD static float3 uniform_sphere(RNG& rng); +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) + QSIM_METHOD static float3 uniform_sphere(RNG &rng); #endif #if defined(__CUDACC__) || defined(__CUDABE__) - QSIM_METHOD float4 multifilm_lookup(unsigned pmtType, float nm, float aoi); + QSIM_METHOD float4 multifilm_lookup(unsigned pmtType, float nm, float aoi); #endif -#if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA) - QSIM_METHOD static void lambertian_direction(float3* dir, const float3* normal, float orient, RNG& rng, sctx& ctx ); - QSIM_METHOD static void random_direction_marsaglia(float3* dir, RNG& rng, sctx& ctx ); - QSIM_METHOD void rayleigh_scatter(RNG& rng, sctx& ctx ); - QSIM_METHOD int propagate_to_boundary( unsigned& flag, RNG& rng, sctx& ctx ); +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) + QSIM_METHOD static void lambertian_direction(float3 *dir, const float3 *normal, float orient, RNG &rng, sctx &ctx); + QSIM_METHOD static void random_direction_marsaglia(float3 *dir, RNG &rng, sctx &ctx); + QSIM_METHOD void rayleigh_scatter(RNG &rng, sctx &ctx); + QSIM_METHOD int propagate_to_boundary(unsigned &flag, RNG &rng, sctx &ctx); #endif -#if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA) - QSIM_METHOD int propagate_at_boundary( unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance=-1.f ) const ; - QSIM_METHOD int propagate_at_boundary_with_T( unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance ) const ; +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) + QSIM_METHOD int propagate_at_boundary(unsigned &flag, RNG &rng, sctx &ctx, float theTransmittance = -1.f) const; + 
QSIM_METHOD int propagate_at_boundary_with_T(unsigned &flag, RNG &rng, sctx &ctx, float theTransmittance) const; #endif #if defined(__CUDACC__) || defined(__CUDABE__) - QSIM_METHOD int propagate_at_surface_MultiFilm(unsigned& flag, RNG& rng, sctx& ctx ); + QSIM_METHOD int propagate_at_surface_MultiFilm(unsigned &flag, RNG &rng, sctx &ctx); #endif -#if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA) - QSIM_METHOD int propagate_at_surface( unsigned& flag, RNG& rng, sctx& ctx ); - QSIM_METHOD int propagate_at_surface_Detect( unsigned& flag, RNG& rng, sctx& ctx ) const ; -#if defined( WITH_CUSTOM4 ) - QSIM_METHOD int propagate_at_surface_CustomART( unsigned& flag, RNG& rng, sctx& ctx ) const ; +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) + QSIM_METHOD int propagate_at_surface(unsigned &flag, RNG &rng, sctx &ctx); + QSIM_METHOD int propagate_at_surface_Detect(unsigned &flag, RNG &rng, sctx &ctx) const; +#if defined(WITH_CUSTOM4) + QSIM_METHOD int propagate_at_surface_CustomART(unsigned &flag, RNG &rng, sctx &ctx) const; #endif #endif -#if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA) - QSIM_METHOD void reflect_diffuse( RNG& rng, sctx& ctx ); - QSIM_METHOD void reflect_specular( RNG& rng, sctx& ctx ); +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) + QSIM_METHOD void reflect_diffuse(RNG &rng, sctx &ctx); + QSIM_METHOD void reflect_specular(RNG &rng, sctx &ctx); - QSIM_METHOD void fake_propagate( sphoton& p, const quad2* mock_prd, RNG& rng, unsigned long long idx ); - QSIM_METHOD int propagate(const int bounce, RNG& rng, sctx& ctx ); + QSIM_METHOD void fake_propagate(sphoton &p, const quad2 *mock_prd, RNG &rng, unsigned long long idx); + QSIM_METHOD int propagate(const int bounce, RNG &rng, sctx &ctx); - QSIM_METHOD void hemisphere_polarized( unsigned polz, bool inwards, RNG& rng, sctx& ctx 
); - QSIM_METHOD void generate_photon_simtrace( quad4& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ; - QSIM_METHOD void generate_photon_simtrace_frame( quad4& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ; - QSIM_METHOD void generate_photon( sphoton& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ; + QSIM_METHOD void hemisphere_polarized(unsigned polz, bool inwards, RNG &rng, sctx &ctx); + QSIM_METHOD void generate_photon_simtrace(quad4 &p, RNG &rng, const quad6 &gs, unsigned long long photon_id, + unsigned genstep_id) const; + QSIM_METHOD void generate_photon_simtrace_frame(quad4 &p, RNG &rng, const quad6 &gs, unsigned long long photon_id, + unsigned genstep_id) const; + QSIM_METHOD void generate_photon(sphoton &p, RNG &rng, const quad6 &gs, unsigned long long photon_id, + unsigned genstep_id) const; #endif }; // CTOR #if defined(__CUDACC__) || defined(__CUDABE__) #else -inline qsim::qsim() // instanciated on CPU (see QSim::init_sim) and copied to device so no ctor in device code - : - base(nullptr), - evt(nullptr), - rng(nullptr), - bnd(nullptr), - multifilm(nullptr), - cerenkov(nullptr), - scint(nullptr), - pmt(nullptr) - { - } +inline qsim::qsim() // instanciated on CPU (see QSim::init_sim) and copied to device so no ctor in device code + : base(nullptr), evt(nullptr), rng(nullptr), bnd(nullptr), multifilm(nullptr), cerenkov(nullptr), scint(nullptr), + wls(nullptr), pmt(nullptr) +{ +} #endif -inline QSIM_METHOD void qsim::generate_photon_dummy(sphoton& p_, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const +inline QSIM_METHOD void qsim::generate_photon_dummy(sphoton &p_, RNG &rng, const quad6 &gs, + unsigned long long photon_id, unsigned genstep_id) const { - quad4& p = (quad4&)p_ ; + quad4 &p = (quad4 &)p_; #ifndef PRODUCTION printf("//qsim::generate_photon_dummy photon_id %3lld genstep_id %3d gs.q0.i 
( gencode:%3d %3d %3d %3d ) \n", - photon_id, - genstep_id, - gs.q0.i.x, - gs.q0.i.y, - gs.q0.i.z, - gs.q0.i.w - ); -#endif - p.q0.i.x = 1 ; p.q0.i.y = 2 ; p.q0.i.z = 3 ; p.q0.i.w = 4 ; - p.q1.i.x = 1 ; p.q1.i.y = 2 ; p.q1.i.z = 3 ; p.q1.i.w = 4 ; - p.q2.i.x = 1 ; p.q2.i.y = 2 ; p.q2.i.z = 3 ; p.q2.i.w = 4 ; - p.q3.i.x = 1 ; p.q3.i.y = 2 ; p.q3.i.z = 3 ; p.q3.i.w = 4 ; + photon_id, genstep_id, gs.q0.i.x, gs.q0.i.y, gs.q0.i.z, gs.q0.i.w); +#endif + p.q0.i.x = 1; + p.q0.i.y = 2; + p.q0.i.z = 3; + p.q0.i.w = 4; + p.q1.i.x = 1; + p.q1.i.y = 2; + p.q1.i.z = 3; + p.q1.i.w = 4; + p.q2.i.x = 1; + p.q2.i.y = 2; + p.q2.i.z = 3; + p.q2.i.w = 4; + p.q3.i.x = 1; + p.q3.i.y = 2; + p.q3.i.z = 3; + p.q3.i.w = 4; p.set_flag(TORCH); } inline QSIM_METHOD float3 qsim::uniform_sphere(const float u0, const float u1) { - float phi = u0*2.f*M_PIf; - float cosTheta = 2.f*u1 - 1.f ; // -1.f -> 1.f - float sinTheta = sqrtf(1.f-cosTheta*cosTheta); - return make_float3(cosf(phi)*sinTheta, sinf(phi)*sinTheta, cosTheta); + float phi = u0 * 2.f * M_PIf; + float cosTheta = 2.f * u1 - 1.f; // -1.f -> 1.f + float sinTheta = sqrtf(1.f - cosTheta * cosTheta); + return make_float3(cosf(phi) * sinTheta, sinf(phi) * sinTheta, cosTheta); } - -#if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA) +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) /** qsim::uniform_sphere --------------------- **/ -inline QSIM_METHOD float3 qsim::uniform_sphere(RNG& rng) +inline QSIM_METHOD float3 qsim::uniform_sphere(RNG &rng) { - float phi = curand_uniform(&rng)*2.f*M_PIf; - float cosTheta = 2.f*curand_uniform(&rng) - 1.f ; // -1.f -> 1.f - float sinTheta = sqrtf(1.f-cosTheta*cosTheta); - return make_float3(cosf(phi)*sinTheta, sinf(phi)*sinTheta, cosTheta); + float phi = curand_uniform(&rng) * 2.f * M_PIf; + float cosTheta = 2.f * curand_uniform(&rng) - 1.f; // -1.f -> 1.f + float sinTheta = sqrtf(1.f - cosTheta * cosTheta); + return 
make_float3(cosf(phi) * sinTheta, sinf(phi) * sinTheta, cosTheta); } /** @@ -210,15 +215,14 @@ See:: g4-cls G4MTRandGaussQ **/ -inline QSIM_METHOD float qsim::RandGaussQ_shoot( RNG& rng, float mean, float stdDev ) +inline QSIM_METHOD float qsim::RandGaussQ_shoot(RNG &rng, float mean, float stdDev) { - float u2 = 2.f*curand_uniform(&rng) ; - float v = -M_SQRT2f*erfcinvf(u2)*stdDev + mean ; - //printf("//qsim.RandGaussQ_shoot mean %10.5f stdDev %10.5f u2 %10.5f v %10.5f \n", mean, stdDev, u2, v ) ; - return v ; + float u2 = 2.f * curand_uniform(&rng); + float v = -M_SQRT2f * erfcinvf(u2) * stdDev + mean; + // printf("//qsim.RandGaussQ_shoot mean %10.5f stdDev %10.5f u2 %10.5f v %10.5f \n", mean, stdDev, u2, v ) ; + return v; } - /** qsim::SmearNormal_SigmaAlpha ------------------------------ @@ -251,70 +255,71 @@ TODO: full simulation run with breakpoint "BP=C4OpBoundaryProcess::GetFacetNorma **/ -inline QSIM_METHOD void qsim::SmearNormal_SigmaAlpha( - RNG& rng, - float3* smeared_normal, - const float3* direction, - const float3* normal, - float sigma_alpha, - const sctx& ctx - ) +inline QSIM_METHOD void qsim::SmearNormal_SigmaAlpha(RNG &rng, float3 *smeared_normal, const float3 *direction, + const float3 *normal, float sigma_alpha, const sctx &ctx) { #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG) - bool dump = ctx.pidx == -1 ; + bool dump = ctx.pidx == -1; #endif - if(sigma_alpha == 0.f) + if (sigma_alpha == 0.f) { - *smeared_normal = *normal ; - return ; + *smeared_normal = *normal; + return; } - float f_max = fminf(1.f,4.f*sigma_alpha); + float f_max = fminf(1.f, 4.f * sigma_alpha); #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG) - if(dump) printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG sigma_alpha %10.5f f_max %10.5f \n", sigma_alpha, f_max ); + if (dump) + printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG sigma_alpha %10.5f f_max %10.5f \n", sigma_alpha, + f_max); #endif - float alpha, sin_alpha, phi, u0, u1, u2 ; - bool reject_alpha ; - 
bool reject_dir ; + float alpha, sin_alpha, phi, u0, u1, u2; + bool reject_alpha; + bool reject_dir; - do { - do { - //alpha = RandGaussQ_shoot(rng, 0.f, sigma_alpha ); // mean:0.f stdDev:sigma_alpha - u0 = curand_uniform(&rng) ; - alpha = -M_SQRT2f*erfcinvf(2.f*u0)*sigma_alpha ; + do + { + do + { + // alpha = RandGaussQ_shoot(rng, 0.f, sigma_alpha ); // mean:0.f stdDev:sigma_alpha + u0 = curand_uniform(&rng); + alpha = -M_SQRT2f * erfcinvf(2.f * u0) * sigma_alpha; sin_alpha = sinf(alpha); - u1 = curand_uniform(&rng) ; - reject_alpha = alpha >= M_PIf/2.f || (u1*f_max > sin_alpha) ; + u1 = curand_uniform(&rng); + reject_alpha = alpha >= M_PIf / 2.f || (u1 * f_max > sin_alpha); #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG) - if(dump) printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG u0 %10.5f alpha %10.5f sin_alpha %10.5f u1 %10.5f u1*f_max %10.5f (u1*f_max > sin_alpha) %d reject_alpha %d \n", - u0, alpha, sin_alpha, u1, u1*f_max, (u1*f_max > sin_alpha), reject_alpha ); + if (dump) + printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG u0 %10.5f alpha %10.5f sin_alpha %10.5f u1 " + "%10.5f u1*f_max %10.5f (u1*f_max > sin_alpha) %d reject_alpha %d \n", + u0, alpha, sin_alpha, u1, u1 * f_max, (u1 * f_max > sin_alpha), reject_alpha); // theres lots of alpha rejected : eg all -ve sin_alpha #endif - } while( reject_alpha ) ; + } while (reject_alpha); - u2 = curand_uniform(&rng) ; - phi = u2*M_PIf*2.f ; + u2 = curand_uniform(&rng); + phi = u2 * M_PIf * 2.f; - smeared_normal->x = sin_alpha * cosf(phi) ; - smeared_normal->y = sin_alpha * sinf(phi) ; - smeared_normal->z = cosf(alpha) ; + smeared_normal->x = sin_alpha * cosf(phi); + smeared_normal->y = sin_alpha * sinf(phi); + smeared_normal->z = cosf(alpha); smath::rotateUz(*smeared_normal, *normal); - reject_dir = dot(*smeared_normal, *direction ) >= 0.f ; + reject_dir = dot(*smeared_normal, *direction) >= 0.f; // reject smears that move the normal into same hemi as direction #if !defined(PRODUCTION) && 
defined(MOCK_CUDA_DEBUG) - if(dump) printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG u2 %10.5f phi %10.5f smeared_normal ( %10.5f, %10.5f, %10.5f) reject_dir %d \n", - u2, phi, smeared_normal->x, smeared_normal->y, smeared_normal->z, reject_dir ); + if (dump) + printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG u2 %10.5f phi %10.5f smeared_normal ( %10.5f, " + "%10.5f, %10.5f) reject_dir %d \n", + u2, phi, smeared_normal->x, smeared_normal->y, smeared_normal->z, reject_dir); #endif - - } while( reject_dir ) ; + } while (reject_dir); } /** @@ -325,53 +330,43 @@ CAUTION : THIS CURRENTLY NOT USED BY ANYTHING OTHER THAN TESTS : SEE DETAILS ABO **/ -inline QSIM_METHOD void qsim::SmearNormal_Polish( - RNG& rng, - float3* smeared_normal, - const float3* direction, - const float3* normal, - float polish, - const sctx& ctx - ) +inline QSIM_METHOD void qsim::SmearNormal_Polish(RNG &rng, float3 *smeared_normal, const float3 *direction, + const float3 *normal, float polish, const sctx &ctx) { #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG) - bool dump = ctx.pidx == -1 ; + bool dump = ctx.pidx == -1; #endif - if(polish == 1.f) + if (polish == 1.f) { - *smeared_normal = *normal ; - return ; + *smeared_normal = *normal; + return; } - float u0, u1, u2 ; - float3 smear ; - bool reject_mag ; - bool reject_dir ; + float u0, u1, u2; + float3 smear; + bool reject_mag; + bool reject_dir; - do { - do { + do + { + do + { u0 = curand_uniform(&rng); - u1 = curand_uniform(&rng) ; - u2 = curand_uniform(&rng) ; - smear.x = 2.f*u0 - 1.f ; - smear.y = 2.f*u1 - 1.f ; - smear.z = 2.f*u2 - 1.f ; - reject_mag = length(smear) > 1.f ; // HMM: could this use just dot(smear, smear) ? 
- } - while( reject_mag ); - - *smeared_normal = *normal + (1.f-polish)*smear; - reject_dir = dot(*smeared_normal, *direction) >= 0.f ; - } - while( reject_dir ); + u1 = curand_uniform(&rng); + u2 = curand_uniform(&rng); + smear.x = 2.f * u0 - 1.f; + smear.y = 2.f * u1 - 1.f; + smear.z = 2.f * u2 - 1.f; + reject_mag = length(smear) > 1.f; // HMM: could this use just dot(smear, smear) ? + } while (reject_mag); + + *smeared_normal = *normal + (1.f - polish) * smear; + reject_dir = dot(*smeared_normal, *direction) >= 0.f; + } while (reject_dir); *smeared_normal = normalize(*smeared_normal); } - - - - #endif #if defined(__CUDACC__) || defined(__CUDABE__) @@ -427,59 +422,56 @@ as opposed to local stack float3 : as this keeps changing the dir before arriving at the final one **/ -inline QSIM_METHOD void qsim::lambertian_direction(float3* dir, const float3* normal, float orient, RNG& rng, sctx& ctx ) +inline QSIM_METHOD void qsim::lambertian_direction(float3 *dir, const float3 *normal, float orient, RNG &rng, sctx &ctx) { #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - unsigned long long PIDX = 0xffffffffff ; - if(ctx.pidx == PIDX ) + unsigned long long PIDX = 0xffffffffff; + if (ctx.pidx == PIDX) { - printf("//qsim.lambertian_direction.head pidx %7lld : normal = np.array([%10.5f,%10.5f,%10.5f]) ; orient = %10.5f \n", - ctx.pidx, normal->x, normal->y, normal->z, orient ); + printf("//qsim.lambertian_direction.head pidx %7lld : normal = np.array([%10.5f,%10.5f,%10.5f]) ; orient = " + "%10.5f \n", + ctx.pidx, normal->x, normal->y, normal->z, orient); } #endif - float ndotv ; - int count = 0 ; - float u ; + float ndotv; + int count = 0; + float u; do { - count++ ; + count++; random_direction_marsaglia(dir, rng, ctx); // sets dir to random point on unit sphere - ndotv = dot( *dir, *normal )*orient ; - if( ndotv < 0.f ) + ndotv = dot(*dir, *normal) * orient; + if (ndotv < 0.f) { - *dir = -1.f*(*dir) ; - ndotv = -1.f*ndotv ; + *dir = -1.f * (*dir); + ndotv = -1.f * ndotv; } 
// when random dir is in opposite hemisphere to oriented normal // flip the dir into same hemi and ndotv - u = curand_uniform(&rng) ; + u = curand_uniform(&rng); #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == PIDX) + if (ctx.pidx == PIDX) { - printf("//qsim.lambertian_direction.loop pidx %7lld : dir = np.array([%10.5f,%10.5f,%10.5f]) ; count = %d ; ndotv = %10.5f ; u = %10.5f \n", - ctx.pidx, dir->x, dir->y, dir->z, count, ndotv, u ); - + printf("//qsim.lambertian_direction.loop pidx %7lld : dir = np.array([%10.5f,%10.5f,%10.5f]) ; count = %d " + "; ndotv = %10.5f ; u = %10.5f \n", + ctx.pidx, dir->x, dir->y, dir->z, count, ndotv, u); } #endif - } - while (!(u < ndotv) && (count < 1024)) ; + } while (!(u < ndotv) && (count < 1024)); // distribution looks pretty similar without the while loop - #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == PIDX) + if (ctx.pidx == PIDX) { - printf("//qsim.lambertian_direction.tail pidx %7lld : dir = np.array([%10.5f,%10.5f,%10.5f]) ; count = %d ; ndotv = %10.5f \n", - ctx.pidx, dir->x, dir->y, dir->z, count, ndotv ); - + printf("//qsim.lambertian_direction.tail pidx %7lld : dir = np.array([%10.5f,%10.5f,%10.5f]) ; count = %d ; " + "ndotv = %10.5f \n", + ctx.pidx, dir->x, dir->y, dir->z, count, ndotv); } #endif - - } /** @@ -547,32 +539,29 @@ So that means the random 3D (x,y,z) points are on the unit sphere. 
**/ - -inline QSIM_METHOD void qsim::random_direction_marsaglia(float3* dir, RNG& rng, sctx& ctx ) +inline QSIM_METHOD void qsim::random_direction_marsaglia(float3 *dir, RNG &rng, sctx &ctx) { // NB: no use of ctx.tagr so this has not been random aligned - float u0, u1 ; - float u, v, b, a ; + float u0, u1; + float u, v, b, a; do { u0 = curand_uniform(&rng); u1 = curand_uniform(&rng); - //if( idx == 0u ) printf("//qsim.random_direction_marsaglia pidx %7lld u0 %10.4f u1 %10.4f \n", ctx.pidx, u0, u1 ); - u = 2.f*u0 - 1.f ; - v = 2.f*u1 - 1.f ; - b = u*u + v*v ; - } - while( b > 1.f ) ; - - a = 2.f*sqrtf( 1.f - b ); - - dir->x = a*u ; - dir->y = a*v ; - dir->z = 2.f*b - 1.f ; + // if( idx == 0u ) printf("//qsim.random_direction_marsaglia pidx %7lld u0 %10.4f u1 %10.4f \n", ctx.pidx, u0, + // u1 ); + u = 2.f * u0 - 1.f; + v = 2.f * u1 - 1.f; + b = u * u + v * v; + } while (b > 1.f); + + a = 2.f * sqrtf(1.f - b); + + dir->x = a * u; + dir->y = a * v; + dir->z = 2.f * b - 1.f; } - - /** qsim::rayleigh_scatter ------------------------------ @@ -598,73 +587,74 @@ Transverse wave nature means:: **/ -inline QSIM_METHOD void qsim::rayleigh_scatter(RNG& rng, sctx& ctx ) +inline QSIM_METHOD void qsim::rayleigh_scatter(RNG &rng, sctx &ctx) { - sphoton& p = ctx.p ; - float3 direction ; - float3 polarization ; + sphoton &p = ctx.p; + float3 direction; + float3 polarization; - bool looping(true) ; + bool looping(true); do { - float u0 = curand_uniform(&rng) ; - float u1 = curand_uniform(&rng) ; - float u2 = curand_uniform(&rng) ; - float u3 = curand_uniform(&rng) ; - float u4 = curand_uniform(&rng) ; + float u0 = curand_uniform(&rng); + float u1 = curand_uniform(&rng); + float u2 = curand_uniform(&rng); + float u3 = curand_uniform(&rng); + float u4 = curand_uniform(&rng); #if !defined(PRODUCTION) && defined(DEBUG_TAG) - stagr& tagr = ctx.tagr ; // UNTESTED + stagr &tagr = ctx.tagr; // UNTESTED tagr.add(stag_sc, u0); tagr.add(stag_sc, u1); tagr.add(stag_sc, u2); tagr.add(stag_sc, 
u3); tagr.add(stag_sc, u4); #endif - float cosTheta = u0 ; - float sinTheta = sqrtf(1.0f-u0*u0); - if(u1 < 0.5f ) cosTheta = -cosTheta ; + float cosTheta = u0; + float sinTheta = sqrtf(1.0f - u0 * u0); + if (u1 < 0.5f) + cosTheta = -cosTheta; // could use uniform_sphere here : but not doing so to follow G4OpRayleigh more closely - float sinPhi ; - float cosPhi ; + float sinPhi; + float cosPhi; -#if defined(MOCK_CURAND ) || defined(MOCK_CUDA) +#if defined(MOCK_CURAND) || defined(MOCK_CUDA) //__sincosf(2.f*M_PIf*u2,&sinPhi,&cosPhi); // apple extension - float phi = 2.f*M_PIf*u2 ; + float phi = 2.f * M_PIf * u2; sinPhi = sinf(phi); cosPhi = cosf(phi); #else - sincosf(2.f*M_PIf*u2,&sinPhi,&cosPhi); + sincosf(2.f * M_PIf * u2, &sinPhi, &cosPhi); #endif direction.x = sinTheta * cosPhi; direction.y = sinTheta * sinPhi; - direction.z = cosTheta ; + direction.z = cosTheta; - smath::rotateUz(direction, p.mom ); + smath::rotateUz(direction, p.mom); - float constant = -dot(direction, p.pol ); + float constant = -dot(direction, p.pol); - polarization.x = p.pol.x + constant*direction.x ; - polarization.y = p.pol.y + constant*direction.y ; - polarization.z = p.pol.z + constant*direction.z ; + polarization.x = p.pol.x + constant * direction.x; + polarization.y = p.pol.y + constant * direction.y; + polarization.z = p.pol.z + constant * direction.z; - if(dot(polarization, polarization) == 0.f ) + if (dot(polarization, polarization) == 0.f) { -#if defined( MOCK_CURAND ) || defined(MOCK_CUDA) +#if defined(MOCK_CURAND) || defined(MOCK_CUDA) //__sincosf(2.f*M_PIf*u3,&sinPhi,&cosPhi); - phi = 2.f*M_PIf*u3 ; + phi = 2.f * M_PIf * u3; sinPhi = sinf(phi); cosPhi = cosf(phi); #else - sincosf(2.f*M_PIf*u3,&sinPhi,&cosPhi); + sincosf(2.f * M_PIf * u3, &sinPhi, &cosPhi); #endif - polarization.x = cosPhi ; - polarization.y = sinPhi ; - polarization.z = 0.f ; + polarization.x = cosPhi; + polarization.y = sinPhi; + polarization.z = 0.f; smath::rotateUz(polarization, direction); } @@ -672,37 
+662,41 @@ inline QSIM_METHOD void qsim::rayleigh_scatter(RNG& rng, sctx& ctx ) { // There are two directions which are perpendicular // to the new momentum direction - if(u3 < 0.5f) polarization = -polarization ; + if (u3 < 0.5f) + polarization = -polarization; } polarization = normalize(polarization); // simulate according to the distribution cos^2(theta) // where theta is the angle between old and new polarizations - float doCosTheta = dot(polarization, p.pol ) ; - float doCosTheta2 = doCosTheta*doCosTheta ; - looping = doCosTheta2 < u4 ; + float doCosTheta = dot(polarization, p.pol); + float doCosTheta2 = doCosTheta * doCosTheta; + looping = doCosTheta2 < u4; - } while ( looping ) ; + } while (looping); - p.mom = direction ; - p.pol = polarization ; + p.mom = direction; + p.pol = polarization; } - /** qsim::propagate_to_boundary ------------------------------ +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+ -| flag | command | changed | note | +| flag | command | changed | note | +=====================+==================+=========================================================+=======================================================+ -| BULK_REEMIT | CONTINUE | time, position, direction, polarization, wavelength | advance to reemit position with everything changed | +| BULK_REEMIT | CONTINUE | time, position, direction, polarization, wavelength | advance to reemit +position with everything changed | +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+ -| BULK_SCATTER | CONTINUE | time, position, direction, polarization | advance to scatter position, new dir+pol | +| BULK_SCATTER | CONTINUE | time, position, direction, polarization | advance to scatter +position, new dir+pol | 
+---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+ -| BULK_ABSORB | BREAK | time, position | advance to absorption position, dir+pol unchanged | +| BULK_ABSORB | BREAK | time, position | advance to +absorption position, dir+pol unchanged | +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+ -| not set "SAIL" | BOUNDARY | time, position | advanced to border position, dir+pol unchanged | +| not set "SAIL" | BOUNDARY | time, position | advanced to border +position, dir+pol unchanged | +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+ @@ -713,94 +707,156 @@ qsim::propagate_to_boundary **/ - - -inline QSIM_METHOD int qsim::propagate_to_boundary(unsigned& flag, RNG& rng, sctx& ctx) +inline QSIM_METHOD int qsim::propagate_to_boundary(unsigned &flag, RNG &rng, sctx &ctx) { - sphoton& p = ctx.p ; - const sstate& s = ctx.s ; - - const float& absorption_length = s.material1.y ; - const float& scattering_length = s.material1.z ; - const float& reemission_prob = s.material1.w ; - const float& group_velocity = s.m1group2.x ; - const float& distance_to_boundary = ctx.prd->q0.f.w ; + sphoton &p = ctx.p; + const sstate &s = ctx.s; + const float &absorption_length = s.material1.y; + const float &scattering_length = s.material1.z; + const float &reemission_prob = s.material1.w; + const float &group_velocity = s.m1group2.x; + const float &wls_absorption_length = s.m1group2.y; + const float &distance_to_boundary = ctx.prd->q0.f.w; #if !defined(PRODUCTION) && defined(DEBUG_TAG) - float u_to_sci = curand_uniform(&rng) ; // purely for alignment with G4 - float u_to_bnd = curand_uniform(&rng) ; // purely for alignment with G4 + float u_to_sci = curand_uniform(&rng); // purely 
for alignment with G4 + float u_to_bnd = curand_uniform(&rng); // purely for alignment with G4 #endif - float u_scattering = curand_uniform(&rng) ; - float u_absorption = curand_uniform(&rng) ; + float u_scattering = curand_uniform(&rng); + float u_absorption = curand_uniform(&rng); + float u_wls_absorption = (wls != nullptr) ? curand_uniform(&rng) : 2.f; #if !defined(PRODUCTION) && defined(DEBUG_TAG) - stagr& tagr = ctx.tagr ; - tagr.add( stag_to_sci, u_to_sci); - tagr.add( stag_to_bnd, u_to_bnd); - tagr.add( stag_to_sca, u_scattering); - tagr.add( stag_to_abs, u_absorption); + stagr &tagr = ctx.tagr; + tagr.add(stag_to_sci, u_to_sci); + tagr.add(stag_to_bnd, u_to_bnd); + tagr.add(stag_to_sca, u_scattering); + tagr.add(stag_to_abs, u_absorption); #endif - #if !defined(PRODUCTION) && defined(DEBUG_LOGF) - // see notes/issues/U4LogTest_maybe_replacing_G4Log_G4UniformRand_in_Absorption_and_Scattering_with_float_version_will_avoid_deviations.rst - float scattering_distance = -scattering_length*KLUDGE_FASTMATH_LOGF(u_scattering); - float absorption_distance = -absorption_length*KLUDGE_FASTMATH_LOGF(u_absorption); + // see + // notes/issues/U4LogTest_maybe_replacing_G4Log_G4UniformRand_in_Absorption_and_Scattering_with_float_version_will_avoid_deviations.rst + float scattering_distance = -scattering_length * KLUDGE_FASTMATH_LOGF(u_scattering); + float absorption_distance = -absorption_length * KLUDGE_FASTMATH_LOGF(u_absorption); + float wls_absorption_distance = -wls_absorption_length * KLUDGE_FASTMATH_LOGF(u_wls_absorption); #else - float scattering_distance = -scattering_length*logf(u_scattering); - float absorption_distance = -absorption_length*logf(u_absorption); + float scattering_distance = -scattering_length * logf(u_scattering); + float absorption_distance = -absorption_length * logf(u_absorption); + float wls_absorption_distance = -wls_absorption_length * logf(u_wls_absorption); #endif #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) 
+ if (ctx.pidx == base->pidx) { - printf("//qsim.propagate_to_boundary.head pidx %7lld : u_absorption %10.8f logf(u_absorption) %10.8f absorption_length %10.4f absorption_distance %10.6f \n", - ctx.pidx, u_absorption, logf(u_absorption), absorption_length, absorption_distance ); + printf("//qsim.propagate_to_boundary.head pidx %7lld : u_absorption %10.8f logf(u_absorption) %10.8f " + "absorption_length %10.4f absorption_distance %10.6f \n", + ctx.pidx, u_absorption, logf(u_absorption), absorption_length, absorption_distance); - printf("//qsim.propagate_to_boundary.head pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) \n", - ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time ); + printf("//qsim.propagate_to_boundary.head pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) \n", + ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time); - printf("//qsim.propagate_to_boundary.head pidx %7lld : distance_to_boundary %10.4f absorption_distance %10.4f scattering_distance %10.4f \n", - ctx.pidx, distance_to_boundary, absorption_distance, scattering_distance ); - - printf("//qsim.propagate_to_boundary.head pidx %7lld : u_scattering %10.4f u_absorption %10.4f \n", - ctx.pidx, u_scattering, u_absorption ); + printf("//qsim.propagate_to_boundary.head pidx %7lld : distance_to_boundary %10.4f absorption_distance %10.4f " + "scattering_distance %10.4f \n", + ctx.pidx, distance_to_boundary, absorption_distance, scattering_distance); + printf("//qsim.propagate_to_boundary.head pidx %7lld : u_scattering %10.4f u_absorption %10.4f \n", ctx.pidx, + u_scattering, u_absorption); } #endif + // WLS absorption competes with regular absorption and Rayleigh scattering. + // The process with the shortest sampled distance wins. 
+ bool wls_wins = wls_absorption_distance <= absorption_distance && wls_absorption_distance <= scattering_distance; + + if (wls != nullptr && wls_wins && wls_absorption_distance <= distance_to_boundary) + { + // WLS ABSORPTION: photon absorbed by wavelength shifting material + p.time += wls_absorption_distance / group_velocity; + p.pos += wls_absorption_distance * (p.mom); + + unsigned mat_idx = s.index.x - 1u; // 0-based material index from 1-based optical index + if (wls->has_wls(mat_idx)) + { + // Sample re-emitted wavelength from WLS emission spectrum ICDF + float u_wls_wl = curand_uniform(&rng); + float new_wavelength = wls->wavelength(mat_idx, u_wls_wl); + + // Energy conservation: re-emitted photon must have lower energy (longer wavelength). + // Matches G4OpWLS algorithm: retry up to 100 times. + int attempts = 0; + while (new_wavelength < p.wavelength && attempts < 100) + { + u_wls_wl = curand_uniform(&rng); + new_wavelength = wls->wavelength(mat_idx, u_wls_wl); + attempts++; + } + + if (new_wavelength < p.wavelength) + { + // Failed energy conservation after 100 attempts — absorb without re-emission + flag = BULK_ABSORB; + return BREAK; + } + p.wavelength = new_wavelength; + // Isotropic re-emission direction and random polarization + float u_wls_mom_ph = curand_uniform(&rng); + float u_wls_mom_ct = curand_uniform(&rng); + float u_wls_pol_ph = curand_uniform(&rng); + float u_wls_pol_ct = curand_uniform(&rng); - if (absorption_distance <= scattering_distance) + p.mom = uniform_sphere(u_wls_mom_ph, u_wls_mom_ct); + p.pol = normalize(cross(uniform_sphere(u_wls_pol_ph, u_wls_pol_ct), p.mom)); + + // Apply WLS time delay (exponential decay) + float tc = wls->time_constant(mat_idx); + if (tc > 0.f) + { + float u_wls_time = curand_uniform(&rng); + p.time += -tc * logf(u_wls_time); + } + + flag = BULK_REEMIT; + return CONTINUE; + } + else + { + // Material map says no WLS — treat as regular absorption + flag = BULK_ABSORB; + return BREAK; + } + } + else if 
(absorption_distance <= scattering_distance) { if (absorption_distance <= distance_to_boundary) { - p.time += absorption_distance/group_velocity ; - p.pos += absorption_distance*(p.mom) ; - + p.time += absorption_distance / group_velocity; + p.pos += absorption_distance * (p.mom); #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - float absorb_time_delta = absorption_distance/group_velocity ; - if( ctx.pidx == base->pidx ) + float absorb_time_delta = absorption_distance / group_velocity; + if (ctx.pidx == base->pidx) { - printf("//qsim.propagate_to_boundary.body.BULK_ABSORB pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) ; absorb_time_delta = %10.8f \n", - ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time, absorb_time_delta ); - + printf("//qsim.propagate_to_boundary.body.BULK_ABSORB pidx %7lld : post = " + "np.array([%10.5f,%10.5f,%10.5f,%10.5f]) ; absorb_time_delta = %10.8f \n", + ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time, absorb_time_delta); } #endif - float u_reemit = reemission_prob == 0.f ? 2.f : curand_uniform(&rng); // avoid consumption at absorption when not scintillator - + float u_reemit = reemission_prob == 0.f + ? 
2.f + : curand_uniform(&rng); // avoid consumption at absorption when not scintillator #if !defined(PRODUCTION) && defined(DEBUG_TAG) - if( u_reemit != 2.f ) tagr.add( stag_to_ree, u_reemit) ; + if (u_reemit != 2.f) + tagr.add(stag_to_ree, u_reemit); #endif - if (u_reemit < reemission_prob) { float u_re_wavelength = curand_uniform(&rng); @@ -814,19 +870,19 @@ inline QSIM_METHOD int qsim::propagate_to_boundary(unsigned& flag, RNG& rng, sct p.pol = normalize(cross(uniform_sphere(u_re_pol_ph, u_re_pol_ct), p.mom)); #if !defined(PRODUCTION) && defined(DEBUG_TAG) - tagr.add( stag_re_wl, u_re_wavelength); - tagr.add( stag_re_mom_ph, u_re_mom_ph); - tagr.add( stag_re_mom_ct, u_re_mom_ct); - tagr.add( stag_re_pol_ph, u_re_pol_ph); - tagr.add( stag_re_pol_ct, u_re_pol_ct); + tagr.add(stag_re_wl, u_re_wavelength); + tagr.add(stag_re_mom_ph, u_re_mom_ph); + tagr.add(stag_re_mom_ct, u_re_mom_ct); + tagr.add(stag_re_pol_ph, u_re_pol_ph); + tagr.add(stag_re_pol_ct, u_re_pol_ct); #endif - flag = BULK_REEMIT ; + flag = BULK_REEMIT; return CONTINUE; } else { - flag = BULK_ABSORB ; + flag = BULK_ABSORB; return BREAK; } } @@ -836,33 +892,33 @@ inline QSIM_METHOD int qsim::propagate_to_boundary(unsigned& flag, RNG& rng, sct { if (scattering_distance <= distance_to_boundary) { - p.time += scattering_distance/group_velocity ; - p.pos += scattering_distance*(p.mom) ; + p.time += scattering_distance / group_velocity; + p.pos += scattering_distance * (p.mom); - rayleigh_scatter(rng, ctx); // changes dir and pol, consumes 5u at each turn of rejection sampling loop + rayleigh_scatter(rng, ctx); // changes dir and pol, consumes 5u at each turn of rejection sampling loop flag = BULK_SCATTER; return CONTINUE; } - // otherwise sail to boundary - } // if scattering_distance < absorption_distance - - + // otherwise sail to boundary + } // if scattering_distance < absorption_distance - p.pos += distance_to_boundary*(p.mom) ; - p.time += distance_to_boundary/group_velocity ; + p.pos += 
distance_to_boundary * (p.mom); + p.time += distance_to_boundary / group_velocity; #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - float sail_time_delta = distance_to_boundary/group_velocity ; - if( ctx.pidx == base->pidx ) printf("//qsim.propagate_to_boundary.tail.SAIL pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) ; sail_time_delta = %10.5f \n", - ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time, sail_time_delta ); + float sail_time_delta = distance_to_boundary / group_velocity; + if (ctx.pidx == base->pidx) + printf("//qsim.propagate_to_boundary.tail.SAIL pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) ; " + "sail_time_delta = %10.5f \n", + ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time, sail_time_delta); #endif - return BOUNDARY ; + return BOUNDARY; } #endif -#if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA) +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) /** qsim::propagate_at_boundary ------------------------------------------ @@ -995,74 +1051,88 @@ incidence. 
**/ -inline QSIM_METHOD int qsim::propagate_at_boundary(unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance ) const +inline QSIM_METHOD int qsim::propagate_at_boundary(unsigned &flag, RNG &rng, sctx &ctx, float theTransmittance) const { #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) - printf("//propagate_at_boundary.DEBUG_PIDX ctx.pidx %7lld base %p base.pidx %7lld \n", ctx.pidx, base, base->pidx ); + if (ctx.pidx == base->pidx) + printf("//propagate_at_boundary.DEBUG_PIDX ctx.pidx %7lld base %p base.pidx %7lld \n", ctx.pidx, base, + base->pidx); #endif #if !defined(PRODUCTION) && defined(DEBUG_TAG) - if(ctx.pidx == base->pidx) - printf("//propagate_at_boundary.DEBUG_TAG ctx.pidx %7lld base %p base.pidx %7lld \n", ctx.pidx, base, base->pidx ); + if (ctx.pidx == base->pidx) + printf("//propagate_at_boundary.DEBUG_TAG ctx.pidx %7lld base %p base.pidx %7lld \n", ctx.pidx, base, + base->pidx); #endif // stray "return 0;" left here 2024-12-14 caused : ~/j/issues/jok-tds-missing-BR-BT-on-A-side.rst - sphoton& p = ctx.p ; - const sstate& s = ctx.s ; + sphoton &p = ctx.p; + const sstate &s = ctx.s; - const float& n1 = s.material1.x ; - const float& n2 = s.material2.x ; - const float eta = n1/n2 ; + const float &n1 = s.material1.x; + const float &n2 = s.material2.x; + const float eta = n1 / n2; - const float3* normal = (float3*)&ctx.prd->q0.f.x ; // geometrical outwards normal + const float3 *normal = (float3 *)&ctx.prd->q0.f.x; // geometrical outwards normal - const float _c1 = -dot(p.mom, *normal ); // _c1 : cos(angle_of_incidence) not yet oriented - const float3 oriented_normal = _c1 < 0.f ? 
-(*normal) : (*normal) ; // oriented against incident p.mom - const float3 trans = cross(p.mom, oriented_normal) ; // perpendicular to plane of incidence, S-pol direction - const float trans_length = length(trans) ; // same as sin(theta), as p.mom and oriented_normal are unit vectors - const bool normal_incidence = trans_length < 1e-6f ; // p.mom parallel/anti-parallel to oriented_normal - const float3 A_trans = normal_incidence ? p.pol : trans/trans_length ; // normalized unit vector : perpendicular to plane of incidence - const float E1_perp = dot(p.pol, A_trans); // amplitude of polarization in direction perpendicular to plane of incidence, ie S polarization + const float _c1 = -dot(p.mom, *normal); // _c1 : cos(angle_of_incidence) not yet oriented + const float3 oriented_normal = _c1 < 0.f ? -(*normal) : (*normal); // oriented against incident p.mom + const float3 trans = cross(p.mom, oriented_normal); // perpendicular to plane of incidence, S-pol direction + const float trans_length = length(trans); // same as sin(theta), as p.mom and oriented_normal are unit vectors + const bool normal_incidence = trans_length < 1e-6f; // p.mom parallel/anti-parallel to oriented_normal + const float3 A_trans = + normal_incidence ? 
p.pol : trans / trans_length; // normalized unit vector : perpendicular to plane of incidence + const float E1_perp = + dot(p.pol, + A_trans); // amplitude of polarization in direction perpendicular to plane of incidence, ie S polarization - const float c1 = fabs(_c1) ; + const float c1 = fabs(_c1); #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) + if (ctx.pidx == base->pidx) { - printf("//qsim.propagate_at_boundary.head pidx %7lld : theTransmittance = %10.8f \n", ctx.pidx, theTransmittance ); - printf("//qsim.propagate_at_boundary.head pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f \n", - ctx.pidx, oriented_normal.x, oriented_normal.y, oriented_normal.z, length(oriented_normal) ); - printf("//qsim.propagate_at_boundary.head pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n", - ctx.pidx, p.pos.x, p.pos.y, p.pos.z, length(p.pos) ); - printf("//qsim.propagate_at_boundary.head pidx %7lld : mom0 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom0 = %10.8f \n", - ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom) ); - printf("//qsim.propagate_at_boundary.head pidx %7lld : pol0 = np.array([%10.8f,%10.8f,%10.8f]) ; lpol0 = %10.8f \n", - ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol) ); - printf("//qsim.propagate_at_boundary.head pidx %7lld : n1,n2,eta = (%10.8f,%10.8f,%10.8f) \n", ctx.pidx, n1, n2, eta ); - printf("//qsim.propagate_at_boundary.head pidx %7lld : c1 = %10.8f ; normal_incidence = %d \n", ctx.pidx, c1, normal_incidence ); + printf("//qsim.propagate_at_boundary.head pidx %7lld : theTransmittance = %10.8f \n", ctx.pidx, + theTransmittance); + printf( + "//qsim.propagate_at_boundary.head pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f \n", + ctx.pidx, oriented_normal.x, oriented_normal.y, oriented_normal.z, length(oriented_normal)); + printf( + "//qsim.propagate_at_boundary.head pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n", + ctx.pidx, p.pos.x, 
p.pos.y, p.pos.z, length(p.pos)); + printf("//qsim.propagate_at_boundary.head pidx %7lld : mom0 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom0 = " + "%10.8f \n", + ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom)); + printf("//qsim.propagate_at_boundary.head pidx %7lld : pol0 = np.array([%10.8f,%10.8f,%10.8f]) ; lpol0 = " + "%10.8f \n", + ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol)); + printf("//qsim.propagate_at_boundary.head pidx %7lld : n1,n2,eta = (%10.8f,%10.8f,%10.8f) \n", ctx.pidx, n1, n2, + eta); + printf("//qsim.propagate_at_boundary.head pidx %7lld : c1 = %10.8f ; normal_incidence = %d \n", ctx.pidx, c1, + normal_incidence); } #endif - const float c2c2 = 1.f - eta*eta*(1.f - c1 * c1 ) ; // Snells law and trig identity - bool tir = c2c2 < 0.f ; - const float EdotN = dot(p.pol, oriented_normal ) ; // used for TIR polarization - const float c2 = tir ? 0.f : sqrtf(c2c2) ; // c2 chosen +ve, set to 0.f for TIR => reflection_coefficient = 1.0f : so will always reflect - const float n1c1 = n1*c1 ; - const float n2c2 = n2*c2 ; - const float n2c1 = n2*c1 ; - const float n1c2 = n1*c2 ; - - const float2 E1 = normal_incidence ? make_float2( 0.f, 1.f) : make_float2( E1_perp , length( p.pol - (E1_perp*A_trans) ) ); - const float2 E2_t = make_float2( 2.f*n1c1*E1.x/(n1c1+n2c2), 2.f*n1c1*E1.y/(n2c1+n1c2) ) ; // ( S:perp, P:parl ) - const float2 E2_r = make_float2( E2_t.x - E1.x , (n2*E2_t.y/n1) - E1.y ) ; // ( S:perp, P:parl ) - const float2 RR = normalize(E2_r) ; - const float2 TT = normalize(E2_t) ; - const float TransCoeff = theTransmittance >= 0.f ? - theTransmittance - : - ( tir || n1c1 == 0.f ? 0.f : n2c2*dot(E2_t,E2_t)/n1c1 ) - ; + const float c2c2 = 1.f - eta * eta * (1.f - c1 * c1); // Snells law and trig identity + bool tir = c2c2 < 0.f; + const float EdotN = dot(p.pol, oriented_normal); // used for TIR polarization + const float c2 = + tir ? 
0.f + : sqrtf( + c2c2); // c2 chosen +ve, set to 0.f for TIR => reflection_coefficient = 1.0f : so will always reflect + const float n1c1 = n1 * c1; + const float n2c2 = n2 * c2; + const float n2c1 = n2 * c1; + const float n1c2 = n1 * c2; + + const float2 E1 = + normal_incidence ? make_float2(0.f, 1.f) : make_float2(E1_perp, length(p.pol - (E1_perp * A_trans))); + const float2 E2_t = + make_float2(2.f * n1c1 * E1.x / (n1c1 + n2c2), 2.f * n1c1 * E1.y / (n2c1 + n1c2)); // ( S:perp, P:parl ) + const float2 E2_r = make_float2(E2_t.x - E1.x, (n2 * E2_t.y / n1) - E1.y); // ( S:perp, P:parl ) + const float2 RR = normalize(E2_r); + const float2 TT = normalize(E2_t); + const float TransCoeff = + theTransmittance >= 0.f ? theTransmittance : (tir || n1c1 == 0.f ? 0.f : n2c2 * dot(E2_t, E2_t) / n1c1); /* E1, E2_t, E2_t: incident, transmitted and reflected amplitudes in S and P directions @@ -1070,138 +1140,128 @@ inline QSIM_METHOD int qsim::propagate_at_boundary(unsigned& flag, RNG& rng, sct */ #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) + if (ctx.pidx == base->pidx) { - printf("//qsim.propagate_at_boundary.body pidx %7lld : TransCoeff = %10.8f ; n1c1 = %10.8f ; n2c2 = %10.8f \n", - ctx.pidx, TransCoeff, n1c1, n2c2 ); - - printf("//qsim.propagate_at_boundary.body pidx %7lld : E2_t = np.array([%10.8f,%10.8f]) ; lE2_t = %10.8f \n", - ctx.pidx, E2_t.x, E2_t.y, length(E2_t) ); + printf("//qsim.propagate_at_boundary.body pidx %7lld : TransCoeff = %10.8f ; n1c1 = %10.8f ; n2c2 = %10.8f \n", + ctx.pidx, TransCoeff, n1c1, n2c2); - printf("//qsim.propagate_at_boundary.body pidx %7lld : A_trans = np.array([%10.8f,%10.8f,%10.8f]) ; lA_trans = %10.8f \n", - ctx.pidx, A_trans.x, A_trans.y, A_trans.z, length(A_trans) ); + printf("//qsim.propagate_at_boundary.body pidx %7lld : E2_t = np.array([%10.8f,%10.8f]) ; lE2_t = %10.8f \n", + ctx.pidx, E2_t.x, E2_t.y, length(E2_t)); + printf("//qsim.propagate_at_boundary.body pidx %7lld : A_trans = 
np.array([%10.8f,%10.8f,%10.8f]) ; lA_trans = " + "%10.8f \n", + ctx.pidx, A_trans.x, A_trans.y, A_trans.z, length(A_trans)); } #endif - #if !defined(PRODUCTION) && defined(DEBUG_TAG) - const float u_boundary_burn = curand_uniform(&rng) ; // needed for random consumption alignment with Geant4 G4OpBoundaryProcess::PostStepDoIt + const float u_boundary_burn = + curand_uniform(&rng); // needed for random consumption alignment with Geant4 G4OpBoundaryProcess::PostStepDoIt #endif - const float u_reflect = curand_uniform(&rng) ; - bool reflect = u_reflect > TransCoeff ; + const float u_reflect = curand_uniform(&rng); + bool reflect = u_reflect > TransCoeff; #if !defined(PRODUCTION) && defined(DEBUG_TAG) - stagr& tagr = ctx.tagr ; - tagr.add( stag_at_burn_sf_sd, u_boundary_burn); - tagr.add( stag_at_ref, u_reflect); + stagr &tagr = ctx.tagr; + tagr.add(stag_at_burn_sf_sd, u_boundary_burn); + tagr.add(stag_at_ref, u_reflect); #endif #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) + if (ctx.pidx == base->pidx) { - printf("//qsim.propagate_at_boundary.body pidx %7lld : u_reflect %10.4f TransCoeff %10.4f reflect %d \n", - ctx.pidx, u_reflect, TransCoeff, reflect ); - - printf("//qsim.propagate_at_boundary.body pidx %7lld : mom0 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom0 = %10.8f \n", - ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom) ) ; + printf("//qsim.propagate_at_boundary.body pidx %7lld : u_reflect %10.4f TransCoeff %10.4f reflect %d \n", + ctx.pidx, u_reflect, TransCoeff, reflect); - printf("//qsim.propagate_at_boundary.body pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n", - ctx.pidx, p.pos.x, p.pos.y, p.pos.z, length(p.pos) ); + printf("//qsim.propagate_at_boundary.body pidx %7lld : mom0 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom0 = " + "%10.8f \n", + ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom)); - printf("//qsim.propagate_at_boundary.body pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f 
\n", - ctx.pidx, oriented_normal.x, oriented_normal.y, oriented_normal.z, length(oriented_normal) ) ; + printf( + "//qsim.propagate_at_boundary.body pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n", + ctx.pidx, p.pos.x, p.pos.y, p.pos.z, length(p.pos)); - printf("//qsim.propagate_at_boundary.body pidx %7lld : n1 = %10.8f ; n2 = %10.8f ; eta = %10.8f \n", - ctx.pidx, n1, n2, eta ); + printf( + "//qsim.propagate_at_boundary.body pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f \n", + ctx.pidx, oriented_normal.x, oriented_normal.y, oriented_normal.z, length(oriented_normal)); - printf("//qsim.propagate_at_boundary.body pidx %7lld : c1 = %10.8f ; eta_c1 = %10.8f ; c2 = %10.8f ; eta_c1__c2 = %10.8f \n", - ctx.pidx, c1, eta*c1, c2, (eta*c1 - c2) ); + printf("//qsim.propagate_at_boundary.body pidx %7lld : n1 = %10.8f ; n2 = %10.8f ; eta = %10.8f \n", ctx.pidx, + n1, n2, eta); + printf("//qsim.propagate_at_boundary.body pidx %7lld : c1 = %10.8f ; eta_c1 = %10.8f ; c2 = %10.8f ; " + "eta_c1__c2 = %10.8f \n", + ctx.pidx, c1, eta * c1, c2, (eta * c1 - c2)); } #endif - p.mom = reflect - ? - p.mom + 2.0f*c1*oriented_normal - : - eta*(p.mom) + (eta*c1 - c2)*oriented_normal - ; - + p.mom = reflect ? p.mom + 2.0f * c1 * oriented_normal : eta * (p.mom) + (eta * c1 - c2) * oriented_normal; // Q: Does the new p.mom need to be normalized ? // A: NO, it is inherently normalized as derived in the comment below + const float3 A_paral = normalize(cross(p.mom, A_trans)); // new P-pol direction - const float3 A_paral = normalize(cross(p.mom, A_trans)); // new P-pol direction - - p.pol = normal_incidence ? - ( reflect ? p.pol*(n2>n1? -1.f:1.f) : p.pol ) - : - ( reflect ? - ( tir ? -p.pol + 2.f*EdotN*oriented_normal : RR.x*A_trans + RR.y*A_paral ) - - : - TT.x*A_trans + TT.y*A_paral - - ) - ; - + p.pol = normal_incidence + ? (reflect ? p.pol * (n2 > n1 ? -1.f : 1.f) : p.pol) + : (reflect ? (tir ? 
-p.pol + 2.f * EdotN * oriented_normal : RR.x * A_trans + RR.y * A_paral) + : TT.x * A_trans + TT.y * A_paral - // Q: Above expression kinda implies A_trans and A_paral are same for reflect and transmit ? - // A: NO IT DOESNT, - // A_trans is the same (except for normal incidence) as there is only one perpendicular - // to the plane of incidence which is the same for i,r,t. - // - // A_paral depends on the new p.mom (is has to be orthogonal to p.mom and A_trans) - // and p.mom of course is different for r and t - // (the reflect bool is used in multiple places, not just here) - + ); + // Q: Above expression kinda implies A_trans and A_paral are same for reflect and transmit ? + // A: NO IT DOESNT, + // A_trans is the same (except for normal incidence) as there is only one perpendicular + // to the plane of incidence which is the same for i,r,t. + // + // A_paral depends on the new p.mom (is has to be orthogonal to p.mom and A_trans) + // and p.mom of course is different for r and t + // (the reflect bool is used in multiple places, not just here) #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) + if (ctx.pidx == base->pidx) { - printf("//qsim.propagate_at_boundary.tail pidx %7lld : reflect %d tir %d TransCoeff %10.4f u_reflect %10.4f \n", ctx.pidx, reflect, tir, TransCoeff, u_reflect ); - printf("//qsim.propagate_at_boundary.tail pidx %7lld : mom1 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom1 = %10.8f \n", - ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom) ); - printf("//qsim.propagate_at_boundary.tail pidx %7lld : pol1 = np.array([%10.8f,%10.8f,%10.8f]) ; lpol1 = %10.8f \n", - ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol) ); + printf("//qsim.propagate_at_boundary.tail pidx %7lld : reflect %d tir %d TransCoeff %10.4f u_reflect %10.4f \n", + ctx.pidx, reflect, tir, TransCoeff, u_reflect); + printf("//qsim.propagate_at_boundary.tail pidx %7lld : mom1 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom1 = " + "%10.8f \n", + ctx.pidx, p.mom.x, 
p.mom.y, p.mom.z, length(p.mom)); + printf("//qsim.propagate_at_boundary.tail pidx %7lld : pol1 = np.array([%10.8f,%10.8f,%10.8f]) ; lpol1 = " + "%10.8f \n", + ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol)); } /* if(ctx.pidx == 251959) { - printf("//qsim.propagate_at_boundary RR.x %10.4f A_trans (%10.4f %10.4f %10.4f ) RR.y %10.4f A_paral (%10.4f %10.4f %10.4f ) \n", - RR.x, A_trans.x, A_trans.y, A_trans.z, - RR.y, A_paral.x, A_paral.y, A_paral.z ); + printf("//qsim.propagate_at_boundary RR.x %10.4f A_trans (%10.4f %10.4f %10.4f ) RR.y %10.4f A_paral (%10.4f + %10.4f %10.4f ) \n", RR.x, A_trans.x, A_trans.y, A_trans.z, RR.y, A_paral.x, A_paral.y, A_paral.z ); - printf("//qsim.propagate_at_boundary reflect %d tir %d polarization (%10.4f, %10.4f, %10.4f) \n", reflect, tir, p.pol.x, p.pol.y, p.pol.z ); + printf("//qsim.propagate_at_boundary reflect %d tir %d polarization (%10.4f, %10.4f, %10.4f) \n", reflect, tir, + p.pol.x, p.pol.y, p.pol.z ); } */ #endif - flag = reflect ? BOUNDARY_REFLECT : BOUNDARY_TRANSMIT ; - + flag = reflect ? 
BOUNDARY_REFLECT : BOUNDARY_TRANSMIT; #if !defined(PRODUCTION) && defined(DEBUG_TAG) - if( flag == BOUNDARY_REFLECT ) + if (flag == BOUNDARY_REFLECT) { - const float u_br_align_0 = curand_uniform(&rng) ; - const float u_br_align_1 = curand_uniform(&rng) ; - const float u_br_align_2 = curand_uniform(&rng) ; - const float u_br_align_3 = curand_uniform(&rng) ; + const float u_br_align_0 = curand_uniform(&rng); + const float u_br_align_1 = curand_uniform(&rng); + const float u_br_align_2 = curand_uniform(&rng); + const float u_br_align_3 = curand_uniform(&rng); // switched below standard tags from stag_br_align_0/1/2/3 to simplify A:tag to B:stack mapping - tagr.add( stag_to_sci, u_br_align_0 ); - tagr.add( stag_to_bnd, u_br_align_1 ); - tagr.add( stag_to_sca, u_br_align_2 ); - tagr.add( stag_to_abs, u_br_align_3 ); + tagr.add(stag_to_sci, u_br_align_0); + tagr.add(stag_to_bnd, u_br_align_1); + tagr.add(stag_to_sca, u_br_align_2); + tagr.add(stag_to_abs, u_br_align_3); } #endif - return CONTINUE ; + return CONTINUE; } /** qsim::propagate_at_boundary_with_T @@ -1221,93 +1281,83 @@ and leave this just for testing. **/ -inline QSIM_METHOD int qsim::propagate_at_boundary_with_T(unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance ) const +inline QSIM_METHOD int qsim::propagate_at_boundary_with_T(unsigned &flag, RNG &rng, sctx &ctx, + float theTransmittance) const { - sphoton& p = ctx.p ; - const sstate& s = ctx.s ; - - const float& n1 = s.material1.x ; - const float& n2 = s.material2.x ; - const float eta = n1/n2 ; - - const float3* normal = (float3*)&ctx.prd->q0.f.x ; // geometrical outwards normal - - const float _c1 = -dot(p.mom, *normal ); // _c1 : cos(angle_of_incidence) not yet oriented - const float3 oriented_normal = _c1 < 0.f ? 
-(*normal) : (*normal) ; // oriented against incident p.mom - const float3 trans = cross(p.mom, oriented_normal) ; // perpendicular to plane of incidence, S-pol direction - const float trans_length = length(trans) ; // same as sin(theta), as p.mom and oriented_normal are unit vectors - const bool normal_incidence = trans_length < 1e-6f ; // p.mom parallel/anti-parallel to oriented_normal - const float3 A_trans = normal_incidence ? p.pol : trans/trans_length ; // normalized unit vector : perpendicular to plane of incidence - const float E1_perp = dot(p.pol, A_trans); // amplitude of polarization in direction perpendicular to plane of incidence, ie S polarization + sphoton &p = ctx.p; + const sstate &s = ctx.s; + + const float &n1 = s.material1.x; + const float &n2 = s.material2.x; + const float eta = n1 / n2; + + const float3 *normal = (float3 *)&ctx.prd->q0.f.x; // geometrical outwards normal + + const float _c1 = -dot(p.mom, *normal); // _c1 : cos(angle_of_incidence) not yet oriented + const float3 oriented_normal = _c1 < 0.f ? -(*normal) : (*normal); // oriented against incident p.mom + const float3 trans = cross(p.mom, oriented_normal); // perpendicular to plane of incidence, S-pol direction + const float trans_length = length(trans); // same as sin(theta), as p.mom and oriented_normal are unit vectors + const bool normal_incidence = trans_length < 1e-6f; // p.mom parallel/anti-parallel to oriented_normal + const float3 A_trans = + normal_incidence ? p.pol : trans / trans_length; // normalized unit vector : perpendicular to plane of incidence + const float E1_perp = + dot(p.pol, + A_trans); // amplitude of polarization in direction perpendicular to plane of incidence, ie S polarization + + const float c1 = fabs(_c1); + + const float c2c2 = 1.f - eta * eta * (1.f - c1 * c1); // Snells law and trig identity + bool tir = c2c2 < 0.f; + const float EdotN = dot(p.pol, oriented_normal); // used for TIR polarization + const float c2 = + tir ? 
0.f + : sqrtf( + c2c2); // c2 chosen +ve, set to 0.f for TIR => reflection_coefficient = 1.0f : so will always reflect + const float n1c1 = n1 * c1; + const float n2c2 = n2 * c2; + const float n2c1 = n2 * c1; + const float n1c2 = n1 * c2; + + const float2 E1 = + normal_incidence ? make_float2(0.f, 1.f) : make_float2(E1_perp, length(p.pol - (E1_perp * A_trans))); + const float2 E2_t = + make_float2(2.f * n1c1 * E1.x / (n1c1 + n2c2), 2.f * n1c1 * E1.y / (n2c1 + n1c2)); // ( S:perp, P:parl ) + const float2 E2_r = make_float2(E2_t.x - E1.x, (n2 * E2_t.y / n1) - E1.y); // ( S:perp, P:parl ) + const float2 RR = normalize(E2_r); + const float2 TT = normalize(E2_t); - const float c1 = fabs(_c1) ; - - const float c2c2 = 1.f - eta*eta*(1.f - c1 * c1 ) ; // Snells law and trig identity - bool tir = c2c2 < 0.f ; - const float EdotN = dot(p.pol, oriented_normal ) ; // used for TIR polarization - const float c2 = tir ? 0.f : sqrtf(c2c2) ; // c2 chosen +ve, set to 0.f for TIR => reflection_coefficient = 1.0f : so will always reflect - const float n1c1 = n1*c1 ; - const float n2c2 = n2*c2 ; - const float n2c1 = n2*c1 ; - const float n1c2 = n1*c2 ; - - const float2 E1 = normal_incidence ? make_float2( 0.f, 1.f) : make_float2( E1_perp , length( p.pol - (E1_perp*A_trans) ) ); - const float2 E2_t = make_float2( 2.f*n1c1*E1.x/(n1c1+n2c2), 2.f*n1c1*E1.y/(n2c1+n1c2) ) ; // ( S:perp, P:parl ) - const float2 E2_r = make_float2( E2_t.x - E1.x , (n2*E2_t.y/n1) - E1.y ) ; // ( S:perp, P:parl ) - const float2 RR = normalize(E2_r) ; - const float2 TT = normalize(E2_t) ; - - -/* - const float TransCoeff = theTransmittance >= 0.f ? - theTransmittance - : - ( tir || n1c1 == 0.f ? 0.f : n2c2*dot(E2_t,E2_t)/n1c1 ) - ; -*/ - - const float& TransCoeff = theTransmittance ; - - const float u_reflect = curand_uniform(&rng) ; - bool reflect = u_reflect > TransCoeff ; + /* + const float TransCoeff = theTransmittance >= 0.f ? + theTransmittance + : + ( tir || n1c1 == 0.f ? 
0.f : n2c2*dot(E2_t,E2_t)/n1c1 ) + ; + */ - p.mom = reflect - ? - p.mom + 2.0f*c1*oriented_normal - : - eta*(p.mom) + (eta*c1 - c2)*oriented_normal - ; + const float &TransCoeff = theTransmittance; + const float u_reflect = curand_uniform(&rng); + bool reflect = u_reflect > TransCoeff; - const float3 A_paral = normalize(cross(p.mom, A_trans)); // new P-pol direction + p.mom = reflect ? p.mom + 2.0f * c1 * oriented_normal : eta * (p.mom) + (eta * c1 - c2) * oriented_normal; - p.pol = normal_incidence ? - ( reflect ? p.pol*(n2>n1? -1.f:1.f) : p.pol ) - : - ( reflect ? - ( tir ? -p.pol + 2.f*EdotN*oriented_normal : RR.x*A_trans + RR.y*A_paral ) + const float3 A_paral = normalize(cross(p.mom, A_trans)); // new P-pol direction - : - TT.x*A_trans + TT.y*A_paral + p.pol = normal_incidence + ? (reflect ? p.pol * (n2 > n1 ? -1.f : 1.f) : p.pol) + : (reflect ? (tir ? -p.pol + 2.f * EdotN * oriented_normal : RR.x * A_trans + RR.y * A_paral) - ) - ; + : TT.x * A_trans + TT.y * A_paral - flag = reflect ? BOUNDARY_REFLECT : BOUNDARY_TRANSMIT ; + ); + flag = reflect ? 
BOUNDARY_REFLECT : BOUNDARY_TRANSMIT; - return CONTINUE ; + return CONTINUE; } #endif - - - - - - - /** Reflected momentum vector --------------------------- @@ -1444,12 +1494,6 @@ Compare transmitted vector with G4OpBoundaryProcess::DielectricDielectric **/ - - - - - - /* G4OpBoundaryProcess::DielectricDielectric @@ -1514,7 +1558,6 @@ transmit */ - /* qsim::propagate_at_surface_MultiFilm ------------------------------- @@ -1535,13 +1578,13 @@ Tp: p-component reflect probability */ #if defined(__CUDACC__) || defined(__CUDABE__) -inline QSIM_METHOD int qsim::propagate_at_surface_MultiFilm(unsigned& flag, RNG& rng, sctx& ctx ) +inline QSIM_METHOD int qsim::propagate_at_surface_MultiFilm(unsigned &flag, RNG &rng, sctx &ctx) { - const float one = 1.0f; - const sphoton& p = ctx.p ; - const float3* normal = (float3*)&ctx.prd->q0.f.x ; - int lpmtid = ctx.prd->identity() - 1 ; // identity comes from optixInstance.instanceId where 0 means not-a-sensor + const float one = 1.0f; + const sphoton &p = ctx.p; + const float3 *normal = (float3 *)&ctx.prd->q0.f.x; + int lpmtid = ctx.prd->identity() - 1; // identity comes from optixInstance.instanceId where 0 means not-a-sensor float minus_cos_theta = dot(p.mom, *normal); int pmtcat = pmt->get_lpmtcat_from_lpmtid(lpmtid); @@ -1549,66 +1592,61 @@ inline QSIM_METHOD int qsim::propagate_at_surface_MultiFilm(unsigned& flag, RNG& float4 RsTsRpTp = multifilm->lookup(pmtcat, wv_nm, minus_cos_theta); + const float c1 = fabs(minus_cos_theta); + const float s1 = sqrtf(one - c1 * c1); - const float c1 = fabs(minus_cos_theta) ; - const float s1 = sqrtf(one -c1*c1); - - float EsEs = s1 > 0.f ? dot(p.pol, cross( p.mom, *normal))/s1 : 0.f ; - EsEs *= EsEs; // orienting normal doesnt matter as squared : this is S_vs_P power fraction - - - float3 ART ; - ART.z = RsTsRpTp.y*EsEs + RsTsRpTp.w*(one - EsEs); - ART.y = RsTsRpTp.x*EsEs + RsTsRpTp.z*(one - EsEs); - ART.x = one - (ART.y+ART.z); + float EsEs = s1 > 0.f ? 
dot(p.pol, cross(p.mom, *normal)) / s1 : 0.f; + EsEs *= EsEs; // orienting normal doesnt matter as squared : this is S_vs_P power fraction - const float& A = ART.x ; - const float& T = ART.z ; + float3 ART; + ART.z = RsTsRpTp.y * EsEs + RsTsRpTp.w * (one - EsEs); + ART.y = RsTsRpTp.x * EsEs + RsTsRpTp.z * (one - EsEs); + ART.x = one - (ART.y + ART.z); + const float &A = ART.x; + const float &T = ART.z; - float4 RsTsRpTpNormal = multifilm->lookup(pmtcat, wv_nm, -one ); + float4 RsTsRpTpNormal = multifilm->lookup(pmtcat, wv_nm, -one); // Normal means the photon incident from glass to vacuum, AOI = 0 deg cos_theta = -1.f float3 ART_normal; - ART_normal.z = 0.5f*(RsTsRpTpNormal.y + RsTsRpTpNormal.w); // T:0.5f*(Ts+Tp) - ART_normal.y = 0.5f*(RsTsRpTpNormal.x + RsTsRpTpNormal.z); // R:0.5f*(Rs+Rp) - ART_normal.x = one -(ART_normal.y + ART_normal.z) ; // 1.f - (R+T) + ART_normal.z = 0.5f * (RsTsRpTpNormal.y + RsTsRpTpNormal.w); // T:0.5f*(Ts+Tp) + ART_normal.y = 0.5f * (RsTsRpTpNormal.x + RsTsRpTpNormal.z); // R:0.5f*(Rs+Rp) + ART_normal.x = one - (ART_normal.y + ART_normal.z); // 1.f - (R+T) - const float& An = ART_normal.x ; - const float energy_eV = qpmt::hc_eVnm/wv_nm ; + const float &An = ART_normal.x; + const float energy_eV = qpmt::hc_eVnm / wv_nm; const float qe_scale = pmt->get_qescale_from_lpmtid(lpmtid); const float qe_shape = pmt->get_lpmtcat_qe(pmtcat, energy_eV); const float _qe = minus_cos_theta > 0.f ? 0.f : qe_scale * qe_shape; - const float& theAbsorption = A; - const float& theTransmittance = T/(one-A); - const float& theEfficiency = _qe/An; + const float &theAbsorption = A; + const float &theTransmittance = T / (one - A); + const float &theEfficiency = _qe / An; float u_theAbsorption = curand_uniform(&rng); - int action = u_theAbsorption < theAbsorption ? BREAK : CONTINUE ; + int action = u_theAbsorption < theAbsorption ? 
BREAK : CONTINUE; - if( action == BREAK ) + if (action == BREAK) { - float u_theEfficiency = curand_uniform(&rng) ; - flag = u_theEfficiency < theEfficiency ? SURFACE_DETECT : SURFACE_ABSORB ; + float u_theEfficiency = curand_uniform(&rng); + flag = u_theEfficiency < theEfficiency ? SURFACE_DETECT : SURFACE_ABSORB; } else { - propagate_at_boundary( flag, rng, ctx, theTransmittance ); + propagate_at_boundary(flag, rng, ctx, theTransmittance); } - //printf("//qsim.propagate_at_surface_MultiFilm pidx %7lld lpmtid %d ART ( %7.3f %7.3f %7.3f ) u_theAbsorption %7.3f action %d \n", - //ctx.pidx, lpmtid, ART.x, ART.y, ART.z, u_theAbsorption, action); - - return action ; + // printf("//qsim.propagate_at_surface_MultiFilm pidx %7lld lpmtid %d ART ( %7.3f %7.3f %7.3f ) u_theAbsorption + // %7.3f action %d \n", ctx.pidx, lpmtid, ART.x, ART.y, ART.z, u_theAbsorption, action); + return action; } #endif - -#if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA) +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) /** qsim::propagate_at_surface (HMM: perhaps propagate_at_simplified_surface ) @@ -1674,87 +1712,82 @@ The s.surface float4 is filled by qbnd::fill_state via:: **/ -inline QSIM_METHOD int qsim::propagate_at_surface(unsigned& flag, RNG& rng, sctx& ctx) +inline QSIM_METHOD int qsim::propagate_at_surface(unsigned &flag, RNG &rng, sctx &ctx) { - const sstate& s = ctx.s ; - const float& detect = s.surface.x ; - const float& absorb = s.surface.y ; - //const float& reflect_specular_ = s.surface.z ; - const float& reflect_diffuse_ = s.surface.w ; + const sstate &s = ctx.s; + const float &detect = s.surface.x; + const float &absorb = s.surface.y; + // const float& reflect_specular_ = s.surface.z ; + const float &reflect_diffuse_ = s.surface.w; float u_surface = curand_uniform(&rng); #if !defined(PRODUCTION) && defined(DEBUG_TAG) - stagr& tagr = ctx.tagr ; + stagr &tagr = ctx.tagr; float u_surface_burn 
= curand_uniform(&rng); - tagr.add( stag_at_burn_sf_sd, u_surface); - tagr.add( stag_sf_burn, u_surface_burn); + tagr.add(stag_at_burn_sf_sd, u_surface); + tagr.add(stag_sf_burn, u_surface_burn); #endif + int action = u_surface < absorb + detect ? BREAK : CONTINUE; - int action = u_surface < absorb + detect ? BREAK : CONTINUE ; - - if( action == BREAK ) + if (action == BREAK) { #if defined(WITH_CUSTOM4) - int pmtid = ctx.prd->identity() - 1 ; // identity comes from optixInstance.instanceId where 0 means not-a-sensor - float qe = 1.f ; + int pmtid = ctx.prd->identity() - 1; // identity comes from optixInstance.instanceId where 0 means not-a-sensor + float qe = 1.f; float u_qe = curand_uniform(&rng); - if( s_pmt::is_spmtid(pmtid) ) + if (s_pmt::is_spmtid(pmtid)) { - const float energy_eV = qpmt::hc_eVnm/ctx.p.wavelength ; - float qe_shape = pmt->s_qeshape_prop->interpolate( 0, energy_eV ); + const float energy_eV = qpmt::hc_eVnm / ctx.p.wavelength; + float qe_shape = pmt->s_qeshape_prop->interpolate(0, energy_eV); float qe_scale = pmt->get_s_qescale_from_spmtid(pmtid); - qe = qe_shape*qe_scale ; + qe = qe_shape * qe_scale; #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) - printf("//qsim.propagate_at_surface.BREAK.is_spmtid pidx %7lld : pmtid %d energy_eV %7.3f qe_shape %7.3f qe_scale %7.3f qe %7.3f detect %7.3f absorb %7.3f reflect_specular %7.3f reflect_diffuse %7.3f \n" , - ctx.pidx, pmtid, energy_eV, qe_shape, qe_scale, qe, detect, absorb, s.surface.z, reflect_diffuse_ ); + if (ctx.pidx == base->pidx) + printf( + "//qsim.propagate_at_surface.BREAK.is_spmtid pidx %7lld : pmtid %d energy_eV %7.3f qe_shape %7.3f " + "qe_scale %7.3f qe %7.3f detect %7.3f absorb %7.3f reflect_specular %7.3f reflect_diffuse %7.3f \n", + ctx.pidx, pmtid, energy_eV, qe_shape, qe_scale, qe, detect, absorb, s.surface.z, reflect_diffuse_); #endif } - flag = u_surface < absorb ? - SURFACE_ABSORB - : - ( u_qe < qe ? 
EFFICIENCY_COLLECT : EFFICIENCY_CULL ) - ; + flag = u_surface < absorb ? SURFACE_ABSORB : (u_qe < qe ? EFFICIENCY_COLLECT : EFFICIENCY_CULL); #else - flag = u_surface < absorb ? - SURFACE_ABSORB - : - SURFACE_DETECT - ; + flag = u_surface < absorb ? SURFACE_ABSORB : SURFACE_DETECT; #endif #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) - printf("//qsim.propagate_at_surface.SA/SD.BREAK pidx %7lld : flag %d \n" , ctx.pidx, flag ); + if (ctx.pidx == base->pidx) + printf("//qsim.propagate_at_surface.SA/SD.BREAK pidx %7lld : flag %d \n", ctx.pidx, flag); #endif } else { - flag = u_surface < absorb + detect + reflect_diffuse_ ? SURFACE_DREFLECT : SURFACE_SREFLECT ; - switch(flag) + flag = u_surface < absorb + detect + reflect_diffuse_ ? SURFACE_DREFLECT : SURFACE_SREFLECT; + switch (flag) { - case SURFACE_DREFLECT: reflect_diffuse( rng, ctx) ; break ; - case SURFACE_SREFLECT: reflect_specular(rng, ctx) ; break ; + case SURFACE_DREFLECT: + reflect_diffuse(rng, ctx); + break; + case SURFACE_SREFLECT: + reflect_specular(rng, ctx); + break; } #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) - printf("//qsim.propagate_at_surface.DR/SR.CONTINUE pidx %7lld : flag %d \n" , ctx.pidx, flag ); + if (ctx.pidx == base->pidx) + printf("//qsim.propagate_at_surface.DR/SR.CONTINUE pidx %7lld : flag %d \n", ctx.pidx, flag); #endif } - return action ; + return action; } - -inline QSIM_METHOD int qsim::propagate_at_surface_Detect(unsigned& flag, RNG& rng, sctx& ctx) const +inline QSIM_METHOD int qsim::propagate_at_surface_Detect(unsigned &flag, RNG &rng, sctx &ctx) const { - float u_surface_burn = curand_uniform(&rng); // for random alignment - flag = SURFACE_DETECT ; - return BREAK ; + float u_surface_burn = curand_uniform(&rng); // for random alignment + flag = SURFACE_DETECT; + return BREAK; } - #if defined(WITH_CUSTOM4) /** @@ -1780,125 +1813,134 @@ Where ctx.prd->identity() comes from ? Where is the "+ 1" done ? 
**/ -inline QSIM_METHOD int qsim::propagate_at_surface_CustomART(unsigned& flag, RNG& rng, sctx& ctx) const +inline QSIM_METHOD int qsim::propagate_at_surface_CustomART(unsigned &flag, RNG &rng, sctx &ctx) const { - sphoton& p = ctx.p ; - const float3* normal = (float3*)&ctx.prd->q0.f.x ; // geometrical outwards normal - int lpmtid = ctx.prd->identity() - 1 ; // identity comes from optixInstance.instanceId where 0 means not-a-sensor - const float lposcost = ctx.prd->lposcost() ; // local frame intersect position cosine theta - + sphoton &p = ctx.p; + const float3 *normal = (float3 *)&ctx.prd->q0.f.x; // geometrical outwards normal + int lpmtid = ctx.prd->identity() - 1; // identity comes from optixInstance.instanceId where 0 means not-a-sensor + const float lposcost = ctx.prd->lposcost(); // local frame intersect position cosine theta float minus_cos_theta = dot(p.mom, *normal); - float dot_pol_cross_mom_nrm = dot(p.pol,cross(p.mom,*normal)) ; + float dot_pol_cross_mom_nrm = dot(p.pol, cross(p.mom, *normal)); #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx_debug ) + if (ctx.pidx_debug) { - float3 cross_mom_nrm = cross(p.mom, *normal) ; - printf("//qsim::propagate_at_surface_CustomART pidx %7lld : mom = np.array([%10.8f,%10.8f,%10.8f]) ; lmom = %10.8f \n", - ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom) ); - printf("//qsim::propagate_at_surface_CustomART pidx %7lld : pol = np.array([%10.8f,%10.8f,%10.8f]) ; lpol = %10.8f \n", - ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol) ); - printf("//qsim::propagate_at_surface_CustomART pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f \n", - ctx.pidx, normal->x, normal->y, normal->z, length(*normal) ); - printf("//qsim::propagate_at_surface_CustomART pidx %7lld : cross_mom_nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lcross_mom_nrm = %10.8f \n", - ctx.pidx, cross_mom_nrm.x, cross_mom_nrm.y, cross_mom_nrm.z, length(cross_mom_nrm) ); - printf("//qsim::propagate_at_surface_CustomART pidx 
%7lld : dot_pol_cross_mom_nrm = %10.8f \n", ctx.pidx, dot_pol_cross_mom_nrm ); - printf("//qsim::propagate_at_surface_CustomART pidx %7lld : minus_cos_theta = %10.8f \n", ctx.pidx, minus_cos_theta ); - printf("//qsim::propagate_at_surface_CustomART pidx %7lld : lposcost = %10.8f (expect 0->1)\n", ctx.pidx, lposcost ); + float3 cross_mom_nrm = cross(p.mom, *normal); + printf("//qsim::propagate_at_surface_CustomART pidx %7lld : mom = np.array([%10.8f,%10.8f,%10.8f]) ; lmom = " + "%10.8f \n", + ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom)); + printf("//qsim::propagate_at_surface_CustomART pidx %7lld : pol = np.array([%10.8f,%10.8f,%10.8f]) ; lpol = " + "%10.8f \n", + ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol)); + printf("//qsim::propagate_at_surface_CustomART pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = " + "%10.8f \n", + ctx.pidx, normal->x, normal->y, normal->z, length(*normal)); + printf("//qsim::propagate_at_surface_CustomART pidx %7lld : cross_mom_nrm = np.array([%10.8f,%10.8f,%10.8f]) ; " + "lcross_mom_nrm = %10.8f \n", + ctx.pidx, cross_mom_nrm.x, cross_mom_nrm.y, cross_mom_nrm.z, length(cross_mom_nrm)); + printf("//qsim::propagate_at_surface_CustomART pidx %7lld : dot_pol_cross_mom_nrm = %10.8f \n", ctx.pidx, + dot_pol_cross_mom_nrm); + printf("//qsim::propagate_at_surface_CustomART pidx %7lld : minus_cos_theta = %10.8f \n", ctx.pidx, + minus_cos_theta); + printf("//qsim::propagate_at_surface_CustomART pidx %7lld : lposcost = %10.8f (expect 0->1)\n", ctx.pidx, + lposcost); } #endif // formerly excluded Custom4 hits onto WP PMTs see ~/j/issues/jok-tds-mu-running-NOT-A-SENSOR-warnings.rst - //if(lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >= s_pmt::OFFSET_WP_PMT_END ) - //if(lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >= s_pmt::OFFSET_WP_ATM_LPMT_END ) - if(lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >= s_pmt::OFFSET_WP_WAL_PMT_END ) + // if(lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >= s_pmt::OFFSET_WP_PMT_END ) + // if(lpmtid < 
s_pmt::OFFSET_CD_LPMT || lpmtid >= s_pmt::OFFSET_WP_ATM_LPMT_END ) + if (lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >= s_pmt::OFFSET_WP_WAL_PMT_END) { - flag = NAN_ABORT ; + flag = NAN_ABORT; #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d : ERROR UNEXPECTED LPMTID : NAN_ABORT \n", ctx.pidx, lpmtid ); + printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d : ERROR UNEXPECTED LPMTID : NAN_ABORT \n", + ctx.pidx, lpmtid); #endif - return BREAK ; + return BREAK; } #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx_debug ) - printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d wl %8.4f mct %8.4f dpcmn %8.4f pmt %p pre-ATQC \n", - ctx.pidx, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, pmt ); + if (ctx.pidx_debug) + printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d wl %8.4f mct %8.4f dpcmn %8.4f pmt %p " + "pre-ATQC \n", + ctx.pidx, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, pmt); #endif - float ATQC[4] = {} ; + float ATQC[4] = {}; #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(lpmtid > -1 && pmt != nullptr) pmt->get_lpmtid_ATQC(ATQC, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost, ctx.pidx, ctx.pidx_debug ); + if (lpmtid > -1 && pmt != nullptr) + pmt->get_lpmtid_ATQC(ATQC, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost, ctx.pidx, + ctx.pidx_debug); #else - if(lpmtid > -1 && pmt != nullptr) pmt->get_lpmtid_ATQC(ATQC, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost ); + if (lpmtid > -1 && pmt != nullptr) + pmt->get_lpmtid_ATQC(ATQC, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost); #endif - - #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx_debug ) - printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d wl %8.4f mct %8.4f dpcmn %8.4f lpc %8.4f ATQC ( %8.4f %8.4f %8.4f %8.4f ) \n", - 
ctx.pidx, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost, ATQC[0], ATQC[1], ATQC[2], ATQC[3] ); + if (ctx.pidx_debug) + printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d wl %8.4f mct %8.4f dpcmn %8.4f lpc %8.4f " + "ATQC ( %8.4f %8.4f %8.4f %8.4f ) \n", + ctx.pidx, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost, ATQC[0], ATQC[1], + ATQC[2], ATQC[3]); #endif - - const float& theAbsorption = ATQC[0]; - const float& theTransmittance = ATQC[1]; - const float& theEfficiency = ATQC[2]; - const float& collectionEfficiency = ATQC[3]; + const float &theAbsorption = ATQC[0]; + const float &theTransmittance = ATQC[1]; + const float &theEfficiency = ATQC[2]; + const float &collectionEfficiency = ATQC[3]; float u_theAbsorption = curand_uniform(&rng); - int action = u_theAbsorption < theAbsorption ? BREAK : CONTINUE ; - + int action = u_theAbsorption < theAbsorption ? BREAK : CONTINUE; #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx_debug ) - printf("//qsim.propagate_at_surface_CustomART pidx %7lld lpmtid %d ATQC ( %8.4f %8.4f %8.4f %8.4f ) u_theAbsorption %7.3f action %d \n", - ctx.pidx, lpmtid, ATQC[0], ATQC[1], ATQC[2], ATQC[3], u_theAbsorption, action ); + if (ctx.pidx_debug) + printf("//qsim.propagate_at_surface_CustomART pidx %7lld lpmtid %d ATQC ( %8.4f %8.4f %8.4f %8.4f ) " + "u_theAbsorption %7.3f action %d \n", + ctx.pidx, lpmtid, ATQC[0], ATQC[1], ATQC[2], ATQC[3], u_theAbsorption, action); #endif - if( action == BREAK ) + if (action == BREAK) { - float u_theEfficiency = curand_uniform(&rng) ; + float u_theEfficiency = curand_uniform(&rng); float u_collectionEfficiency = curand_uniform(&rng); - flag = u_theEfficiency < theEfficiency ? - ( u_collectionEfficiency < collectionEfficiency ? EFFICIENCY_COLLECT : EFFICIENCY_CULL ) - : - SURFACE_ABSORB - ; + flag = u_theEfficiency < theEfficiency + ? (u_collectionEfficiency < collectionEfficiency ? 
EFFICIENCY_COLLECT : EFFICIENCY_CULL) + : SURFACE_ABSORB; - // former SD:SURFACE_DETECT, now becomes EC:EFFICIENCY_COLLECT or EX:EFFICIENCY_CULL depending on collectionEfficiency and random throw + // former SD:SURFACE_DETECT, now becomes EC:EFFICIENCY_COLLECT or EX:EFFICIENCY_CULL depending on + // collectionEfficiency and random throw #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx_debug ) - printf("//qsim.propagate_at_surface_CustomART.BREAK.SD/SA EC/EX pidx %7lld lpmtid %d ATQC ( %8.4f %8.4f %8.4f %8.4f ) u_theEfficiency %8.4f theEfficiency %8.4f flag %d \n", - ctx.pidx, lpmtid, ATQC[0],ATQC[1], ATQC[2],ATQC[3], u_theEfficiency, theEfficiency, flag ); + if (ctx.pidx_debug) + printf("//qsim.propagate_at_surface_CustomART.BREAK.SD/SA EC/EX pidx %7lld lpmtid %d ATQC ( %8.4f %8.4f " + "%8.4f %8.4f ) u_theEfficiency %8.4f theEfficiency %8.4f flag %d \n", + ctx.pidx, lpmtid, ATQC[0], ATQC[1], ATQC[2], ATQC[3], u_theEfficiency, theEfficiency, flag); #endif - } else { #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx_debug ) - printf("//qsim.propagate_at_surface_CustomART.CONTINUE pidx %7lld lpmtid %d ATQC ( %7.3f %7.3f %7.3f %7.3f ) theTransmittance %7.3f \n", - ctx.pidx, lpmtid, ATQC[0], ATQC[1], ATQC[2], ATQC[3], theTransmittance ); + if (ctx.pidx_debug) + printf("//qsim.propagate_at_surface_CustomART.CONTINUE pidx %7lld lpmtid %d ATQC ( %7.3f %7.3f %7.3f %7.3f " + ") theTransmittance %7.3f \n", + ctx.pidx, lpmtid, ATQC[0], ATQC[1], ATQC[2], ATQC[3], theTransmittance); #endif - propagate_at_boundary( flag, rng, ctx, theTransmittance ); + propagate_at_boundary(flag, rng, ctx, theTransmittance); } - return action ; + return action; } #endif #endif - -#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) /** qsim::reflect_diffuse cf G4OpBoundaryProcess::DoReflection @@ -1974,41 +2016,39 @@ orient is used to 
flip the reflection normal back against the incident direction **/ -inline QSIM_METHOD void qsim::reflect_diffuse( RNG& rng, sctx& ctx ) +inline QSIM_METHOD void qsim::reflect_diffuse(RNG &rng, sctx &ctx) { - sphoton& p = ctx.p ; + sphoton &p = ctx.p; - float3 old_mom = p.mom ; + float3 old_mom = p.mom; - const float3* normal = ctx.prd->normal() ; // geometrical outwards normal + const float3 *normal = ctx.prd->normal(); // geometrical outwards normal - //const float orient = -1.f ; // BUG : FIXED ORIENT FLIP CANNOT BE CORRECT - const float orient = dot( old_mom, *normal ) > 0.f ? -1.f : 1.f ; + // const float orient = -1.f ; // BUG : FIXED ORIENT FLIP CANNOT BE CORRECT + const float orient = dot(old_mom, *normal) > 0.f ? -1.f : 1.f; - lambertian_direction( &p.mom, normal, orient, rng, ctx ); + lambertian_direction(&p.mom, normal, orient, rng, ctx); - - float3 facet_normal = normalize( p.mom - old_mom ); - const float EdotN = dot( p.pol, facet_normal ); - p.pol = -1.f*(p.pol) + 2.f*EdotN*facet_normal ; + float3 facet_normal = normalize(p.mom - old_mom); + const float EdotN = dot(p.pol, facet_normal); + p.pol = -1.f * (p.pol) + 2.f * EdotN * facet_normal; #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) + if (ctx.pidx == base->pidx) { - printf("//qsim.reflect_diffuse pidx %7lld : old_mom = np.array([%10.5f,%10.5f,%10.5f]) \n", - ctx.pidx, old_mom.x, old_mom.y, old_mom.z ) ; + printf("//qsim.reflect_diffuse pidx %7lld : old_mom = np.array([%10.5f,%10.5f,%10.5f]) \n", ctx.pidx, old_mom.x, + old_mom.y, old_mom.z); - printf("//qsim.reflect_diffuse pidx %7lld : normal0 = np.array([%10.5f,%10.5f,%10.5f]) \n", - ctx.pidx, normal->x, normal->y, normal->z ) ; + printf("//qsim.reflect_diffuse pidx %7lld : normal0 = np.array([%10.5f,%10.5f,%10.5f]) \n", ctx.pidx, normal->x, + normal->y, normal->z); - printf("//qsim.reflect_diffuse pidx %7lld : p.mom = np.array([%10.5f,%10.5f,%10.5f]) \n", - ctx.pidx, p.mom.x, p.mom.y, p.mom.z ) ; + 
printf("//qsim.reflect_diffuse pidx %7lld : p.mom = np.array([%10.5f,%10.5f,%10.5f]) \n", ctx.pidx, p.mom.x, + p.mom.y, p.mom.z); - printf("//qsim.reflect_diffuse pidx %7lld : facet_normal = np.array([%10.5f,%10.5f,%10.5f]) \n", - ctx.pidx, facet_normal.x, facet_normal.y, facet_normal.z ) ; + printf("//qsim.reflect_diffuse pidx %7lld : facet_normal = np.array([%10.5f,%10.5f,%10.5f]) \n", ctx.pidx, + facet_normal.x, facet_normal.y, facet_normal.z); } #endif - } /** @@ -2027,58 +2067,56 @@ to be helpful. **/ -inline QSIM_METHOD void qsim::reflect_specular( RNG& rng, sctx& ctx ) +inline QSIM_METHOD void qsim::reflect_specular(RNG &rng, sctx &ctx) { - sphoton& p = ctx.p ; - const float3* normal = ctx.prd->normal() ; + sphoton &p = ctx.p; + const float3 *normal = ctx.prd->normal(); #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) + if (ctx.pidx == base->pidx) { - printf("//qsim.reflect_specular.head pidx %7lld : normal0 = np.array([%10.5f,%10.5f,%10.5f]) \n", - ctx.pidx, normal->x, normal->y, normal->z ) ; + printf("//qsim.reflect_specular.head pidx %7lld : normal0 = np.array([%10.5f,%10.5f,%10.5f]) \n", ctx.pidx, + normal->x, normal->y, normal->z); - printf("//qsim.reflect_specular.head pidx %7lld : mom0 = np.array([%10.5f,%10.5f,%10.5f]) \n", - ctx.pidx, p.mom.x, p.mom.y, p.mom.z ) ; + printf("//qsim.reflect_specular.head pidx %7lld : mom0 = np.array([%10.5f,%10.5f,%10.5f]) \n", ctx.pidx, + p.mom.x, p.mom.y, p.mom.z); - printf("//qsim.reflect_specular.head pidx %7lld : pol0 = np.array([%10.5f,%10.5f,%10.5f]) \n", - ctx.pidx, p.pol.x, p.pol.y, p.pol.z ) ; + printf("//qsim.reflect_specular.head pidx %7lld : pol0 = np.array([%10.5f,%10.5f,%10.5f]) \n", ctx.pidx, + p.pol.x, p.pol.y, p.pol.z); } #endif #ifdef WITH_ORIENT - //const float orient = -1.f ; - const float orient = 1.f ; + // const float orient = -1.f ; + const float orient = 1.f; // because orient appears twice in the below p.mom p.pol calcs // it being +1.f or -1.f makes no 
difference - const float PdotN = dot( p.mom, *normal )*orient ; - p.mom = p.mom - 2.f*PdotN*(*normal)*orient ; + const float PdotN = dot(p.mom, *normal) * orient; + p.mom = p.mom - 2.f * PdotN * (*normal) * orient; - const float EdotN = dot( p.pol, *normal )*orient ; - p.pol = -1.f*(p.pol) + 2.f*EdotN*(*normal)*orient ; + const float EdotN = dot(p.pol, *normal) * orient; + p.pol = -1.f * (p.pol) + 2.f * EdotN * (*normal) * orient; #else // removed orient as does not effect calc, hence confusing and pointless - const float PdotN = dot( p.mom, *normal ) ; - p.mom = p.mom - 2.f*PdotN*(*normal) ; + const float PdotN = dot(p.mom, *normal); + p.mom = p.mom - 2.f * PdotN * (*normal); - const float EdotN = dot( p.pol, *normal ) ; - p.pol = -1.f*(p.pol) + 2.f*EdotN*(*normal) ; + const float EdotN = dot(p.pol, *normal); + p.pol = -1.f * (p.pol) + 2.f * EdotN * (*normal); #endif #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(ctx.pidx == base->pidx) + if (ctx.pidx == base->pidx) { - printf("//qsim.reflect_specular.tail pidx %7lld : mom1 = np.array([%10.5f,%10.5f,%10.5f]) ; PdotN = %10.5f ; EdotN = %10.5f \n", - ctx.pidx, p.mom.x, p.mom.y, p.mom.z, PdotN, EdotN ) ; - - printf("//qsim.reflect_specular.tail pidx %7lld : pol1 = np.array([%10.5f,%10.5f,%10.5f]) \n", - ctx.pidx, p.pol.x, p.pol.y, p.pol.z ) ; + printf("//qsim.reflect_specular.tail pidx %7lld : mom1 = np.array([%10.5f,%10.5f,%10.5f]) ; PdotN = %10.5f ; " + "EdotN = %10.5f \n", + ctx.pidx, p.mom.x, p.mom.y, p.mom.z, PdotN, EdotN); + printf("//qsim.reflect_specular.tail pidx %7lld : pol1 = np.array([%10.5f,%10.5f,%10.5f]) \n", ctx.pidx, + p.pol.x, p.pol.y, p.pol.z); } #endif - - } /** @@ -2109,7 +2147,8 @@ Stages within bounce loop 3. 
mutate photon and set flag using material properties - * note that photons that SAIL to boundary are mutated twice within the while loop (by propagate_to_boundary and propagate_at_boundary/surface) + * note that photons that SAIL to boundary are mutated twice within the while loop (by propagate_to_boundary and +propagate_at_boundary/surface) Thoughts ~~~~~~~~~~~ @@ -2120,47 +2159,50 @@ so can switch them off easily in production running **/ -inline QSIM_METHOD void qsim::fake_propagate( sphoton& p, const quad2* mock_prd, RNG& rng, unsigned long long idx ) +inline QSIM_METHOD void qsim::fake_propagate(sphoton &p, const quad2 *mock_prd, RNG &rng, unsigned long long idx) { - p.set_flag(TORCH); // setting initial flag : in reality this should be done by generation + p.set_flag(TORCH); // setting initial flag : in reality this should be done by generation - qsim* sim = this ; + qsim *sim = this; - sctx ctx = {} ; - ctx.p = p ; // Q: Why is this different from CSGOptiX7.cu:simulate ? A: Presumably due to input photon. - ctx.evt = evt ; - ctx.pidx = idx ; + sctx ctx = {}; + ctx.p = p; // Q: Why is this different from CSGOptiX7.cu:simulate ? A: Presumably due to input photon. 
+ ctx.evt = evt; + ctx.pidx = idx; - int command = START ; - int bounce = 0 ; + int command = START; + int bounce = 0; #ifndef PRODUCTION ctx.point(bounce); #endif - while( bounce < evt->max_bounce ) + while (bounce < evt->max_bounce) { - ctx.prd = mock_prd + (evt->max_bounce*idx+bounce) ; - if( ctx.prd->boundary() == 0xffffu ) break ; // SHOULD NEVER HAPPEN : propagate can do nothing meaningful without a boundary + ctx.prd = mock_prd + (evt->max_bounce * idx + bounce); + if (ctx.prd->boundary() == 0xffffu) + break; // SHOULD NEVER HAPPEN : propagate can do nothing meaningful without a boundary #ifndef PRODUCTION ctx.trace(bounce); #endif #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if(idx == base->pidx) - printf("//qsim.fake_propagate pidx %7lld bounce %d evt.max_bounce %d prd.q0.f.xyzw (%10.4f %10.4f %10.4f %10.4f) \n", - idx, bounce, evt->max_bounce, ctx.prd->q0.f.x, ctx.prd->q0.f.y, ctx.prd->q0.f.z, ctx.prd->q0.f.w ); + if (idx == base->pidx) + printf("//qsim.fake_propagate pidx %7lld bounce %d evt.max_bounce %d prd.q0.f.xyzw (%10.4f %10.4f %10.4f " + "%10.4f) \n", + idx, bounce, evt->max_bounce, ctx.prd->q0.f.x, ctx.prd->q0.f.y, ctx.prd->q0.f.z, ctx.prd->q0.f.w); #endif - command = sim->propagate(bounce, rng, ctx ); + command = sim->propagate(bounce, rng, ctx); bounce++; #ifndef PRODUCTION ctx.point(bounce); #endif - if(command == BREAK) break ; + if (command == BREAK) + break; } #ifndef PRODUCTION ctx.end(); #endif - evt->photon[idx] = ctx.p ; + evt->photon[idx] = ctx.p; } /** @@ -2215,43 +2257,44 @@ Prior to supporting special surfaces, within the command == BOUNDARY used:: **/ -inline QSIM_METHOD int qsim::propagate(const int bounce, RNG& rng, sctx& ctx ) // ::simulate +inline QSIM_METHOD int qsim::propagate(const int bounce, RNG &rng, sctx &ctx) // ::simulate { - const unsigned boundary = ctx.prd->boundary() ; - const unsigned identity = ctx.prd->identity() ; // sensor_identifier+1, 0:not-a-sensor - const unsigned iindex = ctx.prd->iindex() ; - const 
float lposcost = ctx.prd->lposcost() ; // local frame intersect position cosine theta + const unsigned boundary = ctx.prd->boundary(); + const unsigned identity = ctx.prd->identity(); // sensor_identifier+1, 0:not-a-sensor + const unsigned iindex = ctx.prd->iindex(); + const float lposcost = ctx.prd->lposcost(); // local frame intersect position cosine theta - const float3* normal = ctx.prd->normal(); - float cosTheta = dot(ctx.p.mom, *normal ) ; + const float3 *normal = ctx.prd->normal(); + float cosTheta = dot(ctx.p.mom, *normal); #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx == base->pidx ) + if (ctx.pidx == base->pidx) { - printf("\n//qsim.propagate.head pidx %7lld : ctx.evt.index %d evt.index %d \n", ctx.pidx, ctx.evt->index, evt->index ); - - printf("\n//qsim.propagate.head pidx %7lld : bnc %d boundary %d cosTheta %10.8f \n", ctx.pidx, bounce, boundary, cosTheta ); + printf("\n//qsim.propagate.head pidx %7lld : ctx.evt.index %d evt.index %d \n", ctx.pidx, ctx.evt->index, + evt->index); - printf("//qsim.propagate.head pidx %7lld : mom = np.array([%10.8f,%10.8f,%10.8f]) ; lmom = %10.8f \n", - ctx.pidx, ctx.p.mom.x, ctx.p.mom.y, ctx.p.mom.z, length(ctx.p.mom) ) ; + printf("\n//qsim.propagate.head pidx %7lld : bnc %d boundary %d cosTheta %10.8f \n", ctx.pidx, bounce, boundary, + cosTheta); - printf("//qsim.propagate.head pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n", - ctx.pidx, ctx.p.pos.x, ctx.p.pos.y, ctx.p.pos.z, length(ctx.p.pos) ) ; + printf("//qsim.propagate.head pidx %7lld : mom = np.array([%10.8f,%10.8f,%10.8f]) ; lmom = %10.8f \n", + ctx.pidx, ctx.p.mom.x, ctx.p.mom.y, ctx.p.mom.z, length(ctx.p.mom)); - printf("//qsim.propagate.head pidx %7lld : nrm = np.array([(%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f \n", - ctx.pidx, normal->x, normal->y, normal->z, length(*normal) ); + printf("//qsim.propagate.head pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n", ctx.pidx, + ctx.p.pos.x, ctx.p.pos.y, 
ctx.p.pos.z, length(ctx.p.pos)); + printf("//qsim.propagate.head pidx %7lld : nrm = np.array([(%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f \n", + ctx.pidx, normal->x, normal->y, normal->z, length(*normal)); } #endif // copy geometry info into the sphoton struct - ctx.p.set_prd(boundary, identity, cosTheta, iindex ); // HMM: lposcost not passed along + ctx.p.set_prd(boundary, identity, cosTheta, iindex); // HMM: lposcost not passed along - bnd->fill_state(ctx.s, boundary, ctx.p.wavelength, cosTheta, ctx.pidx, base->pidx ); + bnd->fill_state(ctx.s, boundary, ctx.p.wavelength, cosTheta, ctx.pidx, base->pidx); - unsigned flag = 0 ; + unsigned flag = 0; - int command = propagate_to_boundary( flag, rng, ctx ); + int command = propagate_to_boundary(flag, rng, ctx); /** command possibilities: @@ -2262,52 +2305,55 @@ inline QSIM_METHOD int qsim::propagate(const int bounce, RNG& rng, sctx& ctx ) **/ #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx == base->pidx ) - printf("//qsim.propagate.body pidx %7lld bounce %d command %d flag %d s.optical.x %d s.optical.y %d \n", - ctx.pidx, bounce, command, flag, ctx.s.optical.x, ctx.s.optical.y ); + if (ctx.pidx == base->pidx) + printf("//qsim.propagate.body pidx %7lld bounce %d command %d flag %d s.optical.x %d s.optical.y %d \n", + ctx.pidx, bounce, command, flag, ctx.s.optical.x, ctx.s.optical.y); #endif - if( command == BOUNDARY ) + if (command == BOUNDARY) { - const int& ems = ctx.s.optical.y ; + const int &ems = ctx.s.optical.y; #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx == base->pidx ) + if (ctx.pidx == base->pidx) { #if defined(WITH_CUSTOM4) - printf("//qsim.propagate.body.WITH_CUSTOM4 pidx %7lld BOUNDARY ems %d lposcost %7.3f \n", ctx.pidx, ems, lposcost ); + printf("//qsim.propagate.body.WITH_CUSTOM4 pidx %7lld BOUNDARY ems %d lposcost %7.3f \n", ctx.pidx, ems, + lposcost); #else - printf("//qsim.propagate.body.NOT:WITH_CUSTOM4 pidx %7lld BOUNDARY ems %d lposcost %7.3f \n", ctx.pidx, ems, 
lposcost); + printf("//qsim.propagate.body.NOT:WITH_CUSTOM4 pidx %7lld BOUNDARY ems %d lposcost %7.3f \n", ctx.pidx, ems, + lposcost); #endif } #endif - if( ems == smatsur_NoSurface ) + if (ems == smatsur_NoSurface) { - command = propagate_at_boundary( flag, rng, ctx ) ; + command = propagate_at_boundary(flag, rng, ctx); } - else if( ems == smatsur_Surface ) + else if (ems == smatsur_Surface) { - command = propagate_at_surface( flag, rng, ctx ) ; + command = propagate_at_surface(flag, rng, ctx); } - else if( lposcost < 0.f ) // could combine with prior, but handy for debug to keep separate + else if (lposcost < 0.f) // could combine with prior, but handy for debug to keep separate { #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx == base->pidx ) + if (ctx.pidx == base->pidx) printf("//qsim.propagate.body (lposcost < 0.f) pidx %7lld bounce %d command %d flag %d ems %d \n", - ctx.pidx, bounce, command, flag, ems ); + ctx.pidx, bounce, command, flag, ems); #endif - command = propagate_at_surface( flag, rng, ctx ) ; + command = propagate_at_surface(flag, rng, ctx); } - else if( ems == smatsur_Surface_zplus_sensor_A ) + else if (ems == smatsur_Surface_zplus_sensor_A) { - command = propagate_at_surface_Detect( flag, rng, ctx ) ; + command = propagate_at_surface_Detect(flag, rng, ctx); } - else if( ems == smatsur_Surface_zplus_sensor_CustomART ) + else if (ems == smatsur_Surface_zplus_sensor_CustomART) { #if defined(WITH_CUSTOM4) - command = propagate_at_surface_CustomART( flag, rng, ctx ) ; - //command = base->custom_lut == 0u ? propagate_at_surface_CustomART( flag, rng, ctx ) : propagate_at_surface_MultiFilm(flag, rng, ctx ); + command = propagate_at_surface_CustomART(flag, rng, ctx); + // command = base->custom_lut == 0u ? 
propagate_at_surface_CustomART( flag, rng, ctx ) : + // propagate_at_surface_MultiFilm(flag, rng, ctx ); #endif } @@ -2316,14 +2362,13 @@ inline QSIM_METHOD int qsim::propagate(const int bounce, RNG& rng, sctx& ctx ) // Q: Does flag need to be single bit at this point OR can multiple "flags" be OR-ed together here ? // A: Decided to keep the flag as single bitted, and directly set EFFICENCY_COLLECT/CULL into ctx.p.flagmask - #if !defined(PRODUCTION) && defined(DEBUG_PIDX) - if( ctx.pidx == base->pidx ) - printf("//qsim.propagate.tail pidx %7lld bounce %d command %d flag %d ctx.s.optical.y(ems) %d \n", - ctx.pidx, bounce, command, flag, ctx.s.optical.y ); + if (ctx.pidx == base->pidx) + printf("//qsim.propagate.tail pidx %7lld bounce %d command %d flag %d ctx.s.optical.y(ems) %d \n", ctx.pidx, + bounce, command, flag, ctx.s.optical.y); #endif - return command ; + return command; } /** Q: Where does ctx.s.optical come from ? @@ -2339,8 +2384,6 @@ A: YES, but non-trivially and probably confusingly. This is because **/ - - /** qsim::hemisphere_polarized ------------------------------ @@ -2378,48 +2421,55 @@ inwards. 
**/ -inline QSIM_METHOD void qsim::hemisphere_polarized( unsigned polz, bool inwards, RNG& rng, sctx& ctx ) +inline QSIM_METHOD void qsim::hemisphere_polarized(unsigned polz, bool inwards, RNG &rng, sctx &ctx) { - sphoton& p = ctx.p ; - const float3* normal = ctx.prd->normal() ; + sphoton &p = ctx.p; + const float3 *normal = ctx.prd->normal(); - //printf("//qsim.hemisphere_polarized polz %d normal (%10.4f, %10.4f, %10.4f) \n", polz, normal->x, normal->y, normal->z ); + // printf("//qsim.hemisphere_polarized polz %d normal (%10.4f, %10.4f, %10.4f) \n", polz, normal->x, normal->y, + // normal->z ); - float u_hemipol_phi = curand_uniform(&rng) ; - float phi = u_hemipol_phi*2.f*M_PIf; // 0->2pi - float cosTheta = curand_uniform(&rng) ; // 0->1 + float u_hemipol_phi = curand_uniform(&rng); + float phi = u_hemipol_phi * 2.f * M_PIf; // 0->2pi + float cosTheta = curand_uniform(&rng); // 0->1 #if !defined(PRODUCTION) && defined(DEBUG_TAG) - stagr& tagr = ctx.tagr ; - tagr.add( stag_hp_ph, u_hemipol_phi ); - tagr.add( stag_hp_ph, cosTheta ); // trying to reduce stag::BITS from 5 to 4, so change stag_hp_ct to stag_hp_ph + stagr &tagr = ctx.tagr; + tagr.add(stag_hp_ph, u_hemipol_phi); + tagr.add(stag_hp_ph, cosTheta); // trying to reduce stag::BITS from 5 to 4, so change stag_hp_ct to stag_hp_ph #endif - float sinTheta = sqrtf(1.f-cosTheta*cosTheta); + float sinTheta = sqrtf(1.f - cosTheta * cosTheta); - p.mom.x = cosf(phi)*sinTheta ; - p.mom.y = sinf(phi)*sinTheta ; - p.mom.z = cosTheta ; + p.mom.x = cosf(phi) * sinTheta; + p.mom.y = sinf(phi) * sinTheta; + p.mom.z = cosTheta; - smath::rotateUz( p.mom, (*normal) * ( inwards ? -1.f : 1.f )); + smath::rotateUz(p.mom, (*normal) * (inwards ? -1.f : 1.f)); - //printf("//qsim.hemisphere_polarized polz %d p.mom (%10.4f, %10.4f, %10.4f) \n", polz, p.mom.x, p.mom.y, p.mom.z ); + // printf("//qsim.hemisphere_polarized polz %d p.mom (%10.4f, %10.4f, %10.4f) \n", polz, p.mom.x, p.mom.y, p.mom.z + // ); // what about normal incidence ? 
- const float3 transverse = normalize(cross(p.mom, (*normal) * ( inwards ? -1.f : 1.f ) )) ; // perpendicular to plane of incidence - const float3 within = normalize( cross(p.mom, transverse) ); // within plane of incidence and perpendicular to direction + const float3 transverse = + normalize(cross(p.mom, (*normal) * (inwards ? -1.f : 1.f))); // perpendicular to plane of incidence + const float3 within = + normalize(cross(p.mom, transverse)); // within plane of incidence and perpendicular to direction - switch(polz) + switch (polz) { - case 0: p.pol = transverse ; break ; // S-polarizatiom - case 1: p.pol = within ; break ; // P-polarization - case 2: p.pol = normalize( 0.5f*transverse + (1.f-0.5f)*within ) ; break ; // equal admixture + case 0: + p.pol = transverse; + break; // S-polarizatiom + case 1: + p.pol = within; + break; // P-polarization + case 2: + p.pol = normalize(0.5f * transverse + (1.f - 0.5f) * within); + break; // equal admixture } } - - - /** qsim::generate_photon_simtrace -------------------------------- @@ -2446,68 +2496,96 @@ SEE : sevent::add_simtrace **/ -inline QSIM_METHOD void qsim::generate_photon_simtrace(quad4& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const +inline QSIM_METHOD void qsim::generate_photon_simtrace(quad4 &p, RNG &rng, const quad6 &gs, + unsigned long long photon_id, unsigned genstep_id) const { - const int& gencode = gs.q0.i.x ; - switch(gencode) + const int &gencode = gs.q0.i.x; + switch (gencode) { - case OpticksGenstep_FRAME: generate_photon_simtrace_frame(p, rng, gs, photon_id, genstep_id ); break ; - case OpticksGenstep_INPUT_PHOTON_SIMTRACE: { p = (quad4&)evt->simtrace[photon_id] ; } ; break ; + case OpticksGenstep_FRAME: + generate_photon_simtrace_frame(p, rng, gs, photon_id, genstep_id); + break; + case OpticksGenstep_INPUT_PHOTON_SIMTRACE: { + p = (quad4 &)evt->simtrace[photon_id]; + }; + break; } } -inline QSIM_METHOD void qsim::generate_photon_simtrace_frame(quad4& p, 
RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const +inline QSIM_METHOD void qsim::generate_photon_simtrace_frame(quad4 &p, RNG &rng, const quad6 &gs, + unsigned long long photon_id, unsigned genstep_id) const { - C4U gsid ; + C4U gsid; - //int gencode = gs.q0.i.x ; - int gridaxes = gs.q0.i.y ; // { XYZ, YZ, XZ, XY } - gsid.u = gs.q0.i.z ; - //unsigned num_photons = gs.q0.u.w ; + // int gencode = gs.q0.i.x ; + int gridaxes = gs.q0.i.y; // { XYZ, YZ, XZ, XY } + gsid.u = gs.q0.i.z; + // unsigned num_photons = gs.q0.u.w ; - p.q0.f.x = gs.q1.f.x ; // start with genstep local frame position, typically origin (0,0,0) - p.q0.f.y = gs.q1.f.y ; - p.q0.f.z = gs.q1.f.z ; - p.q0.f.w = 1.f ; + p.q0.f.x = gs.q1.f.x; // start with genstep local frame position, typically origin (0,0,0) + p.q0.f.y = gs.q1.f.y; + p.q0.f.z = gs.q1.f.z; + p.q0.f.w = 1.f; - //printf("//qsim.generate_photon_simtrace_frame gridaxes %d gs.q1 (%10.4f %10.4f %10.4f %10.4f) \n", gridaxes, gs.q1.f.x, gs.q1.f.y, gs.q1.f.z, gs.q1.f.w ); + // printf("//qsim.generate_photon_simtrace_frame gridaxes %d gs.q1 (%10.4f %10.4f %10.4f %10.4f) \n", gridaxes, + // gs.q1.f.x, gs.q1.f.y, gs.q1.f.z, gs.q1.f.w ); float u0 = curand_uniform(&rng); float sinPhi, cosPhi; #if defined(MOCK_CURAND) || defined(MOCK_CUDA) - __sincosf(2.f*M_PIf*u0,&sinPhi,&cosPhi); + __sincosf(2.f * M_PIf * u0, &sinPhi, &cosPhi); #else - sincosf(2.f*M_PIf*u0,&sinPhi,&cosPhi); + sincosf(2.f * M_PIf * u0, &sinPhi, &cosPhi); #endif float u1 = curand_uniform(&rng); - float cosTheta = 2.f*u1 - 1.f ; - float sinTheta = sqrtf(1.f-cosTheta*cosTheta) ; + float cosTheta = 2.f * u1 - 1.f; + float sinTheta = sqrtf(1.f - cosTheta * cosTheta); - //printf("//qsim.generate_photon_simtrace_frame u0 %10.4f sinPhi %10.4f cosPhi %10.4f \n", u0, sinPhi, cosPhi ); - //printf("//qsim.generate_photon_simtrace_frame u1 %10.4f sinTheta %10.4f cosTheta %10.4f \n", u1, sinTheta, cosTheta ); - //printf("//qsim.generate_photon_simtrace_frame u0 
%10.4f sinPhi %10.4f cosPhi %10.4f u1 %10.4f sinTheta %10.4f cosTheta %10.4f \n", u0, sinPhi, cosPhi, u1, sinTheta, cosTheta ); + // printf("//qsim.generate_photon_simtrace_frame u0 %10.4f sinPhi %10.4f cosPhi %10.4f \n", u0, sinPhi, cosPhi + // ); printf("//qsim.generate_photon_simtrace_frame u1 %10.4f sinTheta %10.4f cosTheta %10.4f \n", u1, sinTheta, + // cosTheta ); printf("//qsim.generate_photon_simtrace_frame u0 %10.4f sinPhi %10.4f cosPhi %10.4f u1 %10.4f + // sinTheta %10.4f cosTheta %10.4f \n", u0, sinPhi, cosPhi, u1, sinTheta, cosTheta ); - switch( gridaxes ) + switch (gridaxes) { - case YZ: { p.q1.f.x = 0.f ; p.q1.f.y = cosPhi ; p.q1.f.z = sinPhi ; p.q1.f.w = 0.f ; } ; break ; - case XZ: { p.q1.f.x = cosPhi ; p.q1.f.y = 0.f ; p.q1.f.z = sinPhi ; p.q1.f.w = 0.f ; } ; break ; - case XY: { p.q1.f.x = cosPhi ; p.q1.f.y = sinPhi ; p.q1.f.z = 0.f ; p.q1.f.w = 0.f ; } ; break ; - case XYZ: { p.q1.f.x = sinTheta*cosPhi ; - p.q1.f.y = sinTheta*sinPhi ; - p.q1.f.z = cosTheta ; - p.q1.f.w = 0.f ; } ; break ; // previously used XZ + case YZ: { + p.q1.f.x = 0.f; + p.q1.f.y = cosPhi; + p.q1.f.z = sinPhi; + p.q1.f.w = 0.f; + }; + break; + case XZ: { + p.q1.f.x = cosPhi; + p.q1.f.y = 0.f; + p.q1.f.z = sinPhi; + p.q1.f.w = 0.f; + }; + break; + case XY: { + p.q1.f.x = cosPhi; + p.q1.f.y = sinPhi; + p.q1.f.z = 0.f; + p.q1.f.w = 0.f; + }; + break; + case XYZ: { + p.q1.f.x = sinTheta * cosPhi; + p.q1.f.y = sinTheta * sinPhi; + p.q1.f.z = cosTheta; + p.q1.f.w = 0.f; + }; + break; // previously used XZ } + qat4 qt(gs); // copy 4x4 transform from last 4 quads of genstep + qt.right_multiply_inplace(p.q0.f, 1.f); // position + qt.right_multiply_inplace(p.q1.f, 0.f); // direction - qat4 qt(gs) ; // copy 4x4 transform from last 4 quads of genstep - qt.right_multiply_inplace( p.q0.f, 1.f ); // position - qt.right_multiply_inplace( p.q1.f, 0.f ); // direction - - - unsigned char ucj = (photon_id < 255 ? 
photon_id : 255 ) ; - gsid.c4.w = ucj ; - p.q3.u.w = gsid.u ; // WARNING : THIS GSID LOOKS TO BE STOMPED ON BY sevent::add_simtrace + unsigned char ucj = (photon_id < 255 ? photon_id : 255); + gsid.c4.w = ucj; + p.q3.u.w = gsid.u; // WARNING : THIS GSID LOOKS TO BE STOMPED ON BY sevent::add_simtrace } /** @@ -2518,26 +2596,38 @@ Moved non-standard center-extent (aka frame) gensteps to use qsim::generate_phot **/ -inline QSIM_METHOD void qsim::generate_photon(sphoton& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const +inline QSIM_METHOD void qsim::generate_photon(sphoton &p, RNG &rng, const quad6 &gs, unsigned long long photon_id, + unsigned genstep_id) const { - const int& gencode = gs.q0.i.x ; - switch(gencode) + const int &gencode = gs.q0.i.x; + switch (gencode) { - case OpticksGenstep_CARRIER: scarrier::generate( p, rng, gs, photon_id, genstep_id) ; break ; - case OpticksGenstep_TORCH: storch::generate( p, rng, gs, photon_id, genstep_id ) ; break ; - - case OpticksGenstep_G4Cerenkov_modified: - case OpticksGenstep_CERENKOV: - cerenkov->generate( p, rng, gs, photon_id, genstep_id ) ; break ; - - case OpticksGenstep_DsG4Scintillation_r4695: - case OpticksGenstep_SCINTILLATION: - scint->generate( p, rng, gs, photon_id, genstep_id ) ; break ; - - case OpticksGenstep_INPUT_PHOTON: { p = evt->photon[photon_id] ; p.set_flag(TORCH) ; } ; break ; - default: generate_photon_dummy( p, rng, gs, photon_id, genstep_id) ; break ; + case OpticksGenstep_CARRIER: + scarrier::generate(p, rng, gs, photon_id, genstep_id); + break; + case OpticksGenstep_TORCH: + storch::generate(p, rng, gs, photon_id, genstep_id); + break; + + case OpticksGenstep_G4Cerenkov_modified: + case OpticksGenstep_CERENKOV: + cerenkov->generate(p, rng, gs, photon_id, genstep_id); + break; + + case OpticksGenstep_DsG4Scintillation_r4695: + case OpticksGenstep_SCINTILLATION: + scint->generate(p, rng, gs, photon_id, genstep_id); + break; + + case OpticksGenstep_INPUT_PHOTON: 
{ + p = evt->photon[photon_id]; + p.set_flag(TORCH); + }; + break; + default: + generate_photon_dummy(p, rng, gs, photon_id, genstep_id); + break; } - p.set_index(photon_id) ; + p.set_index(photon_id); } #endif - diff --git a/qudarap/qwls.h b/qudarap/qwls.h new file mode 100644 index 000000000..0e4100726 --- /dev/null +++ b/qudarap/qwls.h @@ -0,0 +1,148 @@ +#pragma once +/** +qwls.h : GPU-side Wavelength Shifting +========================================= + +Device-side struct for WLS wavelength sampling via ICDF texture lookup. +Supports multiple WLS materials indexed by material ID. + +The ICDF texture layout: +- Each WLS material occupies 3 rows (standard, LHS HD, RHS HD) +- material_map[mat_idx] gives the base row for that material (-1 = no WLS) +- time_constants[wls_idx] gives the re-emission time constant in ns + +Wavelength sampling uses the same HD (high-definition) technique as qscint.h: +- hd_factor=20: 20x resolution at extremes (u < 0.05 or u > 0.95) +- Normalized texture coordinates with linear interpolation + +**/ + +#if defined(__CUDACC__) || defined(__CUDABE__) +#define QWLS_METHOD __device__ +#else +#define QWLS_METHOD +#endif + +struct qwls +{ + cudaTextureObject_t wls_tex; // ICDF texture: (num_wls*3, 4096, 1) + unsigned hd_factor; // 0, 10, or 20 + int *material_map; // device ptr: mat_idx -> base ICDF row (-1 = no WLS) + float *time_constants; // device ptr: per-WLS-material time constant (ns) + unsigned num_wls; // number of WLS materials + unsigned tex_height; // total rows in texture = num_wls * 3 + +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA) + + QWLS_METHOD bool has_wls(unsigned mat_idx) const; + QWLS_METHOD float wavelength(unsigned mat_idx, const float &u0) const; + QWLS_METHOD float wavelength_at_row(unsigned base_row, const float &u0) const; + QWLS_METHOD float time_constant(unsigned mat_idx) const; + +#endif +}; + +#if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) 
|| defined(MOCK_CUDA) + +/** +qwls::has_wls +--------------- + +Returns true if material at mat_idx has WLS properties. +The material_map holds -1 for non-WLS materials. + +**/ + +inline QWLS_METHOD bool qwls::has_wls(unsigned mat_idx) const +{ + return material_map[mat_idx] >= 0; +} + +/** +qwls::time_constant +--------------------- + +Returns the WLS re-emission time constant in ns for the given material. +Returns 0.f if material has no WLS (instant re-emission / no delay). + +**/ + +inline QWLS_METHOD float qwls::time_constant(unsigned mat_idx) const +{ + int base_row = material_map[mat_idx]; + if (base_row < 0) + return 0.f; + unsigned wls_idx = base_row / 3; + return time_constants[wls_idx]; +} + +/** +qwls::wavelength +------------------- + +Sample a re-emitted wavelength from the WLS emission spectrum ICDF +for the material at mat_idx, using uniform random u0 in [0,1). + +Returns 0.f if material has no WLS (should not happen in practice +as callers check has_wls first). + +**/ + +inline QWLS_METHOD float qwls::wavelength(unsigned mat_idx, const float &u0) const +{ + int base_row = material_map[mat_idx]; + if (base_row < 0) + return 0.f; + return wavelength_at_row(unsigned(base_row), u0); +} + +/** +qwls::wavelength_at_row +-------------------------- + +ICDF texture lookup with HD (high-definition) support. +base_row is the first of 3 rows for this WLS material: + row 0: standard resolution (full CDF range) + row 1: LHS high-res (0.00 -> 0.05 for hd_factor=20) + row 2: RHS high-res (0.95 -> 1.00 for hd_factor=20) + +Uses normalized texture coordinates with linear interpolation, +matching the qscint.h implementation. 
+ +**/ + +inline QWLS_METHOD float qwls::wavelength_at_row(unsigned base_row, const float &u0) const +{ + float y0 = (float(base_row) + 0.5f) / float(tex_height); + float y1 = (float(base_row + 1) + 0.5f) / float(tex_height); + float y2 = (float(base_row + 2) + 0.5f) / float(tex_height); + + float wl; + + if (hd_factor == 0) + { + wl = tex2D(wls_tex, u0, y0); + } + else if (hd_factor == 10) + { + if (u0 < 0.1f) + wl = tex2D(wls_tex, u0 * 10.f, y1); + else if (u0 > 0.9f) + wl = tex2D(wls_tex, (u0 - 0.9f) * 10.f, y2); + else + wl = tex2D(wls_tex, u0, y0); + } + else // hd_factor == 20 + { + if (u0 < 0.05f) + wl = tex2D(wls_tex, u0 * 20.f, y1); + else if (u0 > 0.95f) + wl = tex2D(wls_tex, (u0 - 0.95f) * 20.f, y2); + else + wl = tex2D(wls_tex, u0, y0); + } + + return wl; +} + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b58cd89c2..996a91cb3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -77,6 +77,24 @@ target_include_directories(GPUPhotonFileSource PRIVATE $ ) +# StandAloneGeant4Validation - pure G4 optical photon simulation (no opticks GPU) +# Links U4 for aligned mode (U4Random, InstrumentedG4OpBoundaryProcess, ShimG4Op*) +add_executable(StandAloneGeant4Validation StandAloneGeant4Validation.cpp StandAloneGeant4Validation.h) +target_link_libraries(StandAloneGeant4Validation gphox gphox_g4_deps U4) +target_compile_definitions(StandAloneGeant4Validation PRIVATE WITH_INSTRUMENTED_DEBUG) +target_include_directories(StandAloneGeant4Validation PRIVATE + $ + $ +) + +# G4ValidationGenstep - pure G4 electron→scintillation/Cerenkov→optical photon simulation +add_executable(G4ValidationGenstep G4ValidationGenstep.cpp G4ValidationGenstep.h) +target_link_libraries(G4ValidationGenstep gphox gphox_g4_deps) +target_include_directories(G4ValidationGenstep PRIVATE + $ + $ +) + # simtox creates a numpy file with initial photons for simulation add_executable(simtox simtox.cpp) @@ -87,7 +105,7 @@ target_include_directories(simtox PRIVATE $ ) 
-install(TARGETS consgeo simg4ox GPUCerenkov GPURaytrace GPUPhotonSource GPUPhotonSourceMinimal GPUPhotonFileSource simtox gphox gphox_g4_deps +install(TARGETS consgeo simg4ox GPUCerenkov GPURaytrace GPUPhotonSource GPUPhotonSourceMinimal GPUPhotonFileSource StandAloneGeant4Validation G4ValidationGenstep simtox gphox gphox_g4_deps EXPORT ${PROJECT_NAME}Targets RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} diff --git a/src/G4ValidationGenstep.cpp b/src/G4ValidationGenstep.cpp new file mode 100644 index 000000000..c37ee170f --- /dev/null +++ b/src/G4ValidationGenstep.cpp @@ -0,0 +1,109 @@ +#include + +#include + +#include "FTFP_BERT.hh" +#include "G4OpticalPhysics.hh" +#include "G4RunManager.hh" +#include "G4UImanager.hh" +#include "G4VModularPhysicsList.hh" + +#include "G4ValidationGenstep.h" + +using namespace std; + +int main(int argc, char **argv) +{ + argparse::ArgumentParser program("G4ValidationGenstep", "0.0.0"); + + string gdml_file; + double energy_MeV = 1.0; + int num_events = 1; + + program.add_argument("-g", "--gdml") + .help("path to GDML file") + .default_value(string("apex.gdml")) + .nargs(1) + .store_into(gdml_file); + + program.add_argument("-e", "--energy") + .help("electron kinetic energy in MeV") + .default_value(1.0) + .scan<'g', double>() + .store_into(energy_MeV); + + program.add_argument("-n", "--nevents") + .help("number of events") + .default_value(1) + .scan<'i', int>() + .store_into(num_events); + + program.add_argument("-s", "--seed").help("random seed").scan<'i', long>(); + + program.add_argument("--pos") + .help("electron position x,y,z in mm (comma-separated)") + .default_value(string("0,0,0")); + + program.add_argument("--dir").help("electron direction x,y,z (comma-separated)").default_value(string("0,0,1")); + + try + { + program.parse_args(argc, argv); + } + catch (const exception &err) + { + cerr << err.what() << endl; + cerr << program; + exit(EXIT_FAILURE); + } + + long seed; + if 
(program.is_used("--seed")) + seed = program.get("--seed"); + else + seed = static_cast(time(nullptr)); + + // Parse position + G4ThreeVector pos(0, 0, 0); + { + string s = program.get("--pos"); + float x, y, z; + if (sscanf(s.c_str(), "%f,%f,%f", &x, &y, &z) == 3) + pos = G4ThreeVector(x, y, z); + } + + // Parse direction + G4ThreeVector dir(0, 0, 1); + { + string s = program.get("--dir"); + float x, y, z; + if (sscanf(s.c_str(), "%f,%f,%f", &x, &y, &z) == 3) + dir = G4ThreeVector(x, y, z); + } + + G4cout << "G4ValidationGenstep:" << G4endl; + G4cout << " GDML: " << gdml_file << G4endl; + G4cout << " Energy: " << energy_MeV << " MeV" << G4endl; + G4cout << " Events: " << num_events << G4endl; + G4cout << " Position: (" << pos.x() << "," << pos.y() << "," << pos.z() << ") mm" << G4endl; + G4cout << " Direction: (" << dir.x() << "," << dir.y() << "," << dir.z() << ")" << G4endl; + G4cout << " Seed: " << seed << G4endl; + + GenstepHitAccumulator accumulator; + + G4VModularPhysicsList *physics = new FTFP_BERT; + physics->RegisterPhysics(new G4OpticalPhysics); + + G4RunManager run_mgr; + run_mgr.SetUserInitialization(physics); + run_mgr.SetUserInitialization(new GenstepDetectorConstruction(gdml_file, &accumulator)); + run_mgr.SetUserInitialization(new GenstepActionInitialization(&accumulator, pos, dir, energy_MeV, num_events)); + run_mgr.Initialize(); + + CLHEP::HepRandom::setTheSeed(seed); + + G4cout << "G4Genstep: Starting " << num_events << " events..." << G4endl; + run_mgr.BeamOn(num_events); + + return EXIT_SUCCESS; +} diff --git a/src/G4ValidationGenstep.h b/src/G4ValidationGenstep.h new file mode 100644 index 000000000..b58211330 --- /dev/null +++ b/src/G4ValidationGenstep.h @@ -0,0 +1,277 @@ +#pragma once +/** +G4ValidationGenstep.h +====================== + +Pure G4 simulation with electron primary that produces scintillation/Cerenkov +optical photons. G4 handles all physics including optical photon propagation. +Collects hits via sensitive detector. 
Used as the CPU reference for comparison +with GPU (simg4ox) genstep-based optical simulation. +**/ + +#include +#include +#include + +#include "G4Electron.hh" +#include "G4Event.hh" +#include "G4GDMLParser.hh" +#include "G4OpticalPhoton.hh" +#include "G4PhysicalConstants.hh" +#include "G4PrimaryParticle.hh" +#include "G4PrimaryVertex.hh" +#include "G4Run.hh" +#include "G4SDManager.hh" +#include "G4SystemOfUnits.hh" +#include "G4THitsCollection.hh" +#include "G4ThreeVector.hh" +#include "G4Track.hh" +#include "G4TrackStatus.hh" +#include "G4UserEventAction.hh" +#include "G4UserRunAction.hh" +#include "G4VHit.hh" +#include "G4VPhysicalVolume.hh" +#include "G4VUserActionInitialization.hh" +#include "G4VUserDetectorConstruction.hh" +#include "G4VUserPrimaryGeneratorAction.hh" + +#include "sysrap/NP.hh" +#include "sysrap/sphoton.h" + +// ---- Hit accumulator ---- + +struct GenstepHitAccumulator +{ + std::mutex mtx; + std::vector hits; + int total_optical_photons = 0; + int total_scintillation = 0; + int total_cerenkov = 0; + + void AddHits(const std::vector &event_hits) + { + std::lock_guard lock(mtx); + hits.insert(hits.end(), event_hits.begin(), event_hits.end()); + } + + void Save(const char *filename) + { + std::lock_guard lock(mtx); + G4int num_hits = hits.size(); + NP *arr = NP::Make(num_hits, 4, 4); + for (int i = 0; i < num_hits; i++) + { + float *data = reinterpret_cast(&hits[i]); + std::copy(data, data + 16, arr->values() + i * 16); + } + arr->save(filename); + delete arr; + G4cout << "G4Genstep: Saved " << num_hits << " hits to " << filename << G4endl; + } +}; + +// ---- Sensitive Detector ---- + +struct GenstepPhotonHit : public G4VHit +{ + GenstepPhotonHit() = default; + + GenstepPhotonHit(G4double energy, G4double time, G4ThreeVector position, G4ThreeVector direction, + G4ThreeVector polarization) + : photon() + { + photon.pos = {static_cast(position.x()), static_cast(position.y()), + static_cast(position.z())}; + photon.time = time; + photon.mom = 
{static_cast(direction.x()), static_cast(direction.y()), + static_cast(direction.z())}; + photon.pol = {static_cast(polarization.x()), static_cast(polarization.y()), + static_cast(polarization.z())}; + photon.wavelength = h_Planck * c_light / (energy * CLHEP::eV); + } + + void Print() override + { + G4cout << photon << G4endl; + } + sphoton photon; +}; + +using GenstepPhotonHitsCollection = G4THitsCollection; + +struct GenstepPhotonSD : public G4VSensitiveDetector +{ + GenstepHitAccumulator *accumulator; + + GenstepPhotonSD(G4String name, GenstepHitAccumulator *acc) : G4VSensitiveDetector(name), accumulator(acc) + { + collectionName.insert(name + "_HC"); + } + + void Initialize(G4HCofThisEvent *hce) override + { + fHC = new GenstepPhotonHitsCollection(SensitiveDetectorName, collectionName[0]); + if (fHCID < 0) + fHCID = G4SDManager::GetSDMpointer()->GetCollectionID(collectionName[0]); + hce->AddHitsCollection(fHCID, fHC); + } + + G4bool ProcessHits(G4Step *aStep, G4TouchableHistory *) override + { + G4Track *track = aStep->GetTrack(); + if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition()) + return false; + + fHC->insert(new GenstepPhotonHit( + track->GetTotalEnergy(), track->GetGlobalTime(), aStep->GetPostStepPoint()->GetPosition(), + aStep->GetPostStepPoint()->GetMomentumDirection(), aStep->GetPostStepPoint()->GetPolarization())); + + track->SetTrackStatus(fStopAndKill); + return true; + } + + void EndOfEvent(G4HCofThisEvent *) override + { + G4int n = fHC->entries(); + std::vector event_hits; + event_hits.reserve(n); + for (GenstepPhotonHit *hit : *fHC->GetVector()) + event_hits.push_back(hit->photon); + accumulator->AddHits(event_hits); + } + + private: + GenstepPhotonHitsCollection *fHC = nullptr; + G4int fHCID = -1; +}; + +// ---- Detector Construction ---- + +struct GenstepDetectorConstruction : G4VUserDetectorConstruction +{ + GenstepDetectorConstruction(std::filesystem::path gdml_file, GenstepHitAccumulator *acc) + : 
gdml_file_(gdml_file), accumulator_(acc) + { + } + + G4VPhysicalVolume *Construct() override + { + parser_.Read(gdml_file_.string(), false); + return parser_.GetWorldVolume(); + } + + void ConstructSDandField() override + { + G4SDManager *SDman = G4SDManager::GetSDMpointer(); + const G4GDMLAuxMapType *auxmap = parser_.GetAuxMap(); + + for (auto const &[logVol, listType] : *auxmap) + { + for (auto const &auxtype : listType) + { + if (auxtype.type == "SensDet") + { + G4String name = logVol->GetName() + "_" + auxtype.value; + G4cout << "G4Genstep: Attaching SD to " << logVol->GetName() << G4endl; + GenstepPhotonSD *sd = new GenstepPhotonSD(name, accumulator_); + SDman->AddNewDetector(sd); + logVol->SetSensitiveDetector(sd); + } + } + } + } + + private: + std::filesystem::path gdml_file_; + G4GDMLParser parser_; + GenstepHitAccumulator *accumulator_; +}; + +// ---- Electron Primary Generator ---- + +struct ElectronPrimaryGenerator : G4VUserPrimaryGeneratorAction +{ + G4ThreeVector position; + G4ThreeVector direction; + G4double energy_MeV; + + ElectronPrimaryGenerator(G4ThreeVector pos, G4ThreeVector dir, G4double energy) + : position(pos), direction(dir.unit()), energy_MeV(energy) + { + } + + void GeneratePrimaries(G4Event *event) override + { + G4PrimaryVertex *vertex = new G4PrimaryVertex(position, 0.0); + G4PrimaryParticle *particle = new G4PrimaryParticle(G4Electron::Definition()); + particle->SetKineticEnergy(energy_MeV * MeV); + particle->SetMomentumDirection(direction); + vertex->SetPrimary(particle); + event->AddPrimaryVertex(vertex); + } +}; + +// ---- Event Action with optical photon counting ---- + +struct GenstepEventAction : G4UserEventAction +{ + GenstepHitAccumulator *accumulator; + int total_events; + + GenstepEventAction(GenstepHitAccumulator *acc, int total) : accumulator(acc), total_events(total) + { + } + + void EndOfEventAction(const G4Event *event) override + { + int id = event->GetEventID(); + if (id == 0 || (id + 1) % 10 == 0 || id + 1 == 
total_events) + G4cout << "G4Genstep: Event " << id + 1 << "/" << total_events << G4endl; + } +}; + +// ---- Run Action ---- + +struct GenstepRunAction : G4UserRunAction +{ + GenstepHitAccumulator *accumulator; + + GenstepRunAction(GenstepHitAccumulator *acc) : accumulator(acc) + { + } + + void EndOfRunAction(const G4Run *) override + { + G4cout << "G4Genstep: Total hits: " << accumulator->hits.size() << G4endl; + accumulator->Save("g4_genstep_hits.npy"); + } +}; + +// ---- Action Initialization ---- + +struct GenstepActionInitialization : G4VUserActionInitialization +{ + GenstepHitAccumulator *accumulator; + G4ThreeVector position; + G4ThreeVector direction; + G4double energy_MeV; + int num_events; + + GenstepActionInitialization(GenstepHitAccumulator *acc, G4ThreeVector pos, G4ThreeVector dir, G4double energy, + int nevt) + : accumulator(acc), position(pos), direction(dir), energy_MeV(energy), num_events(nevt) + { + } + + void BuildForMaster() const override + { + SetUserAction(new GenstepRunAction(accumulator)); + } + + void Build() const override + { + SetUserAction(new ElectronPrimaryGenerator(position, direction, energy_MeV)); + SetUserAction(new GenstepEventAction(accumulator, num_events)); + SetUserAction(new GenstepRunAction(accumulator)); + } +}; diff --git a/src/GPURaytrace.cpp b/src/GPURaytrace.cpp index c97c7ec11..bb668f293 100644 --- a/src/GPURaytrace.cpp +++ b/src/GPURaytrace.cpp @@ -11,7 +11,7 @@ #include "G4VisExecutive.hh" #include "sysrap/OPTICKS_LOG.hh" - +#include "config.h" #include "GPURaytrace.h" #include "G4RunManager.hh" @@ -68,6 +68,11 @@ int main(int argc, char **argv) .nargs(1) .store_into(macro_name); + program.add_argument("-c", "--config") + .help("config file name (without .json extension)") + .default_value(string("")) + .nargs(1); + program.add_argument("-i", "--interactive") .help("whether to open an interactive window with a viewer") .flag() @@ -108,6 +113,14 @@ int main(int argc, char **argv) G4App *g4app = new 
G4App(gdml_file); + // Load config and apply savephotonhistory flag if provided + string config_name = program.get("--config"); + if (!config_name.empty()) + { + gphox::Config cfg(config_name); + g4app->run_act_->fSavePhotonHistory = cfg.savephotonhistory; + } + ActionInitialization *actionInit = new ActionInitialization(g4app); run_mgr->SetUserInitialization(actionInit); run_mgr->SetUserInitialization(g4app->det_cons_); diff --git a/src/GPURaytrace.h b/src/GPURaytrace.h index f8443a167..589e60d75 100644 --- a/src/GPURaytrace.h +++ b/src/GPURaytrace.h @@ -1,3 +1,4 @@ +#include #include #include #include @@ -47,7 +48,9 @@ namespace { G4Mutex genstep_mutex = G4MUTEX_INITIALIZER; -} +G4Mutex g4hits_mutex = G4MUTEX_INITIALIZER; +std::vector> g4_accumulated_hits; +} // namespace bool IsSubtractionSolid(G4VSolid *solid) { @@ -331,7 +334,26 @@ struct EventAction : G4UserEventAction for (G4int i = 0; i < hce->GetNumberOfCollections(); i++) { G4VHitsCollection *hc = hce->GetHC(i); - if (hc) + if (!hc) + continue; + + PhotonHitsCollection *phc = dynamic_cast(hc); + if (phc) + { + G4AutoLock lock(&g4hits_mutex); + for (size_t j = 0; j < phc->entries(); j++) + { + PhotonHit *hit = (*phc)[j]; + float wl = 1239.84198f / static_cast(hit->fenergy); + g4_accumulated_hits.push_back( + {float(hit->fposition.x()), float(hit->fposition.y()), float(hit->fposition.z()), + float(hit->ftime), float(hit->fdirection.x()), float(hit->fdirection.y()), + float(hit->fdirection.z()), 0.f, float(hit->fpolarization.x()), + float(hit->fpolarization.y()), float(hit->fpolarization.z()), wl, 0.f, 0.f, 0.f, 0.f}); + } + fTotalG4Hits += phc->entries(); + } + else { fTotalG4Hits += hc->GetSize(); } @@ -348,6 +370,7 @@ struct EventAction : G4UserEventAction struct RunAction : G4UserRunAction { EventAction *fEventAction; + bool fSavePhotonHistory{false}; RunAction(EventAction *eventAction) : fEventAction(eventAction) { @@ -379,44 +402,39 @@ struct RunAction : G4UserRunAction std::cout << "Opticks: 
NumCollected: " << sev->GetNumPhotonCollected(0) << std::endl; std::cout << "Opticks: NumHits: " << num_hits << std::endl; std::cout << "Geant4: NumHits: " << fEventAction->GetTotalG4Hits() << std::endl; - std::ofstream outFile("opticks_hits_output.txt"); - if (!outFile.is_open()) - { - std::cerr << "Error opening output file!" << std::endl; - return; - } - for (int idx = 0; idx < int(num_hits); idx++) + if (fSavePhotonHistory) { - sphoton hit; - sev->getHit(hit, idx); - G4ThreeVector position = G4ThreeVector(hit.pos.x, hit.pos.y, hit.pos.z); - G4ThreeVector direction = G4ThreeVector(hit.mom.x, hit.mom.y, hit.mom.z); - G4ThreeVector polarization = G4ThreeVector(hit.pol.x, hit.pol.y, hit.pol.z); - int theCreationProcessid; - if (OpticksPhoton::HasCerenkovFlag(hit.flagmask)) - { - theCreationProcessid = 0; - } - else if (OpticksPhoton::HasScintillationFlag(hit.flagmask)) + // Save full SEvt (photon, record, seq, hit) when DebugLite/DebugHeavy + sev->save(); + std::cout << "SEvt::save() complete" << std::endl; + + // Save GPU hits as .npy (sphoton layout: N x 4 x 4 float32) { - theCreationProcessid = 1; + NP *gpu_h = NP::Make(num_hits, 4, 4); + for (unsigned idx = 0; idx < num_hits; idx++) + { + sphoton hit; + sev->getHit(hit, idx); + memcpy(gpu_h->bytes() + idx * sizeof(sphoton), &hit, sizeof(sphoton)); + } + gpu_h->save("gpu_hits.npy"); + std::cout << "Saved GPU hits: " << num_hits << " to gpu_hits.npy" << std::endl; } - else + + // Save G4 hits as .npy (same layout: N x 4 x 4 float32) { - theCreationProcessid = -1; + G4AutoLock lock(&g4hits_mutex); + size_t ng4 = g4_accumulated_hits.size(); + if (ng4 > 0) + { + NP *g4h = NP::Make(ng4, 4, 4); + memcpy(g4h->bytes(), g4_accumulated_hits.data(), ng4 * 16 * sizeof(float)); + g4h->save("g4_hits.npy"); + std::cout << "Saved G4 hits: " << ng4 << " to g4_hits.npy" << std::endl; + } } - // std::cout << "Adding hit from Opticks:" << hit.wavelength << " " << position << " " << direction - // << " - // " - // << polarization << 
std::endl; - outFile << hit.time << " " << hit.wavelength << " " << "(" << position.x() << ", " << position.y() - << ", " << position.z() << ") " << "(" << direction.x() << ", " << direction.y() << ", " - << direction.z() << ") " << "(" << polarization.x() << ", " << polarization.y() << ", " - << polarization.z() << ") " << "CreationProcessID=" << theCreationProcessid << std::endl; } - - outFile.close(); } } }; @@ -523,10 +541,42 @@ struct SteppingAction : G4UserSteppingAction << G4endl; return; } - G4double SCINTILLATIONTIMECONSTANT1 = MPT->GetConstProperty(kSCINTILLATIONTIMECONSTANT1); + // G4 11.x supports up to 3 scintillation components + const G4int tcKeys[3] = {kSCINTILLATIONTIMECONSTANT1, kSCINTILLATIONTIMECONSTANT2, kSCINTILLATIONTIMECONSTANT3}; + const G4int yieldKeys[3] = {kSCINTILLATIONYIELD1, kSCINTILLATIONYIELD2, kSCINTILLATIONYIELD3}; - U4::CollectGenstep_DsG4Scintillation_r4695(aTrack, aStep, fNumPhotons, 1, - SCINTILLATIONTIMECONSTANT1); + G4double tc[3] = {0, 0, 0}; + G4double yield[3] = {0, 0, 0}; + G4double yieldSum = 0; + G4int nComp = 0; + + for (G4int c = 0; c < 3; c++) + { + if (MPT->ConstPropertyExists(tcKeys[c])) + { + tc[c] = MPT->GetConstProperty(tcKeys[c]); + yield[c] = MPT->ConstPropertyExists(yieldKeys[c]) + ? MPT->GetConstProperty(yieldKeys[c]) + : (c == 0 ? 
1.0 : 0.0); + yieldSum += yield[c]; + nComp = c + 1; + } + } + + G4AutoLock lock(&genstep_mutex); + G4int nRemaining = fNumPhotons; + for (G4int c = 0; c < nComp; c++) + { + G4int nPhotComp; + if (c == nComp - 1) + nPhotComp = nRemaining; // last component gets remainder + else + nPhotComp = static_cast(fNumPhotons * yield[c] / yieldSum); + nRemaining -= nPhotComp; + + if (nPhotComp > 0) + U4::CollectGenstep_DsG4Scintillation_r4695(aTrack, aStep, nPhotComp, c + 1, tc[c]); + } } } } diff --git a/src/StandAloneGeant4Validation.cpp b/src/StandAloneGeant4Validation.cpp new file mode 100644 index 000000000..40e3102fa --- /dev/null +++ b/src/StandAloneGeant4Validation.cpp @@ -0,0 +1,163 @@ +#include +#include + +#include + +#include "FTFP_BERT.hh" +#include "G4MTRunManager.hh" +#include "G4OpticalPhysics.hh" +#include "G4RunManager.hh" +#include "G4UImanager.hh" +#include "G4VModularPhysicsList.hh" + +#include "G4OpticalParameters.hh" + +#include "StandAloneGeant4Validation.h" +#include "config.h" + +using namespace std; + +int main(int argc, char **argv) +{ + argparse::ArgumentParser program("StandAloneGeant4Validation", "0.0.0"); + + string gdml_file, config_name; + int num_threads = 0; + + program.add_argument("-g", "--gdml") + .help("path to GDML file") + .default_value(string("geom.gdml")) + .nargs(1) + .store_into(gdml_file); + + program.add_argument("-c", "--config") + .help("the name of a config file") + .default_value(string("dev")) + .nargs(1) + .store_into(config_name); + + program.add_argument("-s", "--seed").help("fixed random seed (default: time-based)").scan<'i', long>(); + + program.add_argument("-t", "--threads") + .help("number of threads (0=sequential, default: hardware concurrency)") + .default_value(-1) + .scan<'i', int>() + .store_into(num_threads); + + program.add_argument("--aligned") + .help("enable photon-by-photon aligned comparison with GPU (forces sequential)") + .default_value(false) + .implicit_value(true); + + try + { + 
program.parse_args(argc, argv); + } + catch (const exception &err) + { + cerr << err.what() << endl; + cerr << program; + exit(EXIT_FAILURE); + } + + long seed; + if (program.is_used("--seed")) + seed = program.get("--seed"); + else + seed = static_cast(time(nullptr)); + + bool aligned = program.get("--aligned"); + + gphox::Config cfg(config_name); + int total_photons = cfg.torch.numphoton; + + // Aligned mode forces sequential (U4Random is single-threaded) + if (aligned) + num_threads = 0; + + // Determine threading mode + bool use_mt = (num_threads != 0); + if (num_threads < 0) + num_threads = std::thread::hardware_concurrency(); + if (num_threads < 1) + num_threads = 1; + + // In MT mode: split photons across events, one event per thread-batch + // In sequential mode: one event with all photons (original behavior) + int num_events, photons_per_event; + if (use_mt) + { + num_events = num_threads * 4; // 4 events per thread for load balancing + photons_per_event = (total_photons + num_events - 1) / num_events; + // Adjust num_events so we don't overshoot + num_events = (total_photons + photons_per_event - 1) / photons_per_event; + } + else + { + num_events = 1; + photons_per_event = total_photons; + } + + int actual_photons = num_events * photons_per_event; + + G4cout << "Random seed set to: " << seed << G4endl; + G4cout << "G4: " << total_photons << " photons, " << num_events << " events x " << photons_per_event + << " photons/event" << " (" << actual_photons << " actual)" + << (use_mt ? 
", " + to_string(num_threads) + " threads" : ", sequential") << G4endl; + + HitAccumulator accumulator; + PhotonFateAccumulator fate; + + if (aligned) + fate.Resize(total_photons); + + G4VModularPhysicsList *physics = new FTFP_BERT; + if (aligned) + physics->RegisterPhysics(new AlignedOpticalPhysics); + else + physics->RegisterPhysics(new G4OpticalPhysics); + + // Use exponential WLS time profile (default is delta = zero delay) + G4OpticalParameters::Instance()->SetWLSTimeProfile("exponential"); + + if (use_mt) + { + auto *run_mgr = new G4MTRunManager; + run_mgr->SetNumberOfThreads(num_threads); + run_mgr->SetUserInitialization(physics); + run_mgr->SetUserInitialization(new G4OnlyDetectorConstruction(gdml_file, &accumulator)); + run_mgr->SetUserInitialization( + new G4OnlyActionInitialization(cfg, &accumulator, &fate, photons_per_event, num_events, aligned)); + run_mgr->Initialize(); + + CLHEP::HepRandom::setTheSeed(seed); + + G4cout << "G4: Starting MT run with " << num_events << " events..." << G4endl; + run_mgr->BeamOn(num_events); + + delete run_mgr; + } + else + { + G4RunManager run_mgr; + run_mgr.SetUserInitialization(physics); + run_mgr.SetUserInitialization(new G4OnlyDetectorConstruction(gdml_file, &accumulator)); + run_mgr.SetUserInitialization( + new G4OnlyActionInitialization(cfg, &accumulator, &fate, photons_per_event, num_events, aligned)); + + if (aligned) + { + G4cout << "G4: Aligned mode — creating U4Random" << G4endl; + U4Random::Create(); + } + + run_mgr.Initialize(); + + CLHEP::HepRandom::setTheSeed(seed); + + G4cout << "G4: Starting sequential run..." 
<< G4endl; + run_mgr.BeamOn(num_events); + } + + return EXIT_SUCCESS; +} diff --git a/src/StandAloneGeant4Validation.h b/src/StandAloneGeant4Validation.h new file mode 100644 index 000000000..49dd92c67 --- /dev/null +++ b/src/StandAloneGeant4Validation.h @@ -0,0 +1,625 @@ +#pragma once + +#include +#include +#include +#include + +#include "G4Event.hh" +#include "G4GDMLParser.hh" +#include "G4OpBoundaryProcess.hh" +#include "G4OpWLS.hh" +#include "G4OpticalPhoton.hh" +#include "G4PhysicalConstants.hh" +#include "G4PrimaryParticle.hh" +#include "G4PrimaryVertex.hh" +#include "G4ProcessManager.hh" +#include "G4Run.hh" +#include "G4SDManager.hh" +#include "G4SystemOfUnits.hh" +#include "G4THitsCollection.hh" +#include "G4ThreeVector.hh" +#include "G4Track.hh" +#include "G4TrackStatus.hh" +#include "G4UserEventAction.hh" +#include "G4UserRunAction.hh" +#include "G4UserSteppingAction.hh" +#include "G4UserTrackingAction.hh" +#include "G4VHit.hh" +#include "G4VPhysicalVolume.hh" +#include "G4VPhysicsConstructor.hh" +#include "G4VUserActionInitialization.hh" +#include "G4VUserDetectorConstruction.hh" +#include "G4VUserPrimaryGeneratorAction.hh" + +#include "ShimG4OpAbsorption.hh" +#include "ShimG4OpRayleigh.hh" +#include "U4Random.hh" + +#include "sysrap/NP.hh" +#include "sysrap/sphoton.h" + +#include "config.h" +#include "torch.h" + +// ---- Global hit accumulator (thread-safe) ---- + +struct HitAccumulator +{ + std::mutex mtx; + std::vector hits; + + void AddHits(const std::vector &event_hits) + { + std::lock_guard lock(mtx); + hits.insert(hits.end(), event_hits.begin(), event_hits.end()); + } + + void Save(const char *filename) + { + std::lock_guard lock(mtx); + G4int num_hits = hits.size(); + NP *arr = NP::Make(num_hits, 4, 4); + for (int i = 0; i < num_hits; i++) + { + float *data = reinterpret_cast(&hits[i]); + std::copy(data, data + 16, arr->values() + i * 16); + } + arr->save(filename); + delete arr; + G4cout << "G4: Saved " << num_hits << " total hits to " << 
filename << G4endl; + } +}; + +// ---- Sensitive Detector: collects optical photon hits per event ---- + +struct G4PhotonHit : public G4VHit +{ + G4PhotonHit() = default; + + G4PhotonHit(G4double energy, G4double time, G4ThreeVector position, G4ThreeVector direction, + G4ThreeVector polarization) + : photon() + { + photon.pos = {static_cast(position.x()), static_cast(position.y()), + static_cast(position.z())}; + photon.time = time; + photon.mom = {static_cast(direction.x()), static_cast(direction.y()), + static_cast(direction.z())}; + photon.pol = {static_cast(polarization.x()), static_cast(polarization.y()), + static_cast(polarization.z())}; + photon.wavelength = h_Planck * c_light / (energy * CLHEP::eV); + } + + void Print() override + { + G4cout << photon << G4endl; + } + + sphoton photon; +}; + +using G4PhotonHitsCollection = G4THitsCollection; + +struct G4PhotonSD : public G4VSensitiveDetector +{ + HitAccumulator *accumulator; + + G4PhotonSD(G4String name, HitAccumulator *acc) : G4VSensitiveDetector(name), accumulator(acc) + { + G4String HCname = name + "_HC"; + collectionName.insert(HCname); + } + + void Initialize(G4HCofThisEvent *hce) override + { + fHitsCollection = new G4PhotonHitsCollection(SensitiveDetectorName, collectionName[0]); + if (fHCID < 0) + fHCID = G4SDManager::GetSDMpointer()->GetCollectionID(collectionName[0]); + hce->AddHitsCollection(fHCID, fHitsCollection); + } + + G4bool ProcessHits(G4Step *aStep, G4TouchableHistory *) override + { + G4Track *track = aStep->GetTrack(); + if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition()) + return false; + + G4PhotonHit *hit = new G4PhotonHit( + track->GetTotalEnergy(), track->GetGlobalTime(), aStep->GetPostStepPoint()->GetPosition(), + aStep->GetPostStepPoint()->GetMomentumDirection(), aStep->GetPostStepPoint()->GetPolarization()); + + fHitsCollection->insert(hit); + track->SetTrackStatus(fStopAndKill); + return true; + } + + void EndOfEvent(G4HCofThisEvent *) override + { + G4int 
num_hits = fHitsCollection->entries(); + + std::vector event_hits; + event_hits.reserve(num_hits); + for (G4PhotonHit *hit : *fHitsCollection->GetVector()) + event_hits.push_back(hit->photon); + + accumulator->AddHits(event_hits); + } + + private: + G4PhotonHitsCollection *fHitsCollection = nullptr; + G4int fHCID = -1; +}; + +// ---- Detector Construction: loads GDML, attaches SD ---- + +struct G4OnlyDetectorConstruction : G4VUserDetectorConstruction +{ + G4OnlyDetectorConstruction(std::filesystem::path gdml_file, HitAccumulator *acc) + : gdml_file_(gdml_file), accumulator_(acc) + { + } + + G4VPhysicalVolume *Construct() override + { + parser_.Read(gdml_file_.string(), false); + return parser_.GetWorldVolume(); + } + + void ConstructSDandField() override + { + G4SDManager *SDman = G4SDManager::GetSDMpointer(); + const G4GDMLAuxMapType *auxmap = parser_.GetAuxMap(); + + for (auto const &[logVol, listType] : *auxmap) + { + for (auto const &auxtype : listType) + { + if (auxtype.type == "SensDet") + { + G4String name = logVol->GetName() + "_" + auxtype.value; + G4cout << "G4: Attaching SD to " << logVol->GetName() << G4endl; + G4PhotonSD *sd = new G4PhotonSD(name, accumulator_); + SDman->AddNewDetector(sd); + logVol->SetSensitiveDetector(sd); + } + } + } + } + + private: + std::filesystem::path gdml_file_; + G4GDMLParser parser_; + HitAccumulator *accumulator_; +}; + +// ---- Primary Generator: distributes photons across events ---- + +struct G4OnlyPrimaryGenerator : G4VUserPrimaryGeneratorAction +{ + gphox::Config cfg; + int photons_per_event; + + G4OnlyPrimaryGenerator(const gphox::Config &cfg, int photons_per_event) + : cfg(cfg), photons_per_event(photons_per_event) + { + } + + void GeneratePrimaries(G4Event *event) override + { + int eventID = event->GetEventID(); + + // Generate photons for this event's batch using event-specific seed offset + storch t = cfg.torch; + t.numphoton = photons_per_event; + std::vector sphotons = generate_photons(t, photons_per_event, 
eventID); + + for (const sphoton &p : sphotons) + { + G4ThreeVector position(p.pos.x, p.pos.y, p.pos.z); + G4ThreeVector direction(p.mom.x, p.mom.y, p.mom.z); + G4ThreeVector polarization(p.pol.x, p.pol.y, p.pol.z); + G4double wavelength_nm = p.wavelength; + + G4PrimaryVertex *vertex = new G4PrimaryVertex(position, p.time); + G4double energy = h_Planck * c_light / (wavelength_nm * nm); + + G4PrimaryParticle *particle = new G4PrimaryParticle(G4OpticalPhoton::Definition()); + particle->SetKineticEnergy(energy); + particle->SetMomentumDirection(direction); + particle->SetPolarization(polarization); + + vertex->SetPrimary(particle); + event->AddPrimaryVertex(vertex); + } + } +}; + +// ---- Photon fate accumulator: tracks ALL photon final states ---- + +struct PhotonFateAccumulator +{ + std::mutex mtx; + std::vector photons; + bool indexed = false; // true for aligned mode: store by photon index + + // Opticks flag enum values + static constexpr unsigned TORCH = 0x0004; + static constexpr unsigned BULK_ABSORB = 0x0008; + static constexpr unsigned BULK_REEMIT = 0x0010; + static constexpr unsigned BULK_SCATTER = 0x0020; + static constexpr unsigned SURFACE_DETECT = 0x0040; + static constexpr unsigned SURFACE_ABSORB = 0x0080; + static constexpr unsigned SURFACE_DREFLECT = 0x0100; + static constexpr unsigned SURFACE_SREFLECT = 0x0200; + static constexpr unsigned BOUNDARY_REFLECT = 0x0400; + static constexpr unsigned BOUNDARY_TRANSMIT = 0x0800; + static constexpr unsigned MISS = 0x8000; + + void Resize(int n) + { + photons.resize(n); + indexed = true; + } + + void Set(int idx, const sphoton &p) + { + if (idx >= 0 && idx < (int)photons.size()) + photons[idx] = p; + } + + void Add(const sphoton &p) + { + std::lock_guard lock(mtx); + photons.push_back(p); + } + + void Save(const char *filename) + { + std::lock_guard lock(mtx); + int n = photons.size(); + NP *arr = NP::Make(n, 4, 4); + for (int i = 0; i < n; i++) + { + float *data = reinterpret_cast(&photons[i]); + 
std::copy(data, data + 16, arr->values() + i * 16); + } + arr->save(filename); + delete arr; + G4cout << "G4: Saved " << n << " photon fates to " << filename << G4endl; + } +}; + +// ---- Stepping Action: tracks photon fates with opticks-compatible flags ---- + +struct G4OnlySteppingAction : G4UserSteppingAction +{ + PhotonFateAccumulator *fate; + bool aligned; + std::map proc_death_counts; + std::map boundary_status_counts; + std::mutex count_mtx; + + G4OnlySteppingAction(PhotonFateAccumulator *f, bool aligned_ = false) : fate(f), aligned(aligned_) + { + } + + ~G4OnlySteppingAction() + { + std::lock_guard lock(count_mtx); + if (!proc_death_counts.empty()) + { + G4cout << "\nG4: Photon death process summary:" << G4endl; + for (auto &[name, count] : proc_death_counts) + G4cout << " " << name << ": " << count << G4endl; + } + if (!boundary_status_counts.empty()) + { + G4cout << "\nG4: OpBoundary status counts (all steps):" << G4endl; + const char *bnames[] = {"Undefined", + "Transmission", + "FresnelRefraction", + "FresnelReflection", + "TotalInternalReflection", + "LambertianReflection", + "LobeReflection", + "SpikeReflection", + "BackScattering", + "Absorption", + "Detection", + "NotAtBoundary", + "SameMaterial", + "StepTooSmall", + "NoRINDEX", + "PolishedLumirrorAirReflection", + "PolishedLumirrorGlueReflection", + "PolishedAirReflection", + "PolishedTeflonAirReflection", + "PolishedTiOAirReflection", + "PolishedTyvekAirReflection", + "PolishedVM2000AirReflection", + "PolishedVM2000GlueReflection", + "EtchedLumirrorAirReflection", + "EtchedLumirrorGlueReflection", + "EtchedAirReflection", + "EtchedTeflonAirReflection", + "EtchedTiOAirReflection", + "EtchedTyvekAirReflection", + "EtchedVM2000AirReflection", + "EtchedVM2000GlueReflection", + "GroundLumirrorAirReflection", + "GroundLumirrorGlueReflection", + "GroundAirReflection", + "GroundTeflonAirReflection", + "GroundTiOAirReflection", + "GroundTyvekAirReflection", + "GroundVM2000AirReflection", + 
"GroundVM2000GlueReflection", + "Dichroic", + "CoatedDielectricReflection", + "CoatedDielectricRefraction", + "CoatedDielectricFrustratedTransmission"}; + for (auto &[st, count] : boundary_status_counts) + { + const char *nm = (st >= 0 && st < 43) ? bnames[st] : "?"; + G4cout << " " << nm << "(" << st << "): " << count << G4endl; + } + } + } + + void UserSteppingAction(const G4Step *aStep) override + { + G4Track *track = aStep->GetTrack(); + if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition()) + return; + + G4StepPoint *post = aStep->GetPostStepPoint(); + G4TrackStatus status = track->GetTrackStatus(); + + // Find the OpBoundary process to get its status (for ALL steps) + G4OpBoundaryProcess *boundary = nullptr; + G4ProcessManager *pm = track->GetDefinition()->GetProcessManager(); + for (int i = 0; i < pm->GetPostStepProcessVector()->entries(); i++) + { + G4VProcess *p = (*pm->GetPostStepProcessVector())[i]; + boundary = dynamic_cast(p); + if (boundary) + break; + } + + G4OpBoundaryProcessStatus bStatus = boundary ? boundary->GetStatus() : Undefined; + + // Count boundary status for ALL steps + if (boundary && bStatus != NotAtBoundary && bStatus != Undefined && bStatus != StepTooSmall) + { + std::lock_guard lock(count_mtx); + boundary_status_counts[int(bStatus)]++; + } + + // Only record photon state when the photon is about to die + if (status != fStopAndKill && status != fStopButAlive) + return; + + // Identify the process + const G4VProcess *proc = post->GetProcessDefinedStep(); + G4String procName = proc ? proc->GetProcessName() : "Unknown"; + + // Build detailed key for counting + std::string key = procName; + if (procName == "OpBoundary" && boundary) + key += "(" + std::to_string(int(bStatus)) + ")"; + key += (status == fStopAndKill ? 
"/Kill" : "/Alive"); + + { + std::lock_guard lock(count_mtx); + proc_death_counts[key]++; + } + + // Map to opticks flag + unsigned flag = 0; + + if (procName == "OpAbsorption") + { + flag = PhotonFateAccumulator::BULK_ABSORB; + } + else if (procName == "OpWLS") + { + flag = PhotonFateAccumulator::BULK_REEMIT; + } + else if (procName == "OpBoundary" && boundary) + { + switch (bStatus) + { + case Detection: + flag = PhotonFateAccumulator::SURFACE_DETECT; + break; + case Absorption: + flag = PhotonFateAccumulator::SURFACE_ABSORB; + break; + case FresnelReflection: + case TotalInternalReflection: + flag = PhotonFateAccumulator::BOUNDARY_REFLECT; + break; + case FresnelRefraction: + flag = PhotonFateAccumulator::BOUNDARY_TRANSMIT; + break; + case LambertianReflection: + case LobeReflection: + flag = PhotonFateAccumulator::SURFACE_DREFLECT; + break; + case SpikeReflection: + flag = PhotonFateAccumulator::SURFACE_SREFLECT; + break; + case BackScattering: + flag = PhotonFateAccumulator::SURFACE_DREFLECT; + break; + default: + flag = PhotonFateAccumulator::SURFACE_ABSORB; + break; + } + } + else if (procName == "Transportation") + { + // Check if an SD killed this photon (SURFACE_DETECT) + G4StepPoint *pre = aStep->GetPreStepPoint(); + G4VPhysicalVolume *preVol = pre->GetPhysicalVolume(); + G4VPhysicalVolume *postVol = post->GetPhysicalVolume(); + G4LogicalVolume *preLog = preVol ? preVol->GetLogicalVolume() : nullptr; + G4LogicalVolume *postLog = postVol ? 
postVol->GetLogicalVolume() : nullptr; + bool sd_pre = preLog && preLog->GetSensitiveDetector(); + bool sd_post = postLog && postLog->GetSensitiveDetector(); + if (sd_pre || sd_post) + flag = PhotonFateAccumulator::SURFACE_DETECT; + else + flag = PhotonFateAccumulator::BOUNDARY_TRANSMIT; + } + + if (flag == 0) + flag = PhotonFateAccumulator::MISS; // catch-all + + // Build sphoton with the final state + G4ThreeVector pos = post->GetPosition(); + G4ThreeVector mom = post->GetMomentumDirection(); + G4ThreeVector pol = post->GetPolarization(); + G4double time = post->GetGlobalTime(); + G4double energy = post->GetTotalEnergy(); + + sphoton p = {}; + p.pos = {float(pos.x()), float(pos.y()), float(pos.z())}; + p.time = float(time); + p.mom = {float(mom.x()), float(mom.y()), float(mom.z())}; + p.pol = {float(pol.x()), float(pol.y()), float(pol.z())}; + p.wavelength = (energy > 0) ? float(h_Planck * c_light / (energy * CLHEP::eV)) : 0.f; + + p.orient_boundary_flag = flag & 0xFFFF; + p.flagmask = flag; + + if (aligned && fate->indexed) + { + int photon_idx = track->GetTrackID() - 1; // G4 trackIDs are 1-based + fate->Set(photon_idx, p); + } + else + { + fate->Add(p); + } + } +}; + +// ---- Tracking Action: per-photon RNG sync for aligned mode ---- + +struct G4OnlyTrackingAction : G4UserTrackingAction +{ + void PreUserTrackingAction(const G4Track *track) override + { + if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition()) + return; + int photon_idx = track->GetTrackID() - 1; // G4 trackIDs are 1-based + U4Random::SetSequenceIndex(photon_idx); + } + + void PostUserTrackingAction(const G4Track *track) override + { + if (track->GetDefinition() != G4OpticalPhoton::OpticalPhotonDefinition()) + return; + U4Random::SetSequenceIndex(-1); + } +}; + +// ---- AlignedOpticalPhysics: uses Shim processes for precise RNILL matching ---- + +struct AlignedOpticalPhysics : G4VPhysicsConstructor +{ + AlignedOpticalPhysics() : G4VPhysicsConstructor("AlignedOptical") + { + } 
+ void ConstructParticle() override + { + } + void ConstructProcess() override + { + auto *pm = G4OpticalPhoton::OpticalPhoton()->GetProcessManager(); + pm->AddDiscreteProcess(new ShimG4OpAbsorption()); + pm->AddDiscreteProcess(new ShimG4OpRayleigh()); + pm->AddDiscreteProcess(new G4OpBoundaryProcess()); + pm->AddDiscreteProcess(new G4OpWLS()); + } +}; + +// ---- Event Action: reports per-event progress ---- + +struct G4OnlyEventAction : G4UserEventAction +{ + int total_events; + + G4OnlyEventAction(int total_events) : total_events(total_events) + { + } + + void EndOfEventAction(const G4Event *event) override + { + int id = event->GetEventID(); + if (id == 0 || (id + 1) % 10 == 0 || id + 1 == total_events) + G4cout << "G4: Event " << id + 1 << "/" << total_events << G4endl; + } +}; + +// ---- Run Action: saves merged hits at end ---- + +struct G4OnlyRunAction : G4UserRunAction +{ + HitAccumulator *accumulator; + PhotonFateAccumulator *fate; + + G4OnlyRunAction(HitAccumulator *acc, PhotonFateAccumulator *f = nullptr) : accumulator(acc), fate(f) + { + } + + void EndOfRunAction(const G4Run *) override + { + if (G4Threading::IsMasterThread() || !G4Threading::IsMultithreadedApplication()) + { + G4cout << "G4: Total accumulated hits: " << accumulator->hits.size() << G4endl; + accumulator->Save("g4_hits.npy"); + if (fate) + { + G4cout << "G4: Total photon fates: " << fate->photons.size() << G4endl; + fate->Save("g4_photon.npy"); + } + } + } +}; + +// ---- Action Initialization (required for MT) ---- + +struct G4OnlyActionInitialization : G4VUserActionInitialization +{ + gphox::Config cfg; + HitAccumulator *accumulator; + PhotonFateAccumulator *fate; + int photons_per_event; + int num_events; + bool aligned; + + G4OnlyActionInitialization(const gphox::Config &cfg, HitAccumulator *acc, PhotonFateAccumulator *f, + int photons_per_event, int num_events, bool aligned_ = false) + : cfg(cfg), accumulator(acc), fate(f), photons_per_event(photons_per_event), 
num_events(num_events), + aligned(aligned_) + { + } + + void BuildForMaster() const override + { + SetUserAction(new G4OnlyRunAction(accumulator, fate)); + } + + void Build() const override + { + SetUserAction(new G4OnlyPrimaryGenerator(cfg, photons_per_event)); + SetUserAction(new G4OnlyEventAction(num_events)); + SetUserAction(new G4OnlyRunAction(accumulator, fate)); + SetUserAction(new G4OnlySteppingAction(fate, aligned)); + if (aligned) + SetUserAction(new G4OnlyTrackingAction()); + } +}; diff --git a/src/config.cpp b/src/config.cpp index 844244017..8c898cdce 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -6,15 +6,16 @@ #include #include -#include #include +#include #include "sysrap/SEventConfig.hh" #include "config.h" #include "config_path.h" -namespace gphox { +namespace gphox +{ using namespace std; @@ -28,10 +29,9 @@ bool FileExists(const std::string &path) return std::filesystem::exists(path, ec) && !ec; } -Config::Config(std::string config_name) : - name{std::getenv("GPHOX_CONFIG") ? std::getenv("GPHOX_CONFIG") : config_name} +Config::Config(std::string config_name) : name{std::getenv("GPHOX_CONFIG") ? std::getenv("GPHOX_CONFIG") : config_name} { - ReadConfig(Locate(name + ".json")); + ReadConfig(Locate(name + ".json")); } std::string Config::PtxPath(const std::string &ptx_name) @@ -54,93 +54,96 @@ std::string Config::PtxPath(const std::string &ptx_name) std::string Config::Locate(std::string filename) const { - std::vector search_paths; + std::vector search_paths; + + const std::string user_dir{std::getenv("GPHOX_CONFIG_DIR") ? std::getenv("GPHOX_CONFIG_DIR") : ""}; - const std::string user_dir{std::getenv("GPHOX_CONFIG_DIR") ? 
std::getenv("GPHOX_CONFIG_DIR") : ""}; + if (user_dir.empty()) + { + std::string paths(GPHOX_CONFIG_SEARCH_PATHS); - if (user_dir.empty()) - { - std::string paths(GPHOX_CONFIG_SEARCH_PATHS); + size_t last = 0; + size_t next = 0; + while ((next = paths.find(':', last)) != std::string::npos) + { + search_paths.push_back(paths.substr(last, next - last)); + last = next + 1; + } - size_t last = 0; - size_t next = 0; - while ((next = paths.find(':', last)) != std::string::npos) + search_paths.push_back(paths.substr(last)); + } + else { - search_paths.push_back(paths.substr(last, next-last)); - last = next + 1; + search_paths.push_back(user_dir); } - search_paths.push_back(paths.substr(last)); - } - else - { - search_paths.push_back(user_dir); - } - - struct stat buffer; - std::string filepath{""}; - for (std::string path : search_paths) - { - std::string fpath{path + "/" + filename}; - if (stat(fpath.c_str(), &buffer) == 0) + struct stat buffer; + std::string filepath{""}; + for (std::string path : search_paths) { - filepath = fpath; - break; + std::string fpath{path + "/" + filename}; + if (stat(fpath.c_str(), &buffer) == 0) + { + filepath = fpath; + break; + } } - } - if (filepath.empty()) - { - std::string errmsg{"Could not find config file \"" + filename + "\" in "}; - for (std::string path : search_paths) errmsg += (path + ":"); - throw std::runtime_error(errmsg); - } + if (filepath.empty()) + { + std::string errmsg{"Could not find config file \"" + filename + "\" in "}; + for (std::string path : search_paths) + errmsg += (path + ":"); + throw std::runtime_error(errmsg); + } - return filepath; + return filepath; } - /** * Expects a valid filepath. 
*/ void Config::ReadConfig(std::string filepath) { - nlohmann::json json; - - try { - std::ifstream ifs(filepath); - ifs >> json; - - nlohmann::json torch_ = json["torch"]; - - torch = { - .gentype = OpticksGenstep_::Type(torch_["gentype"]), - .trackid = torch_["trackid"], - .matline = torch_["matline"], - .numphoton = torch_["numphoton"], - .pos = make_float3(torch_["pos"][0], torch_["pos"][1], torch_["pos"][2]), - .time = torch_["time"], - .mom = normalize(make_float3(torch_["mom"][0], torch_["mom"][1], torch_["mom"][2])), - .weight = torch_["weight"], - .pol = make_float3(torch_["pol"][0], torch_["pol"][1], torch_["pol"][2]), - .wavelength = torch_["wavelength"], - .zenith = make_float2(torch_["zenith"][0], torch_["zenith"][1]), - .azimuth = make_float2(torch_["azimuth"][0], torch_["azimuth"][1]), - .radius = torch_["radius"], - .distance = torch_["distance"], - .mode = torch_["mode"], - .type = storchtype::Type(torch_["type"]) - }; - - nlohmann::json event_ = json["event"]; - - SEventConfig::SetEventMode( string(event_["mode"]).c_str() ); - SEventConfig::SetMaxSlot( event_["maxslot"] ); - } - catch (nlohmann::json::exception& e) { - std::string errmsg{"Failed reading config parameters from " + filepath + "\n" + e.what()}; - throw std::runtime_error{errmsg}; - } -} + nlohmann::json json; + try + { + std::ifstream ifs(filepath); + ifs >> json; + + nlohmann::json torch_ = json["torch"]; + + torch = {.gentype = OpticksGenstep_::Type(torch_["gentype"]), + .trackid = torch_["trackid"], + .matline = torch_["matline"], + .numphoton = torch_["numphoton"], + .pos = make_float3(torch_["pos"][0], torch_["pos"][1], torch_["pos"][2]), + .time = torch_["time"], + .mom = normalize(make_float3(torch_["mom"][0], torch_["mom"][1], torch_["mom"][2])), + .weight = torch_["weight"], + .pol = make_float3(torch_["pol"][0], torch_["pol"][1], torch_["pol"][2]), + .wavelength = torch_["wavelength"], + .zenith = make_float2(torch_["zenith"][0], torch_["zenith"][1]), + .azimuth = 
make_float2(torch_["azimuth"][0], torch_["azimuth"][1]), + .radius = torch_["radius"], + .distance = torch_["distance"], + .mode = torch_["mode"], + .type = storchtype::Type(torch_["type"])}; + + nlohmann::json event_ = json["event"]; + + SEventConfig::SetEventMode(string(event_["mode"]).c_str()); + SEventConfig::SetMaxSlot(event_["maxslot"]); + + if (event_.contains("savephotonhistory")) + savephotonhistory = event_["savephotonhistory"].get(); + } + catch (nlohmann::json::exception &e) + { + std::string errmsg{"Failed reading config parameters from " + filepath + "\n" + e.what()}; + throw std::runtime_error{errmsg}; + } } + +} // namespace gphox diff --git a/src/config.h b/src/config.h index 1fc5c838d..28e26416c 100644 --- a/src/config.h +++ b/src/config.h @@ -7,29 +7,29 @@ #include "sysrap/srng.h" #include "sysrap/storch.h" -namespace gphox { - +namespace gphox +{ /** * Provides access to all configuration types and data. */ class Config { - public: - - Config(std::string config_name = "dev"); + public: + Config(std::string config_name = "dev"); - static std::string PtxPath(const std::string &ptx_name = "CSGOptiX7.ptx"); + static std::string PtxPath(const std::string &ptx_name = "CSGOptiX7.ptx"); - /// A unique name associated with this Config - std::string name; + /// A unique name associated with this Config + std::string name; - storch torch; + storch torch; - private: + bool savephotonhistory{false}; - std::string Locate(std::string filename) const; - void ReadConfig(std::string filepath); + private: + std::string Locate(std::string filename) const; + void ReadConfig(std::string filepath); }; -} +} // namespace gphox diff --git a/sysrap/SEventConfig.cc b/sysrap/SEventConfig.cc index c37de3632..b3d95633d 100644 --- a/sysrap/SEventConfig.cc +++ b/sysrap/SEventConfig.cc @@ -776,7 +776,7 @@ void SEventConfig::LIMIT_Check() //assert( _MaxBounce >= 0 && _MaxBounce < LIMIT ) ; // MaxBounce should not in principal be limited - assert( _MaxRecord >= 0 && _MaxRecord <= 
RecordLimit() ) ; + assert(_MaxRecord >= 0); // RecordLimit relaxed to allow large record arrays for step analysis assert( _MaxRec >= 0 && _MaxRec <= RecordLimit() ) ; assert( _MaxPrd >= 0 && _MaxPrd <= RecordLimit() ) ; @@ -1590,7 +1590,8 @@ void SEventConfig::Initialize_Comp_Simulate_(unsigned& gather_mask, unsigned& sa else if(IsDebugLite()) { SEventConfig::SetMaxRec(0); - SEventConfig::SetMaxRecord(record_limit); + int env_max_record = ssys::getenvint(kMaxRecord, 0); + SEventConfig::SetMaxRecord(env_max_record > 0 ? env_max_record : record_limit); SEventConfig::SetMaxSeq(1); // formerly incorrectly set to max_bounce+1 } diff --git a/sysrap/snam.h b/sysrap/snam.h index 1f2e49f57..4dd713e9f 100644 --- a/sysrap/snam.h +++ b/sysrap/snam.h @@ -16,6 +16,10 @@ struct snam static constexpr const char* OPTICAL = "optical.npy" ; static constexpr const char* ICDF = "icdf.npy" ; + static constexpr const char *WLS_ICDF = "wls_icdf.npy"; + static constexpr const char *WLS_MAT_MAP = "wls_mat_map.npy"; + static constexpr const char *WLS_TIME_CONSTANTS = "wls_time_constants.npy"; + static constexpr const char* MULTIFILM = "multifilm.npy" ; static constexpr const char* PROPCOM = "propcom.npy" ; diff --git a/sysrap/sproplist.h b/sysrap/sproplist.h index deeffa02a..9ca1b3239 100644 --- a/sysrap/sproplist.h +++ b/sysrap/sproplist.h @@ -3,25 +3,25 @@ sproplist.h =================== -For MATERIAL the property default constants +For MATERIAL the property default constants are taken from GMaterialLib::defineDefaults For SURFACE setting the prop values:: (detect, absorb, reflect_specular, reflect_diffuse -requires access to optical surface type, -if not already present need to add metadata -to the surface NPFold/NP to carry that info. +requires access to optical surface type, +if not already present need to add metadata +to the surface NPFold/NP to carry that info. 
-Nov 1 2023 : Increase default ABSLENGTH RAYLEIGH 1e6 -> 1e12 mm +Nov 1 2023 : Increase default ABSLENGTH RAYLEIGH 1e6 -> 1e12 mm ------------------------------------------------------------------ Increase default ABSLENGTH RAYLEIGH from 1e6 to 1e12 due to notes/issues/G4CXTest_raindrop_shakedown.rst -This is relevant to simple tests where it is common -not to define ABSLENGTH and RAYLEIGH properties. +This is relevant to simple tests where it is common +not to define ABSLENGTH and RAYLEIGH properties. **/ @@ -29,19 +29,19 @@ not to define ABSLENGTH and RAYLEIGH properties. struct sproplist { - static constexpr const char* MATERIAL = R"( + static constexpr const char *MATERIAL = R"( 0 0 RINDEX 1 0 1 ABSLENGTH 1e12 0 2 RAYLEIGH 1e12 0 3 REEMISSIONPROB 0. 1 0 GROUPVEL 299.792458 - 1 1 SPARE11 0. + 1 1 WLSABSLENGTH 1e12 1 2 SPARE12 0. 1 3 SPARE13 0. - )" ; - // default GROUPVEL set to c_light_mm_per_ns, see U4PhysicalConstants.h + )"; + // default GROUPVEL set to c_light_mm_per_ns, see U4PhysicalConstants.h - static constexpr const char* SURFACE = R"( + static constexpr const char *SURFACE = R"( 0 0 EFFICIENCY -2 0 1 SPARE01 -2 0 2 REFLECTIVITY -2 @@ -50,48 +50,46 @@ struct sproplist 1 1 SPARE11 -2 1 2 SPARE12 -2 1 3 SPARE13 -2 - )" ; + )"; - static const sproplist* Material() ; - static const sproplist* Surface() ; + static const sproplist *Material(); + static const sproplist *Surface(); - std::vector PROP ; - sproplist(const char* spec ); + std::vector PROP; + sproplist(const char *spec); - std::string desc() const ; - void getNames(std::vector& pnames, const char* skip_prefix="SPARE") const ; - const sprop* findProp(const char* pname) const ; - const sprop* get(int g, int p) const ; + std::string desc() const; + void getNames(std::vector &pnames, const char *skip_prefix = "SPARE") const; + const sprop *findProp(const char *pname) const; + const sprop *get(int g, int p) const; }; -inline const sproplist* sproplist::Material() // static +inline const sproplist 
*sproplist::Material() // static { - return new sproplist(MATERIAL) ; + return new sproplist(MATERIAL); } -inline const sproplist* sproplist::Surface() // static +inline const sproplist *sproplist::Surface() // static { - return new sproplist(SURFACE) ; + return new sproplist(SURFACE); } -inline sproplist::sproplist(const char* spec) +inline sproplist::sproplist(const char *spec) { - sprop::Parse(PROP, spec); + sprop::Parse(PROP, spec); } -inline std::string sproplist::desc() const +inline std::string sproplist::desc() const { - return sprop::Desc(PROP); + return sprop::Desc(PROP); } -inline void sproplist::getNames(std::vector& pnames, const char* skip_prefix ) const +inline void sproplist::getNames(std::vector &pnames, const char *skip_prefix) const { - sprop::GetNames(pnames, PROP, skip_prefix); + sprop::GetNames(pnames, PROP, skip_prefix); } -inline const sprop* sproplist::findProp(const char* pname) const +inline const sprop *sproplist::findProp(const char *pname) const { - return sprop::FindProp(PROP, pname); + return sprop::FindProp(PROP, pname); } -inline const sprop* sproplist::get(int g, int v) const +inline const sprop *sproplist::get(int g, int v) const { - return sprop::Find(PROP, g, v) ; + return sprop::Find(PROP, g, v); } - - diff --git a/sysrap/sstandard.h b/sysrap/sstandard.h index 1c62d6f0c..c14370a66 100644 --- a/sysrap/sstandard.h +++ b/sysrap/sstandard.h @@ -77,92 +77,70 @@ In the old X4/GGeo workflow, the bnd buffer was created with:: **/ -#include #include #include +#include #include "NPFold.h" #include "NPX.h" -#include "sproplist.h" #include "sdomain.h" #include "smatsur.h" #include "snam.h" +#include "sproplist.h" struct sstandard { - static constexpr const bool VERBOSE = false ; - static constexpr const char* IMPLICIT_PREFIX = "Implicit_RINDEX_NoRINDEX" ; - const sdomain* dom ; + static constexpr const bool VERBOSE = false; + static constexpr const char *IMPLICIT_PREFIX = "Implicit_RINDEX_NoRINDEX"; + const sdomain *dom; - const NP* 
wavelength ; - const NP* energy ; - const NP* rayleigh ; - const NP* mat ; - const NP* sur ; - const NP* bd ; - const NP* bnd ; - const NP* optical ; + const NP *wavelength; + const NP *energy; + const NP *rayleigh; + const NP *mat; + const NP *sur; + const NP *bd; + const NP *bnd; + const NP *optical; - const NP* icdf ; + const NP *icdf; + const NP *wls_icdf; + const NP *wls_mat_map; + const NP *wls_time_constants; sstandard(); - void deferred_init( - const std::vector& vbd, - const std::vector& bdname, - const std::vector& suname, - const NPFold* surface - ); - - NPFold* serialize() const ; - void import(const NPFold* fold ); - - void save(const char* base, const char* rel ); - void load(const char* base, const char* rel ); - - - static NP* make_bd( - const std::vector& vbd, - const std::vector& bdname - ); - - static NP* make_optical( - const std::vector& vbd, - const std::vector& suname, - const NPFold* surface - ); - - static NP* make_bnd( - const std::vector& vbd, - const std::vector& bdname, - const NP* mat, - const NP* sur - ); - - static void column_range(int4& mn, int4& mx, const std::vector& vbd) ; - static NP* unused_mat(const std::vector& names, const NPFold* fold ); - static NP* unused_sur(const std::vector& names, const NPFold* fold ); - static NP* unused_create(const sproplist* pl, const std::vector& names, const NPFold* fold ); -}; + void deferred_init(const std::vector &vbd, const std::vector &bdname, + const std::vector &suname, const NPFold *surface); + + NPFold *serialize() const; + void import(const NPFold *fold); + + void save(const char *base, const char *rel); + void load(const char *base, const char *rel); + + static NP *make_bd(const std::vector &vbd, const std::vector &bdname); + static NP *make_optical(const std::vector &vbd, const std::vector &suname, + const NPFold *surface); + + static NP *make_bnd(const std::vector &vbd, const std::vector &bdname, const NP *mat, + const NP *sur); + + static void column_range(int4 &mn, int4 &mx, 
const std::vector &vbd); + static NP *unused_mat(const std::vector &names, const NPFold *fold); + static NP *unused_sur(const std::vector &names, const NPFold *fold); + static NP *unused_create(const sproplist *pl, const std::vector &names, const NPFold *fold); +}; inline sstandard::sstandard() - : - dom(nullptr), - wavelength(nullptr), - energy(nullptr), - rayleigh(nullptr), - mat(nullptr), - sur(nullptr), - bd(nullptr), - bnd(nullptr), - optical(nullptr), - icdf(nullptr) + : dom(nullptr), wavelength(nullptr), energy(nullptr), rayleigh(nullptr), mat(nullptr), sur(nullptr), bd(nullptr), + bnd(nullptr), optical(nullptr), icdf(nullptr), wls_icdf(nullptr), wls_mat_map(nullptr), + wls_time_constants(nullptr) { } - /** sstandard::deferred_init -------------------------- @@ -176,45 +154,44 @@ after mat and sur have been filled. **/ -inline void sstandard::deferred_init( - const std::vector& vbd, - const std::vector& bdname, - const std::vector& suname, - const NPFold* surface - ) +inline void sstandard::deferred_init(const std::vector &vbd, const std::vector &bdname, + const std::vector &suname, const NPFold *surface) { - dom = new sdomain ; + dom = new sdomain; - wavelength = dom->get_wavelength_nm() ; - energy = dom->get_energy_eV() ; + wavelength = dom->get_wavelength_nm(); + energy = dom->get_energy_eV(); - bd = make_bd( vbd, bdname ); - bnd = make_bnd( vbd, bdname, mat, sur ) ; - optical = make_optical(vbd, suname, surface) ; + bd = make_bd(vbd, bdname); + bnd = make_bnd(vbd, bdname, mat, sur); + optical = make_optical(vbd, suname, surface); } - -inline NPFold* sstandard::serialize() const +inline NPFold *sstandard::serialize() const { - NPFold* fold = new NPFold ; + NPFold *fold = new NPFold; + + fold->add(snam::WAVELENGTH, wavelength); + fold->add(snam::ENERGY, energy); - fold->add(snam::WAVELENGTH , wavelength ); - fold->add(snam::ENERGY, energy ); + fold->add(snam::RAYLEIGH, rayleigh); + fold->add(snam::MAT, mat); + fold->add(snam::SUR, sur); - 
fold->add(snam::RAYLEIGH, rayleigh ); - fold->add(snam::MAT , mat ); - fold->add(snam::SUR , sur ); + fold->add(snam::BD, bd); + fold->add(snam::BND, bnd); + fold->add(snam::OPTICAL, optical); - fold->add(snam::BD, bd ); - fold->add(snam::BND, bnd ); - fold->add(snam::OPTICAL, optical ); + fold->add(snam::ICDF, icdf); - fold->add(snam::ICDF, icdf) ; + fold->add(snam::WLS_ICDF, wls_icdf); + fold->add(snam::WLS_MAT_MAP, wls_mat_map); + fold->add(snam::WLS_TIME_CONSTANTS, wls_time_constants); - return fold ; + return fold; } -inline void sstandard::import(const NPFold* fold ) +inline void sstandard::import(const NPFold *fold) { wavelength = fold->get(snam::WAVELENGTH); energy = fold->get(snam::ENERGY); @@ -228,21 +205,24 @@ inline void sstandard::import(const NPFold* fold ) optical = fold->get(snam::OPTICAL); icdf = fold->get(snam::ICDF); + + wls_icdf = fold->get(snam::WLS_ICDF); + wls_mat_map = fold->get(snam::WLS_MAT_MAP); + wls_time_constants = fold->get(snam::WLS_TIME_CONSTANTS); } -inline void sstandard::save(const char* base, const char* rel ) +inline void sstandard::save(const char *base, const char *rel) { - NPFold* fold = serialize(); + NPFold *fold = serialize(); fold->save(base, rel); } -inline void sstandard::load(const char* base, const char* rel ) +inline void sstandard::load(const char *base, const char *rel) { - NPFold* fold = NPFold::Load(base, rel) ; - import(fold) ; + NPFold *fold = NPFold::Load(base, rel); + import(fold); } - /** sstandard::make_bd ------------------- @@ -251,11 +231,11 @@ Create array of shape (num_bd, 4) holding int "pointers" to (omat,osur,isur,imat **/ -inline NP* sstandard::make_bd( const std::vector& vbd, const std::vector& bdname ) +inline NP *sstandard::make_bd(const std::vector &vbd, const std::vector &bdname) { - NP* a_bd = NPX::ArrayFromVec( vbd ); - a_bd->set_names( bdname ); - return a_bd ; + NP *a_bd = NPX::ArrayFromVec(vbd); + a_bd->set_names(bdname); + return a_bd; } /** @@ -308,96 +288,96 @@ that via the ems 
smatsur.h enum value. **/ -inline NP* sstandard::make_optical( - const std::vector& vbd, - const std::vector& suname, - const NPFold* surface ) +inline NP *sstandard::make_optical(const std::vector &vbd, const std::vector &suname, + const NPFold *surface) { - int ni = vbd.size() ; - int nj = 4 ; - int nk = 4 ; + int ni = vbd.size(); + int nj = 4; + int nk = 4; - NP* op = NP::Make(ni, nj, nk); - int* op_v = op->values(); + NP *op = NP::Make(ni, nj, nk); + int *op_v = op->values(); - for(int i=0 ; i < ni ; i++) // over vbd + for (int i = 0; i < ni; i++) // over vbd { - const int4& bd_ = vbd[i] ; - for(int j=0 ; j < nj ; j++) // over (omat,osur,isur,imat) + const int4 &bd_ = vbd[i]; + for (int j = 0; j < nj; j++) // over (omat,osur,isur,imat) { - int op_index = i*nj*nk + j*nk ; + int op_index = i * nj * nk + j * nk; - int idx = -2 ; - switch(j) + int idx = -2; + switch (j) { - case 0: idx = bd_.x ; break ; - case 1: idx = bd_.y ; break ; - case 2: idx = bd_.z ; break ; - case 3: idx = bd_.w ; break ; + case 0: + idx = bd_.x; + break; + case 1: + idx = bd_.y; + break; + case 2: + idx = bd_.z; + break; + case 3: + idx = bd_.w; + break; } - int idx1 = idx+1 ; // 1-based idx - bool is_mat = j == 0 || j == 3 ; - bool is_sur = j == 1 || j == 2 ; + int idx1 = idx + 1; // 1-based idx + bool is_mat = j == 0 || j == 3; + bool is_sur = j == 1 || j == 2; - if(is_mat) + if (is_mat) { - assert( idx > -1 ); // omat,imat must always be present - op_v[op_index+0] = idx1 ; - op_v[op_index+1] = 0 ; - op_v[op_index+2] = 0 ; - op_v[op_index+3] = 0 ; + assert(idx > -1); // omat,imat must always be present + op_v[op_index + 0] = idx1; + op_v[op_index + 1] = 0; + op_v[op_index + 2] = 0; + op_v[op_index + 3] = 0; } - else if(is_sur) + else if (is_sur) { - const char* surfname = snam::get(suname, idx) ; - - bool no_surfname_for_surface_idx = idx > -1 && surfname == nullptr ; - - if(no_surfname_for_surface_idx) std::cerr - << "sstandard::make_optical" - << " ERROR " - << " 
no_surfname_for_surface_idx " << ( no_surfname_for_surface_idx ? "YES" : "NO " ) - << " sur idx from bd " << idx - << " but no corresponding surfname " - << " suname.size " << suname.size() - << " surface.subfold.size " << surface->subfold.size() - << " surface.ff.size " << surface->ff.size() - << "\n" - << " snam::Desc(suname)\n" - << snam::Desc(suname) - << "\n" - ; - - if(idx > -1 ) assert(surfname) ; + const char *surfname = snam::get(suname, idx); + + bool no_surfname_for_surface_idx = idx > -1 && surfname == nullptr; + + if (no_surfname_for_surface_idx) + std::cerr << "sstandard::make_optical" << " ERROR " << " no_surfname_for_surface_idx " + << (no_surfname_for_surface_idx ? "YES" : "NO ") << " sur idx from bd " << idx + << " but no corresponding surfname " << " suname.size " << suname.size() + << " surface.subfold.size " << surface->subfold.size() << " surface.ff.size " + << surface->ff.size() << "\n" + << " snam::Desc(suname)\n" + << snam::Desc(suname) << "\n"; + + if (idx > -1) + assert(surfname); // all surf should have name, do not always have surf - NPFold* surf = surfname ? surface->get_subfold(surfname) : nullptr ; - bool is_implicit = surfname && strncmp(surfname, IMPLICIT_PREFIX, strlen(IMPLICIT_PREFIX) ) == 0 ; - int Type = -2 ; - int Finish = -2 ; - int ModelValuePercent = -2 ; - std::string OSN = "-" ; + NPFold *surf = surfname ? 
surface->get_subfold(surfname) : nullptr; + bool is_implicit = surfname && strncmp(surfname, IMPLICIT_PREFIX, strlen(IMPLICIT_PREFIX)) == 0; + int Type = -2; + int Finish = -2; + int ModelValuePercent = -2; + std::string OSN = "-"; - if( is_implicit ) + if (is_implicit) { - assert( surf == nullptr ) ; // not expecting to find surf for implicits - Type = 1 ; - Finish = 1 ; - ModelValuePercent = 100 ; // placeholders to match old_optical ones - OSN = "X" ; // Implicits classified as ordinary Surface as they have bnd/sur entries + assert(surf == nullptr); // not expecting to find surf for implicits + Type = 1; + Finish = 1; + ModelValuePercent = 100; // placeholders to match old_optical ones + OSN = "X"; // Implicits classified as ordinary Surface as they have bnd/sur entries } else { - int missing = 0 ; // -2 better, but use 0 to match old_optical - Type = surf ? surf->get_meta("Type",-1) : missing ; - Finish = surf ? surf->get_meta("Finish", -1 ) : missing ; - ModelValuePercent = surf ? int(100.*surf->get_meta("ModelValue", 0.)) : missing ; - OSN = surf ? surf->get_meta("OpticalSurfaceName", "-") : "-" ; + int missing = 0; // -2 better, but use 0 to match old_optical + Type = surf ? surf->get_meta("Type", -1) : missing; + Finish = surf ? surf->get_meta("Finish", -1) : missing; + ModelValuePercent = surf ? int(100. * surf->get_meta("ModelValue", 0.)) : missing; + OSN = surf ? 
surf->get_meta("OpticalSurfaceName", "-") : "-"; } - - char OSN0 = *OSN.c_str() ; - int ems = smatsur::TypeFromChar(OSN0) ; + char OSN0 = *OSN.c_str(); + int ems = smatsur::TypeFromChar(OSN0); /** HERE CAN DETECT FINISH AND ModelValuePercent THAT @@ -405,37 +385,26 @@ inline NP* sstandard::make_optical( FOR WHICH WILL NEED NEW smatsur.h enum value **/ - int Payload_Y = ems ; - - if(VERBOSE) std::cout - << " bnd:i " << std::setw(3) << i - << " sur:idx " << std::setw(3) << idx - << " Type " << std::setw(2) << Type - << " Finish " << std::setw(2) << Finish - << " MVP " << std::setw(3) << ModelValuePercent - << " surf " << ( surf ? "YES" : "NO " ) - << " impl " << ( is_implicit ? "YES" : "NO " ) - << " osn0 " << ( OSN0 == '\0' ? '0' : OSN0 ) - << " OSN " << OSN - << " ems " << ems - << " emsn " << smatsur::Name(ems) - << " surfname " << ( surfname ? surfname : "-" ) - << std::endl - ; - - op_v[op_index+0] = idx1 ; - op_v[op_index+1] = Payload_Y ; - op_v[op_index+2] = Finish ; - op_v[op_index+3] = ModelValuePercent ; + int Payload_Y = ems; + + if (VERBOSE) + std::cout << " bnd:i " << std::setw(3) << i << " sur:idx " << std::setw(3) << idx << " Type " + << std::setw(2) << Type << " Finish " << std::setw(2) << Finish << " MVP " << std::setw(3) + << ModelValuePercent << " surf " << (surf ? "YES" : "NO ") << " impl " + << (is_implicit ? "YES" : "NO ") << " osn0 " << (OSN0 == '\0' ? '0' : OSN0) << " OSN " + << OSN << " ems " << ems << " emsn " << smatsur::Name(ems) << " surfname " + << (surfname ? 
surfname : "-") << std::endl; + + op_v[op_index + 0] = idx1; + op_v[op_index + 1] = Payload_Y; + op_v[op_index + 2] = Finish; + op_v[op_index + 3] = ModelValuePercent; } } } - return op ; + return op; } - - - /** sstandard::make_bnd --------------------- @@ -444,116 +413,129 @@ Form bnd array by interleaving mat and sur array entries as directed by vbd int **/ -inline NP* sstandard::make_bnd( - const std::vector& vbd, - const std::vector& bdname, - const NP* mat, - const NP* sur ) +inline NP *sstandard::make_bnd(const std::vector &vbd, const std::vector &bdname, const NP *mat, + const NP *sur) { - assert( mat->shape.size() == 4 ); - assert( sur->shape.size() == 4 ); + assert(mat->shape.size() == 4); + assert(sur->shape.size() == 4); - int num_mat = mat->shape[0] ; - int num_sur = sur->shape[0] ; + int num_mat = mat->shape[0]; + int num_sur = sur->shape[0]; - for(int d=1 ; d < 4 ; d++) assert( mat->shape[d] == sur->shape[d] ) ; + for (int d = 1; d < 4; d++) + assert(mat->shape[d] == sur->shape[d]); - assert( mat->shape[1] == sprop::NUM_PAYLOAD_GRP ); - int num_domain = mat->shape[2] ; - assert( mat->shape[3] == sprop::NUM_PAYLOAD_VAL ); + assert(mat->shape[1] == sprop::NUM_PAYLOAD_GRP); + int num_domain = mat->shape[2]; + assert(mat->shape[3] == sprop::NUM_PAYLOAD_VAL); - const double* mat_v = mat->cvalues(); - const double* sur_v = sur->cvalues(); + const double *mat_v = mat->cvalues(); + const double *sur_v = sur->cvalues(); - int num_bnd = vbd.size() ; - int num_bdname = bdname.size() ; + int num_bnd = vbd.size(); + int num_bdname = bdname.size(); - bool num_bnd_expect = num_bnd == num_bdname ; - if(!num_bnd_expect) std::raise(SIGINT) ; - assert( num_bnd_expect); + bool num_bnd_expect = num_bnd == num_bdname; + if (!num_bnd_expect) + std::raise(SIGINT); + assert(num_bnd_expect); - int4 mn ; - int4 mx ; - column_range(mn, mx, vbd ); - if(VERBOSE) std::cout << " sstandard::bnd mn " << mn << " mx " << mx << std::endl ; + int4 mn; + int4 mx; + column_range(mn, mx, 
vbd); + if (VERBOSE) + std::cout << " sstandard::bnd mn " << mn << " mx " << mx << std::endl; - bool mat_expect = mx.x < num_mat && mx.w < num_mat ; - bool sur_expect = mx.y < num_sur && mx.z < num_sur ; + bool mat_expect = mx.x < num_mat && mx.w < num_mat; + bool sur_expect = mx.y < num_sur && mx.z < num_sur; - if(!mat_expect) std::raise(SIGINT); - if(!sur_expect) std::raise(SIGINT); + if (!mat_expect) + std::raise(SIGINT); + if (!sur_expect) + std::raise(SIGINT); - assert( mat_expect ); - assert( sur_expect ); + assert(mat_expect); + assert(sur_expect); - int ni = num_bnd ; // ~53 - int nj = sprop::NUM_MATSUR ; // 4 (omat,osur,isur,imat) - int nk = sprop::NUM_PAYLOAD_GRP ; // 2 - int nl = num_domain ; // 761 fine domain - int nn = sprop::NUM_PAYLOAD_VAL ; // 4 + int ni = num_bnd; // ~53 + int nj = sprop::NUM_MATSUR; // 4 (omat,osur,isur,imat) + int nk = sprop::NUM_PAYLOAD_GRP; // 2 + int nl = num_domain; // 761 fine domain + int nn = sprop::NUM_PAYLOAD_VAL; // 4 - int np = nk*nl*nn ; // 2*761*4 number of payload values for one mat/sur + int np = nk * nl * nn; // 2*761*4 number of payload values for one mat/sur - - NP* bnd_ = NP::Make(ni, nj, nk, nl, nn ); - bnd_->fill(-1.) 
; // trying to match X4/GGeo unfilled - bnd_->set_names( bdname ); + NP *bnd_ = NP::Make(ni, nj, nk, nl, nn); + bnd_->fill(-1.); // trying to match X4/GGeo unfilled + bnd_->set_names(bdname); // metadata needed by QBnd::MakeBoundaryTex - bnd_->set_meta("domain_low", sdomain::DomainLow() ); - bnd_->set_meta("domain_high", sdomain::DomainHigh() ); - bnd_->set_meta("domain_step", sdomain::DomainStep() ); - bnd_->set_meta("domain_range", sdomain::DomainRange() ); + bnd_->set_meta("domain_low", sdomain::DomainLow()); + bnd_->set_meta("domain_high", sdomain::DomainHigh()); + bnd_->set_meta("domain_step", sdomain::DomainStep()); + bnd_->set_meta("domain_range", sdomain::DomainRange()); - double* bnd_v = bnd_->values() ; + double *bnd_v = bnd_->values(); - for(int i=0 ; i < ni ; i++) + for (int i = 0; i < ni; i++) { - std::array _bd = {{ vbd[i].x, vbd[i].y, vbd[i].z, vbd[i].w }} ; - for(int j=0 ; j < nj ; j++) + std::array _bd = {{vbd[i].x, vbd[i].y, vbd[i].z, vbd[i].w}}; + for (int j = 0; j < nj; j++) { - int ptr = _bd[j] ; // omat,osur,isur,imat index "pointer" into mat or sur arrays - if( ptr < 0 ) continue ; - bool is_mat = j == 0 || j == 3 ; - bool is_sur = j == 1 || j == 2 ; - if(is_mat) assert( ptr < num_mat ); - if(is_sur) assert( ptr < num_sur ); - - int src_index = ptr*np ; - int dst_index = (i*nj + j)*np ; - const double* src_v = is_mat ? mat_v : sur_v ; - - for(int p=0 ; p < np ; p++) bnd_v[dst_index + p] = src_v[src_index + p] ; + int ptr = _bd[j]; // omat,osur,isur,imat index "pointer" into mat or sur arrays + if (ptr < 0) + continue; + bool is_mat = j == 0 || j == 3; + bool is_sur = j == 1 || j == 2; + if (is_mat) + assert(ptr < num_mat); + if (is_sur) + assert(ptr < num_sur); + + int src_index = ptr * np; + int dst_index = (i * nj + j) * np; + const double *src_v = is_mat ? 
mat_v : sur_v; + + for (int p = 0; p < np; p++) + bnd_v[dst_index + p] = src_v[src_index + p]; } } - return bnd_ ; + return bnd_; } -inline void sstandard::column_range(int4& mn, int4& mx, const std::vector& vbd) +inline void sstandard::column_range(int4 &mn, int4 &mx, const std::vector &vbd) { - mn.x = std::numeric_limits::max() ; - mn.y = std::numeric_limits::max() ; - mn.z = std::numeric_limits::max() ; - mn.w = std::numeric_limits::max() ; + mn.x = std::numeric_limits::max(); + mn.y = std::numeric_limits::max(); + mn.z = std::numeric_limits::max(); + mn.w = std::numeric_limits::max(); - mx.x = std::numeric_limits::min() ; - mx.y = std::numeric_limits::min() ; - mx.z = std::numeric_limits::min() ; - mx.w = std::numeric_limits::min() ; + mx.x = std::numeric_limits::min(); + mx.y = std::numeric_limits::min(); + mx.z = std::numeric_limits::min(); + mx.w = std::numeric_limits::min(); int num = vbd.size(); - for(int i=0 ; i < num ; i++) + for (int i = 0; i < num; i++) { - const int4& b = vbd[i] ; - if(b.x > mx.x) mx.x = b.x ; - if(b.y > mx.y) mx.y = b.y ; - if(b.z > mx.z) mx.z = b.z ; - if(b.w > mx.w) mx.w = b.w ; - - if(b.x < mn.x) mn.x = b.x ; - if(b.y < mn.y) mn.y = b.y ; - if(b.z < mn.z) mn.z = b.z ; - if(b.w < mn.w) mn.w = b.w ; + const int4 &b = vbd[i]; + if (b.x > mx.x) + mx.x = b.x; + if (b.y > mx.y) + mx.y = b.y; + if (b.z > mx.z) + mx.z = b.z; + if (b.w > mx.w) + mx.w = b.w; + + if (b.x < mn.x) + mn.x = b.x; + if (b.y < mn.y) + mn.y = b.y; + if (b.z < mn.z) + mn.z = b.z; + if (b.w < mn.w) + mn.w = b.w; } } @@ -569,11 +551,11 @@ In principal it should give equivalent results to Geant4 interpolation. However its simpler to just use Geant4 interpolation from U4Tree level. 
**/ -inline NP* sstandard::unused_mat( const std::vector& names, const NPFold* fold ) +inline NP *sstandard::unused_mat(const std::vector &names, const NPFold *fold) { assert(0); - const sproplist* pl = sproplist::Material() ; - return unused_create(pl, names, fold ); + const sproplist *pl = sproplist::Material(); + return unused_create(pl, names, fold); } /** @@ -587,11 +569,11 @@ like the mat array this approach is anyhow unworkable as it stands. **/ -inline NP* sstandard::unused_sur( const std::vector& names, const NPFold* fold ) +inline NP *sstandard::unused_sur(const std::vector &names, const NPFold *fold) { assert(0); - const sproplist* pl = sproplist::Surface() ; - return unused_create(pl, names, fold ); + const sproplist *pl = sproplist::Surface(); + return unused_create(pl, names, fold); } /** @@ -603,60 +585,51 @@ and the array content. That is true for "mat" but not for "sur" **/ -inline NP* sstandard::unused_create(const sproplist* pl, const std::vector& names, const NPFold* fold ) +inline NP *sstandard::unused_create(const sproplist *pl, const std::vector &names, const NPFold *fold) { assert(0); - sdomain dom ; + sdomain dom; - int ni = names.size() ; - int nj = sprop::NUM_PAYLOAD_GRP ; - int nk = dom.length ; - int nl = sprop::NUM_PAYLOAD_VAL ; + int ni = names.size(); + int nj = sprop::NUM_PAYLOAD_GRP; + int nk = dom.length; + int nl = sprop::NUM_PAYLOAD_VAL; - NP* sta = NP::Make(ni, nj, nk, nl) ; + NP *sta = NP::Make(ni, nj, nk, nl); sta->set_names(names); - double* sta_v = sta->values(); + double *sta_v = sta->values(); - std::cout << "sstandard::create sta.sstr " << sta->sstr() << std::endl ; + std::cout << "sstandard::create sta.sstr " << sta->sstr() << std::endl; - for(int i=0 ; i < ni ; i++ ) // names + for (int i = 0; i < ni; i++) // names { - const char* name = names[i].c_str() ; - NPFold* sub = fold->get_subfold(name) ; - - std::cout - << std::setw(4) << i - << " : " - << std::setw(60) << name - << " : " - << sub->stats() - << std::endl - ; 
- - for(int j=0 ; j < nj ; j++) // payload groups + const char *name = names[i].c_str(); + NPFold *sub = fold->get_subfold(name); + + std::cout << std::setw(4) << i << " : " << std::setw(60) << name << " : " << sub->stats() << std::endl; + + for (int j = 0; j < nj; j++) // payload groups { - for(int k=0 ; k < nk ; k++) // wavelength + for (int k = 0; k < nk; k++) // wavelength { - //double wavelength_nm = dom.wavelength_nm[k] ; - double energy_eV = dom.energy_eV[k] ; - double energy = energy_eV * 1.e-6 ; // Geant4 actual energy unit is MeV + // double wavelength_nm = dom.wavelength_nm[k] ; + double energy_eV = dom.energy_eV[k]; + double energy = energy_eV * 1.e-6; // Geant4 actual energy unit is MeV - for(int l=0 ; l < nl ; l++) // payload values + for (int l = 0; l < nl; l++) // payload values { - const sprop* prop = pl->get(j,l) ; - assert( prop ); + const sprop *prop = pl->get(j, l); + assert(prop); - const char* pn = prop->name ; - const NP* a = sub->get(pn) ; - double value = a ? a->interp( energy ) : prop->def ; + const char *pn = prop->name; + const NP *a = sub->get(pn); + double value = a ? a->interp(energy) : prop->def; - int index = i*nj*nk*nl + j*nk*nl + k*nl + l ; - sta_v[index] = value ; + int index = i * nj * nk * nl + j * nk * nl + k * nl + l; + sta_v[index] = value; } } } } - return sta ; + return sta; } - - diff --git a/sysrap/sstate.h b/sysrap/sstate.h index 0299e756f..e51735533 100644 --- a/sysrap/sstate.h +++ b/sysrap/sstate.h @@ -25,7 +25,7 @@ BUT seems no point doing that, can just directly use them from PRD. 
struct sstate { float4 material1 ; // refractive_index/absorption_length/scattering_length/reemission_prob - float4 m1group2 ; // group_velocity/spare1/spare2/spare3 + float4 m1group2; // group_velocity/wls_absorption_length/spare2/spare3 float4 material2 ; float4 surface ; // detect/absorb/reflect_specular/reflect_diffuse @@ -71,7 +71,7 @@ inline std::ostream& operator<<(std::ostream& os, const sstate& s ) << " (refractive_index/absorption_length/scattering_length/reemission_prob) " << std::endl << " m1group2 " << s.m1group2 - << " (group_velocity/spare1/spare2/spare3) " + << " (group_velocity/wls_absorption_length/spare2/spare3) " << std::endl << " material2 " << s.material2 << " (refractive_index/absorption_length/scattering_length/reemission_prob) " diff --git a/tests/geom/DUNE_example_detector.gdml b/tests/geom/DUNE_example_detector.gdml new file mode 100644 index 000000000..11ae55814 --- /dev/null +++ b/tests/geom/DUNE_example_detector.gdml @@ -0,0 +1,373 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/geom/wls_scatter_viz.gdml b/tests/geom/wls_scatter_viz.gdml new file mode 100644 index 000000000..db1dc872b --- /dev/null +++ b/tests/geom/wls_scatter_viz.gdml @@ -0,0 +1,111 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/geom/wls_slab.gdml b/tests/geom/wls_slab.gdml new file mode 100644 index 000000000..50eb18af5 --- /dev/null +++ b/tests/geom/wls_slab.gdml @@ -0,0 +1,113 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/geom/wls_test.gdml b/tests/geom/wls_test.gdml new file mode 100644 index 000000000..984f99bf4 --- /dev/null +++ b/tests/geom/wls_test.gdml @@ -0,0 +1,144 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/test_wavelength_shifting.sh b/tests/test_wavelength_shifting.sh new file mode 100755 index 000000000..74278fec8 --- /dev/null +++ b/tests/test_wavelength_shifting.sh @@ -0,0 +1,257 @@ +#!/bin/bash +# +# test_wavelength_shifting.sh +# ============================ +# End-to-end test: GPU vs G4 wavelength shifting physics +# +# Fires 10000 UV photons (350nm) from outside a WLS sphere into a scattering +# medium. Compares GPU (opticks) and G4 hit wavelength distributions, WLS +# conversion rate, and arrival time distributions using chi-squared test. +# +# Geometry: tests/geom/wls_scatter_viz.gdml +# - WLS sphere r=10mm (absorbs UV, re-emits visible) +# - Scattering medium (Rayleigh, 10mm mean free path) +# - Detector shell r=30mm (100% efficiency) +# +# Usage: +# ./tests/test_wavelength_shifting.sh [seed] +# +# Exit code 0 = PASS, 1 = FAIL +# + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +SEED=${1:-42} +NUMPHOTON=10000 +GEOM="$REPO_DIR/tests/geom/wls_scatter_viz.gdml" +CONFIG="wls_scatter_viz" + +source /opt/eic-opticks/eic-opticks-env.sh 2>/dev/null || true +export OPTICKS_MAX_BOUNCE=100 +export OPTICKS_EVENT_MODE=HitPhoton +export OPTICKS_MAX_SLOT=100000 + +echo "==============================================" +echo " WLS Test: GPU vs G4 Wavelength Shifting" +echo "==============================================" +echo " Geometry: $GEOM" +echo " Photons: $NUMPHOTON (350nm UV)" +echo " Seed: $SEED" +echo "" + +# --- GPU run --- +echo "[GPU] Running GPUPhotonSourceMinimal..." +GPU_OUT=$(/opt/eic-opticks/bin/GPUPhotonSourceMinimal \ + -g "$GEOM" -c "$CONFIG" -m "$REPO_DIR/tests/run.mac" -s "$SEED" 2>&1) +GPU_HITS=$(echo "$GPU_OUT" | grep "Opticks: NumHits" | head -1 | awk '{print $NF}') +echo "[GPU] Hits: $GPU_HITS" + +GPU_HIT_FILE="/tmp/MISSING_USER/opticks/GEOM/GEOM/GPUPhotonSourceMinimal/ALL0_no_opticks_event_name/A000/hit.npy" + +# --- G4 run --- +echo "[G4] Running StandAloneGeant4Validation..." +G4_OUT=$(/opt/eic-opticks/bin/StandAloneGeant4Validation \ + -g "$GEOM" -c "$CONFIG" -s "$SEED" 2>&1) +G4_HITS=$(echo "$G4_OUT" | grep "Total accumulated hits" | awk '{print $NF}') +echo "[G4] Hits: $G4_HITS" + +G4_HIT_FILE="g4_hits.npy" + +# --- Compare --- +echo "" +echo "[COMPARE] Analyzing wavelength and time distributions..." +echo "" + +python3 - "$GPU_HIT_FILE" "$G4_HIT_FILE" "$GPU_HITS" "$G4_HITS" << 'PYEOF' +import sys +import numpy as np + +gpu_hit_file = sys.argv[1] +g4_hit_file = sys.argv[2] +gpu_nhits = int(sys.argv[3]) +g4_nhits = int(sys.argv[4]) + +gpu = np.load(gpu_hit_file).reshape(-1, 4, 4) +g4 = np.load(g4_hit_file).reshape(-1, 4, 4) + +gpu_wl = gpu[:, 2, 3] +g4_wl = g4[:, 2, 3] +gpu_time = gpu[:, 0, 3] +g4_time = g4[:, 0, 3] + +PASS = True +ALPHA = 0.001 # significance level (tolerates minor ICDF interpolation difference) + + +def chi2_test(h_obs, h_exp, label): + """Chi-squared test for two histograms. 
Returns (chi2, ndf, p_value, pass).""" + # Scale expected to match observed total + scale = h_obs.sum() / h_exp.sum() if h_exp.sum() > 0 else 1.0 + h_exp_scaled = h_exp * scale + + # Only use bins with sufficient statistics (>5 expected) + mask = h_exp_scaled > 5 + if mask.sum() < 2: + print(f" {label}: Too few bins with sufficient stats") + return 0, 0, 1.0, True + + obs = h_obs[mask].astype(float) + exp = h_exp_scaled[mask].astype(float) + chi2 = np.sum((obs - exp) ** 2 / exp) + ndf = mask.sum() - 1 + + # p-value from chi2 distribution using Wilson-Hilferty approximation + if ndf > 0: + z = (chi2 / ndf) ** (1.0 / 3) - (1 - 2.0 / (9 * ndf)) + z /= np.sqrt(2.0 / (9 * ndf)) + # Approximate p-value from standard normal + p = 0.5 * (1.0 + math.erf(-z / np.sqrt(2))) + else: + p = 1.0 + + passed = p >= ALPHA + return chi2, ndf, p, passed + + +def ks_test(a, b): + """Two-sample Kolmogorov-Smirnov test.""" + a, b = np.sort(a), np.sort(b) + na, nb = len(a), len(b) + combined = np.concatenate([a, b]) + combined.sort() + cdf_a = np.searchsorted(a, combined, side='right') / na + cdf_b = np.searchsorted(b, combined, side='right') / nb + d = np.max(np.abs(cdf_a - cdf_b)) + en = np.sqrt(na * nb / (na + nb)) + p = min(np.exp(-2.0 * (en * d) ** 2) * 2.0, 1.0) + return d, p + + +# ------------------------------------------------------- +# Test 1: Hit count comparison +# ------------------------------------------------------- +print("=" * 55) +print(" TEST 1: Hit Count") +print("=" * 55) +print(f" GPU: {len(gpu)}") +print(f" G4: {len(g4)}") +import math +sigma = math.sqrt(len(gpu) + len(g4)) +z = abs(len(gpu) - len(g4)) / sigma if sigma > 0 else 0 +print(f" |Z| = {z:.1f}σ") +t1_pass = z < 5 +status = "PASS" if t1_pass else "FAIL" +print(f" Result: {status} (threshold: 5σ)") +PASS = PASS and t1_pass + + +# ------------------------------------------------------- +# Test 2: WLS conversion fraction +# ------------------------------------------------------- +print() +print("=" * 55) 
+print(" TEST 2: WLS Conversion Fraction") +print("=" * 55) +WLS_THRESHOLD = 380 # nm + +gpu_frac = np.mean(gpu_wl > WLS_THRESHOLD) +g4_frac = np.mean(g4_wl > WLS_THRESHOLD) +frac_diff = abs(gpu_frac - g4_frac) + +print(f" GPU shifted: {100*gpu_frac:.1f}%") +print(f" G4 shifted: {100*g4_frac:.1f}%") +print(f" |Difference|: {100*frac_diff:.2f}%") +t2_pass = frac_diff < 0.03 # 3% tolerance +status = "PASS" if t2_pass else "FAIL" +print(f" Result: {status} (threshold: 3%)") +PASS = PASS and t2_pass + + +# Pre-compute shifted/unshifted arrays +gpu_shifted = gpu_wl[gpu_wl > WLS_THRESHOLD] +g4_shifted = g4_wl[g4_wl > WLS_THRESHOLD] + +# ------------------------------------------------------- +# Test 3: Shifted wavelength spectrum (KS test) +# ------------------------------------------------------- +print() +print("=" * 55) +print(" TEST 3: Shifted Wavelength Spectrum (KS Test)") +print("=" * 55) + +if len(gpu_shifted) > 10 and len(g4_shifted) > 10: + d, p3 = ks_test(gpu_shifted, g4_shifted) + print(f" GPU shifted: N={len(gpu_shifted)}, mean={gpu_shifted.mean():.1f}nm") + print(f" G4 shifted: N={len(g4_shifted)}, mean={g4_shifted.mean():.1f}nm") + print(f" KS D={d:.6f} p={p3:.4f}") + t3_pass = p3 >= ALPHA +else: + print(" Too few shifted photons for KS test") + t3_pass = True + +status = "PASS" if t3_pass else "FAIL" +print(f" Result: {status} (threshold: p > {ALPHA})") +PASS = PASS and t3_pass + + +# ------------------------------------------------------- +# Test 4: Arrival time for shifted photons (KS test) +# ------------------------------------------------------- +print() +print("=" * 55) +print(" TEST 4: Shifted Photon Arrival Time (KS Test)") +print("=" * 55) + +# Compare shifted photon times — these include WLS exponential delay + transport +# With the G4 WLS time profile set to "exponential", distributions should match +gpu_shifted_t = gpu_time[gpu_wl > WLS_THRESHOLD] +g4_shifted_t = g4_time[g4_wl > WLS_THRESHOLD] + +print(f" GPU shifted: N={len(gpu_shifted_t)}, 
mean={gpu_shifted_t.mean():.3f}ns, std={gpu_shifted_t.std():.3f}ns") +print(f" G4 shifted: N={len(g4_shifted_t)}, mean={g4_shifted_t.mean():.3f}ns, std={g4_shifted_t.std():.3f}ns") +print(f" Std ratio: {gpu_shifted_t.std()/g4_shifted_t.std():.3f} (expect ~1.0)") + +if len(gpu_shifted_t) > 10 and len(g4_shifted_t) > 10: + d_t, p_t = ks_test(gpu_shifted_t, g4_shifted_t) + print(f" KS D={d_t:.6f} p={p_t:.4f}") + t4_pass = p_t >= ALPHA +else: + print(" Too few shifted photons for KS test") + t4_pass = True + +# Also check unshifted time (pure transport, no WLS delay) +gpu_unshifted_t = gpu_time[gpu_wl <= WLS_THRESHOLD] +g4_unshifted_t = g4_time[g4_wl <= WLS_THRESHOLD] +print(f" Unshifted time: GPU mean={gpu_unshifted_t.mean():.3f}ns G4 mean={g4_unshifted_t.mean():.3f}ns") + +status = "PASS" if t4_pass else "FAIL" +print(f" Result: {status} (KS p > {ALPHA})") +PASS = PASS and t4_pass + + +# ------------------------------------------------------- +# Summary +# ------------------------------------------------------- +print() +print("=" * 55) +print(" SUMMARY") +print("=" * 55) +tests = [ + ("Hit count", t1_pass), + ("WLS fraction", t2_pass), + ("Shifted wavelength KS", t3_pass), + ("Shifted time KS", t4_pass), +] +for name, passed in tests: + print(f" {name:>25s}: {'PASS' if passed else 'FAIL'}") + +print() +if PASS: + print(" *** ALL TESTS PASSED ***") + sys.exit(0) +else: + print(" *** SOME TESTS FAILED ***") + sys.exit(1) +PYEOF diff --git a/tools/generate_precooked_rng.cu b/tools/generate_precooked_rng.cu new file mode 100644 index 000000000..79cb2910d --- /dev/null +++ b/tools/generate_precooked_rng.cu @@ -0,0 +1,113 @@ +/** +generate_precooked_rng.cu +========================== + +Generates precooked curand Philox sequences for U4Random aligned mode. +Each photon gets its own random stream matching the GPU simulation. + +Build: + nvcc -o generate_precooked_rng tools/generate_precooked_rng.cu \ + -I. 
-I/opt/eic-opticks/include/eic-opticks -lcurand -std=c++17 + +Usage: + ./generate_precooked_rng [num_photons] [num_randoms_per_photon] + Defaults: 100000 photons, 256 randoms each (nj=16, nk=16) + +Output: + ~/.opticks/precooked/QSimTest/rng_sequence/ + rng_sequence_f_ni_nj_nk_tranche/ + rng_sequence_f_ni_nj_nk_ioffset000000.npy +*/ + +#include +#include +#include +#include +#include + +#include +#include "sysrap/NP.hh" + +__global__ void generate_sequences(float* out, unsigned ni, unsigned nv, unsigned id_offset) +{ + unsigned idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= ni) return; + + unsigned photon_idx = id_offset + idx; + + // Match GPU simulation: curand_init(seed=0, subsequence=photon_idx, offset=0) + curandStatePhilox4_32_10_t rng; + curand_init(0ULL, (unsigned long long)photon_idx, 0ULL, &rng); + + float* row = out + idx * nv; + for (unsigned j = 0; j < nv; j++) + row[j] = curand_uniform(&rng); +} + +static void mkdirp(const char* path) +{ + char tmp[1024]; + snprintf(tmp, sizeof(tmp), "%s", path); + for (char* p = tmp + 1; *p; p++) + { + if (*p == '/') { *p = 0; mkdir(tmp, 0755); *p = '/'; } + } + mkdir(tmp, 0755); +} + +int main(int argc, char** argv) +{ + unsigned ni = 100000; + unsigned nj = 16; + unsigned nk = 16; + + if (argc > 1) ni = atoi(argv[1]); + if (argc > 2) + { + unsigned total = atoi(argv[2]); + nj = 1; nk = total; + for (unsigned f = 2; f * f <= total; f++) + { + if (total % f == 0 && f <= 64) { nj = f; nk = total / f; } + } + } + + unsigned nv = nj * nk; + printf("Generating precooked curand Philox sequences:\n"); + printf(" photons: %u, randoms/photon: %u (nj=%u, nk=%u), memory: %.1f MB\n", + ni, nv, nj, nk, (double)ni * nv * sizeof(float) / (1024 * 1024)); + + const char* home = getenv("HOME"); + char dirpath[512], filename[256], fullpath[768]; + + snprintf(dirpath, sizeof(dirpath), + "%s/.opticks/precooked/QSimTest/rng_sequence/rng_sequence_f_ni%u_nj%u_nk%u_tranche%u", + home, ni, nj, nk, ni); + mkdirp(dirpath); + + 
snprintf(filename, sizeof(filename), + "rng_sequence_f_ni%u_nj%u_nk%u_ioffset%06u.npy", ni, nj, nk, 0); + snprintf(fullpath, sizeof(fullpath), "%s/%s", dirpath, filename); + + float* d_out = nullptr; + cudaMalloc(&d_out, (size_t)ni * nv * sizeof(float)); + + unsigned threads = 256; + unsigned blocks = (ni + threads - 1) / threads; + generate_sequences<<>>(d_out, ni, nv, 0); + cudaDeviceSynchronize(); + + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err)); return 1; } + + NP* seq = NP::Make(ni, nj, nk); + cudaMemcpy(seq->values(), d_out, (size_t)ni * nv * sizeof(float), cudaMemcpyDeviceToHost); + cudaFree(d_out); + + seq->save(fullpath); + printf("Saved: %s\n", fullpath); + printf("Set OPTICKS_RANDOM_SEQPATH=%s\n", fullpath); + + delete seq; + return 0; +} diff --git a/u4/CMakeLists.txt b/u4/CMakeLists.txt index 3a18458d9..868af18b9 100644 --- a/u4/CMakeLists.txt +++ b/u4/CMakeLists.txt @@ -69,6 +69,7 @@ set(HEADERS U4Material.hh U4Mat.h U4Scint.h + U4WLS.h U4Volume.h U4Surface.h diff --git a/u4/U4Tree.h b/u4/U4Tree.h index c7495afad..b92865f67 100644 --- a/u4/U4Tree.h +++ b/u4/U4Tree.h @@ -82,6 +82,7 @@ controlled via envvar:: #include "U4Mesh.h" #include "U4Scint.h" +#include "U4WLS.h" #include "U4Solid.h" #include "U4PhysicsTable.h" @@ -112,6 +113,7 @@ struct U4Tree std::vector solids ; U4PhysicsTable* rayleigh_table ; U4Scint* scint ; + U4WLS *wls; // disable the below with settings with by defining the below envvar static constexpr const char* __DISABLE_OSUR_IMPLICIT = "U4Tree__DISABLE_OSUR_IMPLICIT" ; @@ -152,6 +154,7 @@ struct U4Tree void initMaterial(const G4Material* const mt); void initScint(); + void initWLS(); void initSurfaces(); void initSolids(); @@ -262,6 +265,7 @@ inline U4Tree::U4Tree( num_surface_standard(-1), rayleigh_table(CreateRayleighTable()), scint(nullptr), + wls(nullptr), enable_osur(!ssys::getenvbool(__DISABLE_OSUR_IMPLICIT)), 
enable_isur(!ssys::getenvbool(__DISABLE_ISUR_IMPLICIT)), material_debug(ssys::getenvint(__MATERIAL_DEBUG,0)), @@ -292,6 +296,9 @@ inline void U4Tree::init() LOG(LEVEL) << "-initScint" ; initScint(); + LOG(LEVEL) << "-initWLS"; + initWLS(); + LOG(LEVEL) << "-initSurfaces" ; initSurfaces(); @@ -384,6 +391,28 @@ inline void U4Tree::initScint() } } +/** +U4Tree::initWLS +------------------ + +Scans all G4 materials for WLS properties (WLSCOMPONENT, WLSTIMECONSTANT). +Creates inverse CDF texture data and material mapping for GPU-side WLS +wavelength sampling. Stored in st->standard for serialization and upload. + +**/ + +inline void U4Tree::initWLS() +{ + wls = U4WLS::Create(st->material, materials); + if (wls) + { + st->standard->wls_icdf = wls->icdf; + st->standard->wls_mat_map = wls->mat_map; + st->standard->wls_time_constants = wls->time_constants; + LOG(LEVEL) << wls->desc(); + } +} + /** U4Tree::CreateRayleighTable ---------------------------- diff --git a/u4/U4WLS.h b/u4/U4WLS.h new file mode 100644 index 000000000..c915ae92d --- /dev/null +++ b/u4/U4WLS.h @@ -0,0 +1,209 @@ +#pragma once +/** +U4WLS.h : Wavelength Shifting ICDF Creation +=============================================== + +Creates inverse CDF textures for wavelength shifting (WLS) materials, +analogous to U4Scint.h for scintillation. Supports multiple WLS materials +by stacking ICDF rows into a single texture. + +For each material with a WLSCOMPONENT property: +1. Integrates the emission spectrum to get a CDF +2. Inverts it at 4096 uniformly-spaced CDF values (3 resolutions for HD) +3. 
Extracts WLSTIMECONSTANT from the material properties table + +The output arrays: +- icdf: shape (num_wls_mat*3, 4096, 1) — stacked HD ICDF rows +- mat_map: shape (num_total_mat,) int — maps material index to WLS row (-1 = no WLS) +- time_constants: shape (num_wls_mat,) float — per-WLS-material time constant + +The G4 WLS process (G4OpWLS) uses these material properties: +- WLSABSLENGTH: absorption length as f(energy) — handled via boundary texture +- WLSCOMPONENT: emission spectrum as f(energy) — converted to ICDF here +- WLSTIMECONSTANT: re-emission time delay (scalar) — extracted here + +**/ + +#include +#include +#include +#include +#include + +#include "G4Material.hh" +#include "G4MaterialPropertiesTable.hh" +#include "G4MaterialPropertyVector.hh" +#include "G4PhysicalConstants.hh" +#include "G4SystemOfUnits.hh" + +#include "NP.hh" +#include "NPFold.h" +#include "SLOG.hh" +#include "U4MaterialPropertyVector.h" +#include "U4Scint.h" // reuse Integral and CreateGeant4InterpolatedInverseCDF + +struct U4WLS +{ + static constexpr const char *WLSCOMPONENT_KEY = "WLSCOMPONENT"; + static constexpr const char *WLSTIMECONSTANT_KEY = "WLSTIMECONSTANT"; + + static U4WLS *Create(const NPFold *materials, const std::vector &mats); + + const NP *icdf; // (num_wls*3, 4096, 1) stacked HD ICDF for all WLS materials + const NP *mat_map; // (num_total_mat,) int: material idx -> base ICDF row, or -1 + const NP *time_constants; // (num_wls,) float: time constant per WLS material + + unsigned num_wls; + unsigned num_mat; + + U4WLS(const std::vector &mats, const std::vector &wls_indices, + const std::vector &wls_components, + const std::vector &wls_time_consts); + + std::string desc() const; +}; + +/** +U4WLS::Create +--------------- + +Scans all materials for WLSCOMPONENT property. For each material +that has it, extracts the emission spectrum and time constant. + +Returns nullptr if no WLS materials are found. 
+ +**/ + +inline U4WLS *U4WLS::Create(const NPFold *materials, const std::vector &mats) +{ + std::vector wls_indices; + std::vector wls_components; + std::vector wls_time_consts; + + for (unsigned i = 0; i < mats.size(); i++) + { + const G4Material *mat = mats[i]; + G4MaterialPropertiesTable *mpt = mat->GetMaterialPropertiesTable(); + if (mpt == nullptr) + continue; + + G4MaterialPropertyVector *wlscomp = mpt->GetProperty(WLSCOMPONENT_KEY); + if (wlscomp == nullptr) + continue; + + // Found a WLS material + wls_indices.push_back(i); + wls_components.push_back(wlscomp); + + // Extract time constant (scalar property, default 0 = instant re-emission) + double tc = 0.0; + if (mpt->ConstPropertyExists(WLSTIMECONSTANT_KEY)) + { + tc = mpt->GetConstProperty(WLSTIMECONSTANT_KEY) / ns; // convert to ns + } + wls_time_consts.push_back(tc); + } + + if (wls_indices.empty()) + return nullptr; + + return new U4WLS(mats, wls_indices, wls_components, wls_time_consts); +} + +/** +U4WLS::U4WLS +-------------- + +Builds the ICDF texture data and material mapping arrays. + +For each WLS material: +1. Integrate WLSCOMPONENT to get CDF (reuses U4Scint::Integral) +2. Build 3-layer HD ICDF (reuses U4Scint::CreateGeant4InterpolatedInverseCDF) +3. 
Stack into combined ICDF array + +**/ + +inline U4WLS::U4WLS(const std::vector &mats, const std::vector &wls_indices, + const std::vector &wls_components, + const std::vector &wls_time_consts) + : icdf(nullptr), mat_map(nullptr), time_constants(nullptr), num_wls(wls_indices.size()), num_mat(mats.size()) +{ + assert(num_wls > 0); + assert(wls_components.size() == num_wls); + assert(wls_time_consts.size() == num_wls); + + int num_bins = 4096; + int hd_factor = 20; + + // Build per-material ICDFs and stack them + std::vector icdfs; + for (unsigned w = 0; w < num_wls; w++) + { + const G4MaterialPropertyVector *comp = wls_components[w]; + const G4Material *mat = mats[wls_indices[w]]; + const char *matname = mat->GetName().c_str(); + + // Integrate emission spectrum to get CDF + G4MaterialPropertyVector *integral = U4Scint::Integral(comp); + + // Build 3-layer HD ICDF (wavelength values in nm) + NP *one_icdf = U4Scint::CreateGeant4InterpolatedInverseCDF(integral, num_bins, hd_factor, matname, + false /*energy_not_wavelength*/ + ); + + assert(one_icdf); + assert(one_icdf->has_shape(3, num_bins, 1)); + icdfs.push_back(one_icdf); + } + + // Stack all ICDFs into a single array: (num_wls*3, 4096, 1) + { + NP *stacked = NP::Make(num_wls * 3, num_bins, 1); + double *dst = stacked->values(); + for (unsigned w = 0; w < num_wls; w++) + { + const double *src = icdfs[w]->cvalues(); + unsigned row_size = 3 * num_bins * 1; + memcpy(dst + w * row_size, src, row_size * sizeof(double)); + } + stacked->set_meta("hd_factor", hd_factor); + stacked->set_meta("num_bins", num_bins); + stacked->set_meta("num_wls", num_wls); + icdf = stacked; + } + + // Build material index -> ICDF row mapping + // For material i, mat_map[i] = base row in ICDF texture (0, 3, 6, ...) 
+ // or -1 if material has no WLS + { + NP *mm = NP::Make(num_mat); + int *mm_v = mm->values(); + for (unsigned i = 0; i < num_mat; i++) + mm_v[i] = -1; + for (unsigned w = 0; w < num_wls; w++) + { + mm_v[wls_indices[w]] = w * 3; // base row for this WLS material's 3 HD layers + } + mat_map = mm; + } + + // Build time constants array (in ns) + { + NP *tc = NP::Make(num_wls); + float *tc_v = tc->values(); + for (unsigned w = 0; w < num_wls; w++) + { + tc_v[w] = float(wls_time_consts[w]); + } + time_constants = tc; + } +} + +inline std::string U4WLS::desc() const +{ + std::stringstream ss; + ss << "U4WLS::desc" << " num_wls " << num_wls << " num_mat " << num_mat << " icdf " << (icdf ? icdf->sstr() : "-") + << " mat_map " << (mat_map ? mat_map->sstr() : "-") << " time_constants " + << (time_constants ? time_constants->sstr() : "-"); + return ss.str(); +}