diff --git a/.github/workflows/build-pull-request.yaml b/.github/workflows/build-pull-request.yaml
index 2a8b63fe7..b04e065b2 100644
--- a/.github/workflows/build-pull-request.yaml
+++ b/.github/workflows/build-pull-request.yaml
@@ -79,3 +79,4 @@ jobs:
docker run --rm --gpus 'device=1' ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }} tests/test_GPURaytrace.sh
docker run --rm --gpus 'device=1' ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }} tests/test_GPUPhotonFileSource.sh
docker run --rm --gpus 'device=1' ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }} tests/test_GPUPhotonSource_8x8SiPM.sh
+ docker run --rm --gpus 'device=1' ${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }} tests/test_wavelength_shifting.sh
diff --git a/README.md b/README.md
index b1933482c..9f02b0cb7 100644
--- a/README.md
+++ b/README.md
@@ -155,6 +155,7 @@ EIC-Opticks provides several examples demonstrating GPU-accelerated optical phot
| `GPUPhotonSource` | Optical photons (torch) | Any GDML | G4 + GPU side-by-side validation |
| `GPUPhotonSourceMinimal` | Optical photons (torch) | Any GDML | GPU-only test |
| `GPUPhotonFileSource` | Optical photons (text file) | Any GDML | GPU-only, user-defined photons from file |
+| WLS test | Wavelength shifting | WLS sphere + detector shell | Validate GPU WLS physics |
### Example 1: GPUCerenkov (Cerenkov Only)
@@ -297,6 +298,55 @@ GPUPhotonFileSource -g tests/geom/opticks_raindrop.gdml -p my_photons.txt -m run
**Source files:** `src/GPUPhotonFileSource.cpp`, `src/GPUPhotonFileSource.h`
+### Example 6: Wavelength Shifting (WLS) Test
+
+This test validates the GPU wavelength shifting implementation using a dedicated
+geometry with a WLS sphere surrounded by a detector shell:
+
+```
+Geometry: wls_test.gdml
+├── Air world (r=200 mm)
+│ ├── WLS sphere (r=20 mm) ← Absorbs UV, re-emits visible
+│ └── Glass detector shell (r=28-30 mm) ← 100% detection efficiency
+```
+
+The WLS material absorbs UV photons (350 nm) and re-emits them isotropically at
+longer wavelengths (peak ~481 nm) with a 0.5 ns exponential time delay. The test
+fires 1000 monochromatic 350 nm photons from the origin into the WLS sphere.
+
+```bash
+GPUPhotonSourceMinimal -g tests/geom/wls_test.gdml -c wls_test -m tests/run.mac -s 42
+```
+
+**Expected results:**
+- ~990/1000 photons detected (10 absorbed after failing energy conservation)
+- All hits wavelength-shifted from 350 nm to mean ~487 nm
+- Energy conservation: no hits with wavelength < 350 nm
+- Isotropic re-emission: mean momentum direction near zero
+- Time delay: mean ~0.6 ns (propagation + 0.5 ns exponential WLS decay)
+
+**GDML WLS properties required** (same syntax for G4 10.x and 11.x):
+```xml
+
+
+
+
+
+
+
+
+
+
+
+
+```
+
+Unlike scintillation properties, WLS property names are the same in both Geant4
+10.x and 11.x — no dual-naming is needed.
+
+**Test files:** `tests/geom/wls_test.gdml`, `config/wls_test.json`
+**Implementation docs:** `docs/WLS_IMPLEMENTATION.md`
+
### Torch configuration
`GPUPhotonSource` and `GPUPhotonSourceMinimal` read photon source parameters from a
@@ -317,6 +367,7 @@ JSON config file (default `config/dev.json`). Key fields:
|---------|-------------|-------------|-----------------|----------------------|---------------------|
| Cerenkov genstep collection | ✓ | ✓ | ✗ | ✗ | ✗ |
| Scintillation genstep collection | ✗ | ✓ | ✗ | ✗ | ✗ |
+| Wavelength shifting (WLS) | ✓ | ✓ | ✓ | ✓ | ✓ |
| Torch photon generation | ✗ | ✗ | ✓ | ✓ | ✗ |
| Photon input from text file | ✗ | ✗ | ✗ | ✗ | ✓ |
| G4 optical photon tracking | ✓ | ✓ | ✓ | ✗ | ✗ |
diff --git a/config/wls_100k.json b/config/wls_100k.json
new file mode 100644
index 000000000..26166e47b
--- /dev/null
+++ b/config/wls_100k.json
@@ -0,0 +1,30 @@
+{
+ "torch": {
+ "gentype": "TORCH",
+ "trackid": 0,
+ "matline": 0,
+ "numphoton": 100000,
+
+ "pos": [0.0, 0.0, 0.0],
+ "time": 0.0,
+
+ "mom": [0.0, 0.0, 1.0],
+ "weight": 0.0,
+
+ "pol": [1.0, 0.0, 0.0],
+ "wavelength": 350.0,
+
+ "zenith": [0.0, 1.0],
+ "azimuth": [0.0, 1.0],
+
+ "radius": 0.0,
+ "distance": 0.0,
+ "mode": 255,
+ "type": "disc"
+ },
+
+ "event": {
+ "mode": "DebugLite",
+ "maxslot": 10000000
+ }
+}
diff --git a/config/wls_scatter_viz.json b/config/wls_scatter_viz.json
new file mode 100644
index 000000000..76bd9ece2
--- /dev/null
+++ b/config/wls_scatter_viz.json
@@ -0,0 +1,30 @@
+{
+ "torch": {
+ "gentype": "TORCH",
+ "trackid": 0,
+ "matline": 0,
+ "numphoton": 10000,
+
+ "pos": [0.0, 0.0, -25.0],
+ "time": 0.0,
+
+ "mom": [0.0, 0.0, 1.0],
+ "weight": 0.0,
+
+ "pol": [1.0, 0.0, 0.0],
+ "wavelength": 350.0,
+
+ "zenith": [0.0, 0.3],
+ "azimuth": [0.0, 1.0],
+
+ "radius": 0.0,
+ "distance": 0.0,
+ "mode": 255,
+ "type": "disc"
+ },
+
+ "event": {
+ "mode": "HitPhoton",
+ "maxslot": 100000
+ }
+}
diff --git a/config/wls_slab.json b/config/wls_slab.json
new file mode 100644
index 000000000..bfd412305
--- /dev/null
+++ b/config/wls_slab.json
@@ -0,0 +1,30 @@
+{
+ "torch": {
+ "gentype": "TORCH",
+ "trackid": 0,
+ "matline": 0,
+ "numphoton": 100000,
+
+ "pos": [0.0, 0.0, -50.0],
+ "time": 0.0,
+
+ "mom": [0.0, 0.0, 1.0],
+ "weight": 0.0,
+
+ "pol": [1.0, 0.0, 0.0],
+ "wavelength": 400.0,
+
+ "zenith": [0.0, 0.0],
+ "azimuth": [0.0, 1.0],
+
+ "radius": 0.0,
+ "distance": 0.0,
+ "mode": 255,
+ "type": "disc"
+ },
+
+ "event": {
+ "mode": "DebugLite",
+ "maxslot": 10000000
+ }
+}
diff --git a/config/wls_test.json b/config/wls_test.json
new file mode 100644
index 000000000..b8572b5b9
--- /dev/null
+++ b/config/wls_test.json
@@ -0,0 +1,30 @@
+{
+ "torch": {
+ "gentype": "TORCH",
+ "trackid": 0,
+ "matline": 0,
+ "numphoton": 1000,
+
+ "pos": [0.0, 0.0, 0.0],
+ "time": 0.0,
+
+ "mom": [0.0, 0.0, 1.0],
+ "weight": 0.0,
+
+ "pol": [1.0, 0.0, 0.0],
+ "wavelength": 350.0,
+
+ "zenith": [0.0, 1.0],
+ "azimuth": [0.0, 1.0],
+
+ "radius": 0.0,
+ "distance": 0.0,
+ "mode": 255,
+ "type": "disc"
+ },
+
+ "event": {
+ "mode": "DebugLite",
+ "maxslot": 1000000
+ }
+}
diff --git a/examples/benchmark_apex.sh b/examples/benchmark_apex.sh
new file mode 100755
index 000000000..2a4de6f83
--- /dev/null
+++ b/examples/benchmark_apex.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# benchmark_apex.sh — Measure GPU vs G4 speedup on apex.gdml
+#
+# Usage:
+# ./examples/benchmark_apex.sh
+
+GDML="apex.gdml"
+MACRO="tests/run.mac"
+EPS="0.00001"
+EPS0="0.0006"
+OUTDIR="plots"
+CONFIG="det_debug"
+
+if [ ! -f "$GDML" ]; then
+ echo "ERROR: $GDML not found. Run from the eic-opticks root directory."
+ exit 1
+fi
+
+echo "=== apex.gdml Benchmark ==="
+echo "eps=$EPS, eps0=$EPS0"
+echo "Running..."
+
+LOGFILE=$(mktemp /tmp/bench_XXXXXX.txt)
+OPTICKS_MAX_BOUNCE=1000 \
+OPTICKS_PROPAGATE_EPSILON=$EPS \
+OPTICKS_PROPAGATE_EPSILON0=$EPS0 \
+GPURaytrace -g "$GDML" -m "$MACRO" -c "$CONFIG" &> "$LOGFILE" || true
+
+GPU_TIME=$(grep "Simulation time:" "$LOGFILE" | awk '{print $3}')
+G4_LINE=$(grep "^ User=" "$LOGFILE" | tail -1)
+G4_CPU=$(echo "$G4_LINE" | grep -oP 'User=\K[0-9.]+')
+G4_WALL=$(echo "$G4_LINE" | grep -oP 'Real=\K[0-9.]+')
+NPHOTONS=$(grep "NumCollected:" "$LOGFILE" | tail -1 | awk '{print $NF}')
+GPU_HITS=$(grep "Opticks: NumHits:" "$LOGFILE" | awk '{print $NF}')
+G4_HITS=$(grep "Geant4: NumHits:" "$LOGFILE" | awk '{print $NF}')
+
+if [ -z "$GPU_TIME" ] || [ -z "$G4_CPU" ]; then
+ echo "ERROR: Could not parse timing from output"
+ tail -30 "$LOGFILE"
+ rm -f "$LOGFILE"
+ exit 1
+fi
+
+python3 -c "
+gpu = float('$GPU_TIME')
+g4_cpu = float('$G4_CPU')
+g4_wall = float('$G4_WALL')
+nphotons = int('$NPHOTONS')
+gpu_hits = int('$GPU_HITS')
+g4_hits = int('$G4_HITS')
+hit_diff = (gpu_hits - g4_hits) / g4_hits * 100 if g4_hits > 0 else 0
+
+print()
+print(f'Photons: {nphotons:>10,}')
+print(f'GPU sim time: {gpu:>10.4f} s')
+print(f'G4 CPU time: {g4_cpu:>10.2f} s')
+print(f'G4 wall time: {g4_wall:>10.2f} s')
+print()
+print(f'Speedup (CPU): {g4_cpu/gpu:>10.0f}x')
+print(f'Speedup (wall): {g4_wall/gpu:>10.0f}x')
+print()
+print(f'GPU rate: {nphotons/gpu/1e6:>10.1f} M photons/s')
+print(f'G4 rate: {nphotons/g4_cpu/1e3:>10.1f} k photons/s')
+print()
+print(f'GPU hits: {gpu_hits:>10}')
+print(f'G4 hits: {g4_hits:>10}')
+print(f'Hit diff: {hit_diff:>+9.1f}%')
+"
+
+rm -f "$LOGFILE"
+
+# Generate comparison plots if hit files exist
+if [ -f "gpu_hits.npy" ] && [ -f "g4_hits.npy" ]; then
+ echo ""
+ echo "=== Generating comparison plots ==="
+ python3 optiphy/ana/run_and_compare.py --gpu-hits gpu_hits.npy --g4-hits g4_hits.npy --outdir "$OUTDIR" 2>&1 | tail -15
+ echo "Plots saved to $OUTDIR/"
+fi
diff --git a/optiphy/ana/compare_aligned.py b/optiphy/ana/compare_aligned.py
new file mode 100644
index 000000000..b044ce6f2
--- /dev/null
+++ b/optiphy/ana/compare_aligned.py
@@ -0,0 +1,207 @@
+#!/usr/bin/env python3
+"""
+compare_aligned.py - Photon-by-photon comparison of GPU vs G4 aligned simulations.
+
+Usage:
+ python compare_aligned.py
+
+Performs:
+ 1. Per-photon flag comparison (exact match rate)
+ 2. Position comparison at multiple thresholds
+ 3. Chi-squared test on flag distributions (gold-standard validation metric)
+ 4. Glancing-angle photon identification (normal sign ambiguity)
+ 5. Divergent photon listing
+"""
+import sys
+import numpy as np
+
+FLAG_NAMES = {
+ 0x0004: "TORCH", 0x0008: "BULK_ABSORB", 0x0010: "BULK_REEMIT",
+ 0x0020: "BULK_SCATTER", 0x0040: "SURFACE_DETECT", 0x0080: "SURFACE_ABSORB",
+ 0x0100: "SURFACE_DREFLECT", 0x0200: "SURFACE_SREFLECT",
+ 0x0400: "BOUNDARY_REFLECT", 0x0800: "BOUNDARY_TRANSMIT", 0x8000: "MISS",
+}
+
+def flag_name(f):
+ return FLAG_NAMES.get(f, f"0x{f:04x}")
+
+def extract_flag(photon):
+ """Extract flag from q3.x (orient_boundary_flag) - lower 16 bits."""
+ q3 = photon.view(np.uint32).reshape(-1, 4, 4)
+ return q3[:, 3, 0] & 0xFFFF
+
+def chi2_flag_distribution(gpu_flags, g4_flags):
+ """
+ Chi-squared comparison of flag distributions.
+
+ Compares the frequency of each flag value between GPU and G4.
+ This is the opticks gold-standard validation metric.
+
+ Returns (chi2, ndof, flags_used, gpu_counts, g4_counts).
+ """
+ all_flags = sorted(set(gpu_flags) | set(g4_flags))
+ gpu_counts = np.array([(gpu_flags == f).sum() for f in all_flags], dtype=float)
+ g4_counts = np.array([(g4_flags == f).sum() for f in all_flags], dtype=float)
+
+ total = gpu_counts + g4_counts
+ mask = total > 0
+ gpu_c = gpu_counts[mask]
+ g4_c = g4_counts[mask]
+ tot = total[mask]
+ flags_used = [f for f, m in zip(all_flags, mask) if m]
+
+ n_gpu = gpu_c.sum()
+ n_g4 = g4_c.sum()
+ expected_gpu = tot * n_gpu / (n_gpu + n_g4)
+ expected_g4 = tot * n_g4 / (n_gpu + n_g4)
+
+ chi2 = 0.0
+ for i in range(len(flags_used)):
+ if expected_gpu[i] > 0:
+ chi2 += (gpu_c[i] - expected_gpu[i])**2 / expected_gpu[i]
+ if expected_g4[i] > 0:
+ chi2 += (g4_c[i] - expected_g4[i])**2 / expected_g4[i]
+
+ ndof = max(len(flags_used) - 1, 1)
+ return chi2, ndof, flags_used, gpu_c, g4_c
+
+def identify_glancing(gpu, g4):
+ """
+ Identify glancing-angle photons where the normal sign ambiguity
+ causes momentum negation between GPU and G4.
+
+ At glancing incidence cos(theta) ~ 0, float32 vs float64 can produce
+ opposite normal signs, reflecting the photon in the opposite direction.
+ These photons have matching flags but very different positions.
+
+ Returns boolean mask of glancing photons.
+ """
+ gpu_mom = gpu[:, 1, :3]
+ g4_mom = g4[:, 1, :3]
+
+ # Normalize momenta (should already be unit vectors, but be safe)
+ gpu_norm = np.linalg.norm(gpu_mom, axis=1, keepdims=True)
+ g4_norm = np.linalg.norm(g4_mom, axis=1, keepdims=True)
+ gpu_norm[gpu_norm == 0] = 1
+ g4_norm[g4_norm == 0] = 1
+
+ gpu_hat = gpu_mom / gpu_norm
+ g4_hat = g4_mom / g4_norm
+
+ # Dot product of momentum directions: -1 = fully negated (normal flip)
+ mom_dot = np.sum(gpu_hat * g4_hat, axis=1)
+
+ # Glancing: momentum vectors are nearly anti-parallel (dot ~ -1)
+ glancing = mom_dot < -0.5
+ return glancing, mom_dot
+
+def main():
+ if len(sys.argv) < 3:
+ print(f"Usage: {sys.argv[0]} ")
+ sys.exit(1)
+
+ gpu = np.load(sys.argv[1])
+ g4 = np.load(sys.argv[2])
+
+ print(f"GPU shape: {gpu.shape}")
+ print(f"G4 shape: {g4.shape}")
+
+ n = min(len(gpu), len(g4))
+ gpu = gpu[:n]
+ g4 = g4[:n]
+
+ gpu_flags = extract_flag(gpu)
+ g4_flags = extract_flag(g4)
+
+ # ---- 1. Per-photon flag comparison ----
+ match = gpu_flags == g4_flags
+ n_match = match.sum()
+ n_diff = n - n_match
+ print(f"\n{'='*60}")
+ print(f"FLAG COMPARISON ({n} photons)")
+ print(f"{'='*60}")
+ print(f" Matching: {n_match} ({100*n_match/n:.1f}%)")
+ print(f" Differ: {n_diff} ({100*n_diff/n:.1f}%)")
+
+ # ---- 2. Position comparison ----
+ gpu_pos = gpu[:, 0, :3]
+ g4_pos = g4[:, 0, :3]
+ pos_diff = np.linalg.norm(gpu_pos - g4_pos, axis=1)
+ zero_g4 = np.all(g4_pos == 0, axis=1)
+
+ valid = ~zero_g4
+ n_valid = valid.sum()
+ print(f"\n{'='*60}")
+ print(f"POSITION COMPARISON ({n_valid} valid, {zero_g4.sum()} unrecorded)")
+ print(f"{'='*60}")
+ if n_valid > 0:
+ vdiff = pos_diff[valid]
+ print(f" Mean dist: {vdiff.mean():.4f} mm")
+ print(f" Max dist: {vdiff.max():.4f} mm")
+ print(f" < 0.01 mm: {(vdiff < 0.01).sum()} ({100*(vdiff < 0.01).sum()/n_valid:.1f}%)")
+ print(f" < 0.1 mm: {(vdiff < 0.1).sum()} ({100*(vdiff < 0.1).sum()/n_valid:.1f}%)")
+ print(f" < 1.0 mm: {(vdiff < 1.0).sum()} ({100*(vdiff < 1.0).sum()/n_valid:.1f}%)")
+
+ # ---- 3. Chi-squared test on flag distributions ----
+ print(f"\n{'='*60}")
+ print(f"CHI-SQUARED TEST (flag distribution)")
+ print(f"{'='*60}")
+
+ chi2_val, ndof, flags_used, gpu_c, g4_c = chi2_flag_distribution(gpu_flags, g4_flags)
+
+ print(f" {'Flag':<20s} {'GPU':>8s} {'G4':>8s} {'Diff':>8s}")
+ print(f" {'-'*20} {'-'*8} {'-'*8} {'-'*8}")
+ for i, f in enumerate(flags_used):
+ diff = int(gpu_c[i] - g4_c[i])
+ sign = "+" if diff > 0 else ""
+ print(f" {flag_name(f):<20s} {int(gpu_c[i]):>8d} {int(g4_c[i]):>8d} {sign}{diff:>7d}")
+
+ deviant_frac = 100 * n_diff / n if n > 0 else 0
+ print(f"\n chi2/ndof = {chi2_val:.2f}/{ndof} = {chi2_val/ndof:.2f}")
+ print(f" deviant fraction: {deviant_frac:.2f}% ({n_diff}/{n})")
+
+ # ---- 4. Glancing-angle analysis ----
+ print(f"\n{'='*60}")
+ print(f"GLANCING-ANGLE ANALYSIS (normal sign ambiguity)")
+ print(f"{'='*60}")
+
+ glancing, mom_dot = identify_glancing(gpu, g4)
+ n_glancing = glancing.sum()
+
+ # Among matching-flag photons, how many are glancing with large pos diff?
+ match_glancing = match & glancing
+ match_large_pos = match & (pos_diff > 1.0)
+ match_glancing_large = match & glancing & (pos_diff > 1.0)
+
+ print(f" Glancing photons (mom dot < -0.5): {n_glancing}")
+ print(f" Matching flag + pos diff > 1mm: {match_large_pos.sum()}")
+ print(f" Of those, glancing: {match_glancing_large.sum()}")
+ if match_large_pos.sum() > 0:
+ frac = 100 * match_glancing_large.sum() / match_large_pos.sum()
+ print(f" Fraction explained by glancing: {frac:.0f}%")
+
+ # Position stats excluding glancing photons
+ non_glancing_match = match & ~glancing & valid
+ if non_glancing_match.sum() > 0:
+ ng_diff = pos_diff[non_glancing_match]
+ print(f"\n Position (matching, non-glancing, {non_glancing_match.sum()} photons):")
+ print(f" Max dist: {ng_diff.max():.6f} mm")
+ print(f" Mean dist: {ng_diff.mean():.6f} mm")
+ print(f" < 0.01 mm: {(ng_diff < 0.01).sum()} ({100*(ng_diff < 0.01).sum()/non_glancing_match.sum():.1f}%)")
+
+ # ---- 5. Divergent photon listing ----
+ if n_diff > 0:
+ div_idx = np.where(~match)[0]
+ print(f"\n{'='*60}")
+ print(f"DIVERGENT PHOTONS (first 10 of {n_diff})")
+ print(f"{'='*60}")
+ for i in div_idx[:10]:
+ gf = flag_name(gpu_flags[i])
+ cf = flag_name(g4_flags[i])
+ gp = gpu_pos[i]
+ cp = g4_pos[i]
+ print(f" [{i:5d}] GPU: {gf:20s} pos=({gp[0]:8.2f},{gp[1]:8.2f},{gp[2]:8.2f})")
+ print(f" G4: {cf:20s} pos=({cp[0]:8.2f},{cp[1]:8.2f},{cp[2]:8.2f})")
+
+if __name__ == "__main__":
+ main()
diff --git a/optiphy/ana/compare_gpu_g4.py b/optiphy/ana/compare_gpu_g4.py
new file mode 100755
index 000000000..87fe210e6
--- /dev/null
+++ b/optiphy/ana/compare_gpu_g4.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python
+"""
+compare_gpu_g4.py : Compare GPU (opticks) vs G4 (standalone) simulation hits
+=============================================================================
+
+Reads GPU hit/photon arrays from an opticks event folder and G4 hits from
+g4_hits.npy, then prints a side-by-side comparison table.
+
+Usage::
+
+ python ana/compare_gpu_g4.py
+
+ # Auto-resolves A000 subfolder:
+ python ana/compare_gpu_g4.py /tmp/$USER/opticks/GEOM/GEOM/GPUPhotonSourceMinimal/ALL0_no_opticks_event_name g4_hits.npy
+"""
+import sys
+import os
+import argparse
+import numpy as np
+
+FLAG_ENUM = {
+ 0x0004: "TORCH", 0x0008: "BULK_ABSORB", 0x0010: "BULK_REEMIT",
+ 0x0020: "BULK_SCATTER", 0x0040: "SURFACE_DETECT", 0x0080: "SURFACE_ABSORB",
+ 0x0100: "SURFACE_DREFLECT", 0x0200: "SURFACE_SREFLECT",
+ 0x0400: "BOUNDARY_REFLECT", 0x0800: "BOUNDARY_TRANSMIT",
+ 0x1000: "NAN_ABORT", 0x2000: "EFFICIENCY_COLLECT", 0x8000: "MISS",
+}
+
+
+def resolve_event_path(path):
+ if os.path.exists(os.path.join(path, "photon.npy")):
+ return path
+ a000 = os.path.join(path, "A000")
+ if os.path.exists(os.path.join(a000, "photon.npy")):
+ return a000
+ if os.path.isdir(path):
+ for d in sorted(os.listdir(path)):
+ dp = os.path.join(path, d)
+ if os.path.isdir(dp) and os.path.exists(os.path.join(dp, "photon.npy")):
+ return dp
+ return path
+
+
+def hit_stats(hits, label):
+ """Compute statistics dict from a (N, 4, 4) hit array."""
+ n = len(hits)
+ if n == 0:
+ return dict(label=label, n=0)
+ wl = hits[:, 2, 3]
+ t = hits[:, 0, 3]
+ pos = hits[:, 0, :3]
+ r = np.sqrt(np.sum(pos ** 2, axis=1))
+ return dict(
+ label=label, n=n,
+ wl_min=wl.min(), wl_max=wl.max(), wl_mean=wl.mean(), wl_std=wl.std(),
+ t_min=t.min(), t_max=t.max(), t_mean=t.mean(), t_std=t.std(),
+ r_min=r.min(), r_max=r.max(), r_mean=r.mean(),
+ x_mean=pos[:, 0].mean(), y_mean=pos[:, 1].mean(), z_mean=pos[:, 2].mean(),
+ )
+
+
+def print_comparison_table(gpu, g4, n_photons):
+ """Print side-by-side comparison."""
+ w = 14 # column width
+
+ print("=" * 70)
+ print("GPU vs G4 COMPARISON")
+ print("=" * 70)
+
+ print(f"\n {'':30s} {'GPU':>{w}s} {'G4':>{w}s} {'Diff':>{w}s}")
+ print(f" {'-'*30} {'-'*w} {'-'*w} {'-'*w}")
+
+ def row(name, gv, cv, fmt=".1f", diff_fmt=None):
+ if diff_fmt is None:
+ diff_fmt = fmt
+ gs = f"{gv:{fmt}}" if gv is not None else "—"
+ cs = f"{cv:{fmt}}" if cv is not None else "—"
+ if gv is not None and cv is not None:
+ d = cv - gv
+ ds = f"{d:{diff_fmt}}"
+ else:
+ ds = "—"
+ print(f" {name:30s} {gs:>{w}s} {cs:>{w}s} {ds:>{w}s}")
+
+ row("Hits", gpu["n"], g4["n"], "d")
+ if n_photons and n_photons > 0:
+ row("Hit rate (%)", 100.0 * gpu["n"] / n_photons, 100.0 * g4["n"] / n_photons, ".2f")
+
+ if gpu["n"] > 0 and g4["n"] > 0:
+ ratio = g4["n"] / gpu["n"]
+ print(f" {'Ratio G4/GPU':30s} {'':>{w}s} {'':>{w}s} {ratio:>{w}.3f}")
+
+ if gpu["n"] == 0 or g4["n"] == 0:
+ print("\n Cannot compare distributions — one side has zero hits.")
+ return
+
+ print()
+ row("Wavelength min (nm)", gpu["wl_min"], g4["wl_min"])
+ row("Wavelength max (nm)", gpu["wl_max"], g4["wl_max"])
+ row("Wavelength mean (nm)", gpu["wl_mean"], g4["wl_mean"])
+ row("Wavelength std (nm)", gpu["wl_std"], g4["wl_std"])
+
+ print()
+ row("Time min (ns)", gpu["t_min"], g4["t_min"], ".3f")
+ row("Time max (ns)", gpu["t_max"], g4["t_max"], ".3f")
+ row("Time mean (ns)", gpu["t_mean"], g4["t_mean"], ".3f")
+ row("Time std (ns)", gpu["t_std"], g4["t_std"], ".3f")
+
+ print()
+ row("Radius min (mm)", gpu["r_min"], g4["r_min"], ".2f")
+ row("Radius max (mm)", gpu["r_max"], g4["r_max"], ".2f")
+ row("Radius mean (mm)", gpu["r_mean"], g4["r_mean"], ".2f")
+
+ print()
+ row("Mean X (mm)", gpu["x_mean"], g4["x_mean"], ".2f")
+ row("Mean Y (mm)", gpu["y_mean"], g4["y_mean"], ".2f")
+ row("Mean Z (mm)", gpu["z_mean"], g4["z_mean"], ".2f")
+
+ # Statistical significance
+ print()
+ if n_photons and n_photons > 0:
+ p_pool = (gpu["n"] + g4["n"]) / (2 * n_photons)
+ std = np.sqrt(p_pool * (1 - p_pool) / n_photons)
+ if std > 0:
+ z = abs(gpu["n"] / n_photons - g4["n"] / n_photons) / (std * np.sqrt(2))
+ expected_fluct = std * np.sqrt(2) * n_photons
+ print(f" {'Z-score (hit count)':30s} {z:>{w}.1f}")
+ print(f" {'Expected 1σ fluctuation':30s} {expected_fluct:>{w}.0f} hits")
+ if z > 3:
+ print(f" ** Statistically significant difference (>{3}σ) **")
+ else:
+ print(f" Within statistical expectations (<3σ)")
+ print()
+
+
+def print_gpu_outcomes(photon):
+ """Print GPU photon outcome summary."""
+ q3 = photon[:, 3, :].view(np.uint32)
+ flag = q3[:, 0] & 0xFFFF
+
+ print("=" * 70)
+ print("GPU PHOTON OUTCOMES")
+ print("=" * 70)
+
+ n = len(flag)
+ vals, counts = np.unique(flag, return_counts=True)
+ order = np.argsort(-counts)
+
+ print(f"\n {'Flag':<22s} {'Count':>8s} {'%':>7s}")
+ print(f" {'-'*22} {'-'*8} {'-'*7}")
+ for idx in order:
+ f = vals[idx]
+ c = counts[idx]
+ name = FLAG_ENUM.get(f, f"0x{f:04x}")
+ print(f" {name:<22s} {c:8d} {100*c/n:6.1f}%")
+ print()
+
+
+def print_wavelength_histograms(gpu_hits, g4_hits):
+ """Print overlaid wavelength histograms."""
+ if len(gpu_hits) == 0 or len(g4_hits) == 0:
+ return
+
+ gpu_wl = gpu_hits[:, 2, 3]
+ g4_wl = g4_hits[:, 2, 3]
+
+ wl_min = min(gpu_wl.min(), g4_wl.min())
+ wl_max = max(gpu_wl.max(), g4_wl.max())
+ bins = np.arange(max(100, np.floor(wl_min / 25) * 25),
+ min(800, np.ceil(wl_max / 25) * 25 + 25), 25)
+
+ gpu_counts, _ = np.histogram(gpu_wl, bins=bins)
+ g4_counts, _ = np.histogram(g4_wl, bins=bins)
+
+ # Normalize to same total for shape comparison
+ gpu_norm = gpu_counts / len(gpu_hits) * 1000
+ g4_norm = g4_counts / len(g4_hits) * 1000
+
+ print("=" * 70)
+ print("WAVELENGTH DISTRIBUTION (per 1000 hits)")
+ print("=" * 70)
+ print(f"\n {'Bin (nm)':<14s} {'GPU':>8s} {'G4':>8s} {'GPU':^20s} {'G4':^20s}")
+ print(f" {'-'*14} {'-'*8} {'-'*8} {'-'*20} {'-'*20}")
+
+ max_bar = 20
+ scale = max(gpu_norm.max(), g4_norm.max())
+ if scale == 0:
+ scale = 1
+
+ for i in range(len(bins) - 1):
+ if gpu_counts[i] == 0 and g4_counts[i] == 0:
+ continue
+ gpu_bar = "#" * int(gpu_norm[i] / scale * max_bar)
+ g4_bar = "#" * int(g4_norm[i] / scale * max_bar)
+ print(f" {bins[i]:5.0f}-{bins[i+1]:5.0f} {gpu_norm[i]:8.1f} {g4_norm[i]:8.1f}"
+ f" {gpu_bar:<20s} {g4_bar:<20s}")
+ print()
+
+
+def print_time_histograms(gpu_hits, g4_hits):
+ """Print overlaid time histograms."""
+ if len(gpu_hits) == 0 or len(g4_hits) == 0:
+ return
+
+ gpu_t = gpu_hits[:, 0, 3]
+ g4_t = g4_hits[:, 0, 3]
+
+ t_max = max(gpu_t.max(), g4_t.max())
+ bin_size = max(1.0, np.ceil(t_max / 15))
+ bins = np.arange(0, t_max + bin_size, bin_size)
+
+ gpu_counts, _ = np.histogram(gpu_t, bins=bins)
+ g4_counts, _ = np.histogram(g4_t, bins=bins)
+
+ gpu_norm = gpu_counts / len(gpu_hits) * 1000
+ g4_norm = g4_counts / len(g4_hits) * 1000
+
+ print("=" * 70)
+ print("TIME DISTRIBUTION (per 1000 hits)")
+ print("=" * 70)
+ print(f"\n {'Bin (ns)':<14s} {'GPU':>8s} {'G4':>8s} {'GPU':^20s} {'G4':^20s}")
+ print(f" {'-'*14} {'-'*8} {'-'*8} {'-'*20} {'-'*20}")
+
+ max_bar = 20
+ scale = max(gpu_norm.max(), g4_norm.max())
+ if scale == 0:
+ scale = 1
+
+ for i in range(len(bins) - 1):
+ if gpu_counts[i] == 0 and g4_counts[i] == 0:
+ continue
+ gpu_bar = "#" * int(gpu_norm[i] / scale * max_bar)
+ g4_bar = "#" * int(g4_norm[i] / scale * max_bar)
+ print(f" {bins[i]:5.1f}-{bins[i+1]:5.1f} {gpu_norm[i]:8.1f} {g4_norm[i]:8.1f}"
+ f" {gpu_bar:<20s} {g4_bar:<20s}")
+ print()
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Compare GPU (opticks) vs G4 (standalone) simulation hits",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=__doc__,
+ )
+ parser.add_argument("gpu_path", help="Path to GPU opticks event folder")
+ parser.add_argument("g4_hits", help="Path to G4 hits file (g4_hits.npy)")
+ parser.add_argument("--histograms", action="store_true",
+ help="Show wavelength and time distribution histograms")
+
+ args = parser.parse_args()
+
+ gpu_path = resolve_event_path(args.gpu_path)
+ if not os.path.exists(os.path.join(gpu_path, "photon.npy")):
+ print(f"Error: photon.npy not found in {gpu_path}")
+ sys.exit(1)
+ if not os.path.exists(args.g4_hits):
+ print(f"Error: {args.g4_hits} not found")
+ sys.exit(1)
+
+ # Load GPU arrays
+ gpu_hits = np.load(os.path.join(gpu_path, "hit.npy")) if os.path.exists(os.path.join(gpu_path, "hit.npy")) else np.zeros((0, 4, 4), dtype=np.float32)
+ gpu_photon = np.load(os.path.join(gpu_path, "photon.npy"))
+ n_photons = len(gpu_photon)
+
+ # Load G4 hits
+ g4_hits = np.load(args.g4_hits)
+
+ print(f"\nGPU event: {gpu_path}")
+ print(f"G4 hits: {args.g4_hits}")
+ print(f"Total photons: {n_photons}\n")
+
+ # Compute stats
+ gpu_stats = hit_stats(gpu_hits, "GPU")
+ g4_stats = hit_stats(g4_hits, "G4")
+
+ # Print tables
+ print_comparison_table(gpu_stats, g4_stats, n_photons)
+ print_gpu_outcomes(gpu_photon)
+
+ if args.histograms:
+ print_wavelength_histograms(gpu_hits, g4_hits)
+ print_time_histograms(gpu_hits, g4_hits)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/optiphy/ana/plot_photon_paths.py b/optiphy/ana/plot_photon_paths.py
new file mode 100644
index 000000000..bf6a09245
--- /dev/null
+++ b/optiphy/ana/plot_photon_paths.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+"""Plot GPU photon paths colored by wavelength from record.npy.
+
+Usage:
+ python optiphy/ana/plot_photon_paths.py [photon_indices] [--output path.png]
+
+Examples:
+ # Plot first 10 hit photons
+ python optiphy/ana/plot_photon_paths.py /tmp/$USER/opticks/GEOM/GEOM/GPUPhotonSourceMinimal/ALL0_no_opticks_event_name/A000
+
+ # Plot specific photons by index
+ python optiphy/ana/plot_photon_paths.py /tmp/$USER/opticks/.../A000 2,19,6
+
+ # Custom output path
+ python optiphy/ana/plot_photon_paths.py /tmp/$USER/opticks/.../A000 2,19,6 --output my_plot.png
+"""
+import argparse
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import matplotlib.colors as mcolors
+from matplotlib.cm import ScalarMappable
+
+
+def wl_to_rgb(wl):
+ """Convert wavelength (nm) to RGB tuple. Covers 300-780nm."""
+ r = g = b = 0.0
+ if 300 <= wl < 380:
+ t = (wl - 300) / (380 - 300)
+ r = 0.4 * (1 - t) + 0.5 * t
+ g = 0
+ b = 0.4 * (1 - t) + 1.0 * t
+ elif 380 <= wl < 440:
+ r = -(wl - 440) / (440 - 380); g = 0; b = 1
+ elif 440 <= wl < 490:
+ r = 0; g = (wl - 440) / (490 - 440); b = 1
+ elif 490 <= wl < 510:
+ r = 0; g = 1; b = -(wl - 510) / (510 - 490)
+ elif 510 <= wl < 580:
+ r = (wl - 510) / (580 - 510); g = 1; b = 0
+ elif 580 <= wl < 645:
+ r = 1; g = -(wl - 645) / (645 - 580); b = 0
+ elif 645 <= wl <= 780:
+ r = 1; g = 0; b = 0
+ else:
+ r = g = b = 0.3
+ return (max(0, min(1, r)), max(0, min(1, g)), max(0, min(1, b)))
+
+
+def get_steps(record, pidx):
+ """Return number of valid steps for photon pidx."""
+ rec_p = record[pidx]
+ rf = rec_p.reshape(rec_p.shape[0], -1)
+ return int(np.sum(np.any(rf != 0, axis=1)))
+
+
+def plot_photon_paths(event_dir, photon_indices=None, output="photon_paths.png",
+ sphere_radii=None, title=None, lim=None):
+ record = np.load(f"{event_dir}/record.npy")
+ photon = np.load(f"{event_dir}/photon.npy")
+
+ q3 = photon[:, 3, :].copy().view(np.uint32)
+ flags = q3[:, 0] & 0xFFFF
+ hit_idx = np.where(flags == 0x40)[0]
+
+ if photon_indices is None:
+ photon_indices = hit_idx[:10]
+
+ fig = plt.figure(figsize=(12, 10))
+ ax = fig.add_subplot(111, projection='3d')
+
+ wl_min, wl_max = 800, 300
+ for pidx in photon_indices:
+ ns = get_steps(record, pidx)
+ if ns < 2:
+ continue
+ rec_p = record[pidx]
+ x = rec_p[:ns, 0, 0]
+ y = rec_p[:ns, 0, 1]
+ z = rec_p[:ns, 0, 2]
+ wl = rec_p[:ns, 2, 3]
+
+ wl_min = min(wl_min, wl.min())
+ wl_max = max(wl_max, wl.max())
+
+ for s in range(ns - 1):
+ color = wl_to_rgb(float(wl[s]))
+ ax.plot([x[s], x[s + 1]], [y[s], y[s + 1]], [z[s], z[s + 1]],
+ color=color, alpha=0.9, linewidth=2.5)
+
+ ax.scatter(x[0], y[0], z[0], c=[wl_to_rgb(float(wl[0]))], s=60,
+ marker='o', edgecolors='black', linewidths=0.8, zorder=5)
+ ax.scatter(x[-1], y[-1], z[-1], c='red', s=100, marker='*', zorder=5)
+
+ # Draw spheres if requested
+ if sphere_radii:
+ u = np.linspace(0, 2 * np.pi, 60)
+ v = np.linspace(0, np.pi, 30)
+ sphere_colors = ['mediumpurple', 'lightgreen', 'lightyellow', 'lightcoral']
+ sphere_alphas = [0.1, 0.05, 0.05, 0.05]
+ for i, r in enumerate(sphere_radii):
+ xs = r * np.outer(np.cos(u), np.sin(v))
+ ys = r * np.outer(np.sin(u), np.sin(v))
+ zs = r * np.outer(np.ones_like(u), np.cos(v))
+ ci = min(i, len(sphere_colors) - 1)
+ ax.plot_surface(xs, ys, zs, alpha=sphere_alphas[ci], color=sphere_colors[ci])
+
+ # Wavelength colorbar
+ wl_range = np.linspace(wl_min, wl_max, 256)
+ colors = [wl_to_rgb(w) for w in wl_range]
+ cmap = mcolors.ListedColormap(colors)
+ norm = mcolors.Normalize(vmin=wl_min, vmax=wl_max)
+ sm = ScalarMappable(cmap=cmap, norm=norm)
+ sm.set_array([])
+ plt.colorbar(sm, ax=ax, shrink=0.5, pad=0.08, label='Wavelength (nm)')
+
+ ax.set_xlabel('X (mm)')
+ ax.set_ylabel('Y (mm)')
+ ax.set_zlabel('Z (mm)')
+ if title:
+ ax.set_title(title)
+ if lim:
+ ax.set_xlim(-lim, lim)
+ ax.set_ylim(-lim, lim)
+ ax.set_zlim(-lim, lim)
+ ax.view_init(elev=20, azim=135)
+ plt.tight_layout()
+ plt.savefig(output, dpi=180)
+ print(f"Saved {output}")
+
+
+def main():
+ parser = argparse.ArgumentParser(description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument("event_dir", help="Path to opticks event folder containing record.npy")
+ parser.add_argument("indices", nargs='?', default=None,
+ help="Comma-separated photon indices (default: first 10 hits)")
+ parser.add_argument("--output", "-o", default="photon_paths.png", help="Output image path")
+ parser.add_argument("--spheres", default=None,
+ help="Comma-separated sphere radii to draw (e.g. 10,30)")
+ parser.add_argument("--title", "-t", default=None, help="Plot title")
+ parser.add_argument("--lim", type=float, default=None,
+ help="Axis limit in mm (symmetric)")
+ args = parser.parse_args()
+
+ indices = None
+ if args.indices:
+ indices = [int(x) for x in args.indices.split(',')]
+
+ spheres = None
+ if args.spheres:
+ spheres = [float(x) for x in args.spheres.split(',')]
+
+ plot_photon_paths(args.event_dir, indices, args.output,
+ sphere_radii=spheres, title=args.title, lim=args.lim)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/optiphy/ana/run_and_compare.py b/optiphy/ana/run_and_compare.py
new file mode 100755
index 000000000..d44980234
--- /dev/null
+++ b/optiphy/ana/run_and_compare.py
@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+"""Run GPU and G4 simulations and compare hit distributions.
+
+Runs GPURaytrace with a given GDML and config, then plots:
+ 1. Hit count with sqrt(N) error bars
+ 2. WLS-shifted wavelength distribution
+ 3. Full wavelength distribution
+ 4. Arrival time (bulk, truncated)
+ 5. Arrival time (full range, no overflow)
+ 6. 3D hit position scatter for GPU and G4
+
+Usage:
+ python optiphy/ana/run_and_compare.py -g apex.gdml -s 42 [--outdir plots]
+
+ # Skip simulation, use existing .npy files:
+ python optiphy/ana/run_and_compare.py --gpu-hits gpu_hits.npy --g4-hits g4_hits.npy
+"""
+import argparse
+import os
+import subprocess
+import sys
+import math
+
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+
+
+def run_simulation(gdml, config, macro, seed):
+ """Run GPURaytrace and return (gpu_hits_path, g4_hits_path, gpu_nhits, g4_nhits)."""
+ print("NOTE: For step count plots, set \"mode\": \"DebugLite\" in the config JSON file.")
+ print(" For hit-only analysis, \"HitPhoton\" is sufficient.")
+ print()
+ cmd = ["/opt/eic-opticks/bin/GPURaytrace",
+ "-g", gdml, "-m", macro, "-s", str(seed)]
+ if config:
+ cmd += ["-c", config]
+
+ print(f"Running: {' '.join(cmd)}")
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
+ output = result.stdout + result.stderr
+
+ gpu_nhits = g4_nhits = 0
+ for line in output.split('\n'):
+ if 'Opticks: NumHits:' in line:
+ gpu_nhits = int(line.strip().split()[-1])
+ if 'Geant4: NumHits:' in line:
+ g4_nhits = int(line.strip().split()[-1])
+
+ print(f" GPU: {gpu_nhits} hits, G4: {g4_nhits} hits")
+ return "gpu_hits.npy", "g4_hits.npy", gpu_nhits, g4_nhits
+
+
+def load_hits(path, expected_cols=None):
+ """Load hit array and reshape to (N, ?, 4)."""
+ a = np.load(path)
+ if a.ndim == 2:
+ ncols = a.shape[1] // 4
+ a = a.reshape(-1, ncols, 4)
+ return a
+
+
+def plot_with_errors(ax, data1, data2, bins, label1, label2, xlabel):
+ """Plot two histograms as points with sqrt(N) error bars."""
+ h1, edges = np.histogram(data1, bins=bins)
+ h2, _ = np.histogram(data2, bins=bins)
+ centers = (edges[:-1] + edges[1:]) / 2
+ width = (edges[1] - edges[0]) * 0.35
+ ax.errorbar(centers - width / 2, h1, yerr=np.sqrt(np.maximum(h1, 1)),
+ fmt='o', color='dodgerblue', markersize=4, capsize=2,
+ linewidth=1, label=label1)
+ ax.errorbar(centers + width / 2, h2, yerr=np.sqrt(np.maximum(h2, 1)),
+ fmt='s', color='orangered', markersize=4, capsize=2,
+ linewidth=1, label=label2)
+ ax.set_xlabel(xlabel)
+ ax.set_ylabel('Counts')
+ ax.legend()
+
+
+def make_plots(gpu, g4, outdir, title_extra="", g4_full=None, g4_raw_shape=None):
+ os.makedirs(outdir, exist_ok=True)
+
+ gpu_wl = gpu[:, 2, 3]
+ g4_wl = g4[:, 2, 3]
+ gpu_t = gpu[:, 0, 3]
+ g4_t = g4[:, 0, 3]
+ gpu_pos = gpu[:, 0, :3]
+ g4_pos = g4[:, 0, :3]
+
+ diff = 100 * (len(gpu) / len(g4) - 1) if len(g4) > 0 else 0
+ z_score = (len(gpu) - len(g4)) / math.sqrt(len(gpu) + len(g4)) if (len(gpu) + len(g4)) > 0 else 0
+ header = f"GPU={len(gpu)} G4={len(g4)} ({diff:+.1f}%, {z_score:+.1f}σ)"
+ if title_extra:
+ header = f"{title_extra}\n{header}"
+
+ # 1. Hit count
+ fig, ax = plt.subplots(figsize=(6, 5))
+ vals = [len(gpu), len(g4)]
+ errs = [math.sqrt(v) for v in vals]
+ ax.errorbar([0], [vals[0]], yerr=[errs[0]], fmt='o', markersize=12,
+ capsize=8, linewidth=2, color='dodgerblue', label='GPU')
+ ax.errorbar([1], [vals[1]], yerr=[errs[1]], fmt='s', markersize=12,
+ capsize=8, linewidth=2, color='orangered', label='G4')
+ ax.set_xticks([0, 1])
+ ax.set_xticklabels(['GPU', 'G4'])
+ ax.set_ylabel('Hits')
+ ax.set_title(f'Hit Count\n{header}')
+ ax.set_xlim(-0.5, 1.5)
+ ax.legend()
+ for i, (v, e) in enumerate(zip(vals, errs)):
+ ax.text(i + 0.15, v, f'{v}±{e:.0f}', va='center', fontsize=10)
+ plt.tight_layout()
+ plt.savefig(f'{outdir}/hits.png', dpi=150)
+ plt.close()
+
+ # 2. WLS-shifted wavelength
+ fig, ax = plt.subplots(figsize=(8, 5))
+ gpu_s = gpu_wl[gpu_wl > 380]
+ g4_s = g4_wl[g4_wl > 380]
+ plot_with_errors(ax, gpu_s, g4_s, np.arange(380, 550, 15),
+ f'GPU ({len(gpu_s)})', f'G4 ({len(g4_s)})',
+ 'Wavelength (nm)')
+ ax.set_title(f'WLS-shifted Wavelength (>380nm)\n{header}')
+ plt.tight_layout()
+ plt.savefig(f'{outdir}/wavelength_shifted.png', dpi=150)
+ plt.close()
+
+ # 3. Full wavelength
+ fig, ax = plt.subplots(figsize=(8, 5))
+ plot_with_errors(ax, gpu_wl, g4_wl, np.arange(330, 550, 15),
+ f'GPU ({len(gpu)})', f'G4 ({len(g4)})',
+ 'Wavelength (nm)')
+ ax.set_title(f'Full Wavelength\n{header}')
+ plt.tight_layout()
+ plt.savefig(f'{outdir}/wavelength_full.png', dpi=150)
+ plt.close()
+
+ # 4. Arrival time (bulk, truncated at 99th percentile)
+ fig, ax = plt.subplots(figsize=(8, 5))
+ t_cut = max(np.percentile(gpu_t, 99), np.percentile(g4_t, 99))
+ t_bins = np.linspace(0, t_cut, 30)
+ gpu_over = (gpu_t > t_cut).sum()
+ g4_over = (g4_t > t_cut).sum()
+ plot_with_errors(ax, gpu_t[gpu_t <= t_cut], g4_t[g4_t <= t_cut], t_bins,
+ f'GPU (overflow={gpu_over})', f'G4 (overflow={g4_over})',
+ 'Time (ns)')
+ ax.set_title(f'Arrival Time (t < {t_cut:.0f}ns)\n{header}')
+ ax.set_yscale('log')
+ ax.set_ylim(bottom=0.5)
+ plt.tight_layout()
+ plt.savefig(f'{outdir}/time_bulk.png', dpi=150)
+ plt.close()
+
+ # 5. Arrival time (full range, no overflow)
+ fig, ax = plt.subplots(figsize=(8, 5))
+ t_max = max(gpu_t.max(), g4_t.max()) * 1.05
+ t_bins_full = np.linspace(0, t_max, 50)
+ plot_with_errors(ax, gpu_t, g4_t, t_bins_full,
+ f'GPU ({len(gpu)})', f'G4 ({len(g4)})',
+ 'Time (ns)')
+ ax.set_title(f'Arrival Time (full range)\n{header}')
+ ax.set_yscale('log')
+ ax.set_ylim(bottom=0.5)
+ plt.tight_layout()
+ plt.savefig(f'{outdir}/time_full.png', dpi=150)
+ plt.close()
+
+ # 6. 3D hit positions
+ fig = plt.figure(figsize=(14, 6))
+
+ ax1 = fig.add_subplot(121, projection='3d')
+ ax1.scatter(gpu_pos[:, 0], gpu_pos[:, 1], gpu_pos[:, 2],
+ c='dodgerblue', s=3, alpha=0.5)
+ ax1.set_xlabel('X (mm)')
+ ax1.set_ylabel('Y (mm)')
+ ax1.set_zlabel('Z (mm)')
+ ax1.set_title(f'GPU hit positions ({len(gpu)})')
+
+ ax2 = fig.add_subplot(122, projection='3d')
+ ax2.scatter(g4_pos[:, 0], g4_pos[:, 1], g4_pos[:, 2],
+ c='orangered', s=3, alpha=0.5)
+ ax2.set_xlabel('X (mm)')
+ ax2.set_ylabel('Y (mm)')
+ ax2.set_zlabel('Z (mm)')
+ ax2.set_title(f'G4 hit positions ({len(g4)})')
+
+ plt.suptitle(f'3D Hit Positions\n{header}')
+ plt.tight_layout()
+ plt.savefig(f'{outdir}/positions_3d.png', dpi=150)
+ plt.close()
+
+ # 7. Step count distribution (GPU from record.npy, G4 from extended hit array)
+ gpu_steps = None
+ g4_steps = None
+
+ # GPU: try to load record.npy and count steps per hit
+ record_path = os.path.join(os.path.dirname(os.environ.get('OPTICKS_EVTDIR', '')),
+ 'record.npy')
+ # Try common paths
+ for rpath in [record_path,
+ '/tmp/MISSING_USER/opticks/GEOM/GEOM/GPURaytrace/ALL0_no_opticks_event_name/A000/record.npy',
+ '/tmp/MISSING_USER/opticks/GEOM/GEOM/GPUPhotonSourceMinimal/ALL0_no_opticks_event_name/A000/record.npy']:
+ if os.path.exists(rpath):
+ try:
+ record = np.load(rpath)
+ photon_all = np.load(rpath.replace('record.npy', 'photon.npy'))
+ q3_all = photon_all[:, 3, :].copy().view(np.uint32)
+ flags_all = q3_all[:, 0] & 0xFFFF
+ hit_mask = (flags_all == 0x40)
+ hit_indices = np.where(hit_mask)[0]
+ rec_flat = record[hit_indices].reshape(len(hit_indices), record.shape[1], -1)
+ gpu_steps = np.sum(np.any(rec_flat != 0, axis=2), axis=1)
+ print(f" GPU step counts loaded from {rpath}")
+ except Exception:
+ pass
+ break
+
+ # G4: step count stored in row 3, col 3 (last float of the 4x4 array)
+ g4_q3w = g4[:, 3, 3]
+ if np.any(g4_q3w > 0):
+ g4_steps = g4_q3w.astype(int)
+ print(f" G4 step counts loaded from hit array row 3 col 3")
+
+ if gpu_steps is not None or g4_steps is not None:
+ fig, ax = plt.subplots(figsize=(8, 5))
+ s_max = 0
+ if gpu_steps is not None:
+ s_max = max(s_max, np.percentile(gpu_steps, 99))
+ if g4_steps is not None:
+ s_max = max(s_max, np.percentile(g4_steps, 99))
+ s_bins = np.linspace(0, s_max * 1.1, 30)
+
+ if gpu_steps is not None and g4_steps is not None:
+ plot_with_errors(ax, gpu_steps, g4_steps, s_bins,
+ f'GPU ({len(gpu_steps)})', f'G4 ({len(g4_steps)})',
+ 'Steps to detection')
+ elif gpu_steps is not None:
+ h, edges = np.histogram(gpu_steps, bins=s_bins)
+ centers = (edges[:-1] + edges[1:]) / 2
+ ax.errorbar(centers, h, yerr=np.sqrt(np.maximum(h, 1)),
+ fmt='o', color='dodgerblue', markersize=4, capsize=2,
+ label=f'GPU ({len(gpu_steps)})')
+ ax.set_xlabel('Steps to detection')
+ ax.set_ylabel('Counts')
+ ax.legend()
+ elif g4_steps is not None:
+ h, edges = np.histogram(g4_steps, bins=s_bins)
+ centers = (edges[:-1] + edges[1:]) / 2
+ ax.errorbar(centers, h, yerr=np.sqrt(np.maximum(h, 1)),
+ fmt='s', color='orangered', markersize=4, capsize=2,
+ label=f'G4 ({len(g4_steps)})')
+ ax.set_xlabel('Steps to detection')
+ ax.set_ylabel('Counts')
+ ax.legend()
+
+ ax.set_title(f'Steps to Detection\n{header}')
+ plt.tight_layout()
+ plt.savefig(f'{outdir}/step_count.png', dpi=150)
+ plt.close()
+
+ # Print summary
+ print(f"\nSummary: {header}")
+ print(f" Wavelength: GPU mean={gpu_wl.mean():.1f}nm G4 mean={g4_wl.mean():.1f}nm")
+ print(f" Time: GPU mean={gpu_t.mean():.2f}ns G4 mean={g4_t.mean():.2f}ns")
+ print(f" WLS shifted: GPU {100*(gpu_wl>380).mean():.1f}% G4 {100*(g4_wl>380).mean():.1f}%")
+ if gpu_steps is not None:
+ print(f" GPU steps: mean={gpu_steps.mean():.0f} median={np.median(gpu_steps):.0f} max={gpu_steps.max()}")
+ if g4_steps is not None:
+ print(f" G4 steps: mean={g4_steps.mean():.0f} median={np.median(g4_steps):.0f} max={g4_steps.max()}")
+ print(f"\nPlots saved to {outdir}/:")
+ for f in ['hits.png', 'wavelength_shifted.png', 'wavelength_full.png',
+ 'time_bulk.png', 'time_full.png', 'positions_3d.png', 'step_count.png']:
+ print(f" {f}")
+
+
+def main():
+ parser = argparse.ArgumentParser(description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument("-g", "--gdml", default="apex.gdml", help="GDML geometry file")
+ parser.add_argument("-c", "--config", default=None, help="Config name (e.g. det_debug)")
+ parser.add_argument("-m", "--macro", default="tests/run_genstep.mac", help="G4 macro file")
+ parser.add_argument("-s", "--seed", type=int, default=42, help="Random seed")
+ parser.add_argument("--outdir", default="plots", help="Output directory for plots")
+ parser.add_argument("--title", default="", help="Extra title text")
+ parser.add_argument("--gpu-hits", default=None, help="Skip sim, use existing GPU hits .npy")
+ parser.add_argument("--g4-hits", default=None, help="Skip sim, use existing G4 hits .npy")
+ args = parser.parse_args()
+
+ if args.gpu_hits and args.g4_hits:
+ print(f"Using existing files: {args.gpu_hits}, {args.g4_hits}")
+ else:
+ run_simulation(args.gdml, args.config, args.macro, args.seed)
+ args.gpu_hits = "gpu_hits.npy"
+ args.g4_hits = "g4_hits.npy"
+
+ gpu = load_hits(args.gpu_hits)
+ g4_raw = load_hits(args.g4_hits)
+ g4_raw_shape = g4_raw.shape
+
+ # Keep full G4 array for step count extraction
+ g4_full = g4_raw.copy() if g4_raw.shape[1] >= 5 else None
+
+ # Normalize to (N, 4, 4) — take first 4 rows if more
+ gpu = gpu[:, :4, :] if gpu.shape[1] > 4 else gpu
+ g4 = g4_raw[:, :4, :] if g4_raw.shape[1] > 4 else g4_raw
+
+ make_plots(gpu, g4, args.outdir, title_extra=args.title,
+ g4_full=g4_full, g4_raw_shape=g4_raw_shape)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/optiphy/ana/run_genstep_comparison.py b/optiphy/ana/run_genstep_comparison.py
new file mode 100644
index 000000000..321ae6085
--- /dev/null
+++ b/optiphy/ana/run_genstep_comparison.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""
+run_genstep_comparison.py
+==========================
+
+Runs GPU (simg4ox) and G4 (G4ValidationGenstep) simulations with the same
+electron primary, then compares the optical photon hit distributions.
+
+Usage:
+ python run_genstep_comparison.py [--gdml apex.gdml] [--energy 1.0] [--nevents 10] [--seed 42]
+"""
+import os
+import sys
+import subprocess
+import argparse
+import numpy as np
+from pathlib import Path
+
+def find_gpu_hits():
+ """Find the most recent GPU hit.npy output."""
+ base = Path(f"/tmp/{os.environ.get('USER','MISSING_USER')}/opticks")
+ candidates = sorted(base.rglob("hit.npy"), key=lambda p: p.stat().st_mtime, reverse=True)
+ return str(candidates[0]) if candidates else None
+
+def run_g4(gdml, energy, nevents, seed, pos, direction):
+ """Run pure G4 simulation with electron primary."""
+ cmd = [
+ "G4ValidationGenstep",
+ "-g", gdml,
+ "-e", str(energy),
+ "-n", str(nevents),
+ "-s", str(seed),
+ "--pos", pos,
+ "--dir", direction,
+ ]
+ print(f"=== Running G4: {' '.join(cmd)}")
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
+
+ # Extract hit count from output
+ g4_hits = 0
+ for line in result.stdout.split('\n'):
+ if "Total hits:" in line:
+ g4_hits = int(line.split("Total hits:")[-1].strip())
+
+ print(f"G4: {g4_hits} hits")
+ if result.returncode != 0:
+ print(f"G4 STDERR (last 5 lines):")
+ for line in result.stderr.strip().split('\n')[-5:]:
+ print(f" {line}")
+ return g4_hits
+
+def run_gpu(gdml, config, macro, seed):
+ """Run GPU simulation via simg4ox."""
+ env = os.environ.copy()
+ env["OPTICKS_INTEGRATION_MODE"] = "1" # Minimal mode: G4 tracks electron, GPU propagates optical
+
+ cmd = [
+ "simg4ox",
+ "-g", gdml,
+ "-c", config,
+ "-m", macro,
+ ]
+ print(f"\n=== Running GPU: {' '.join(cmd)}")
+ print(f" OPTICKS_INTEGRATION_MODE=1 (Minimal)")
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
+
+ if result.returncode != 0:
+ print(f"GPU STDERR (last 10 lines):")
+ for line in result.stderr.strip().split('\n')[-10:]:
+ print(f" {line}")
+ return 0
+
+ # Find hit output
+ hit_path = find_gpu_hits()
+ if hit_path and os.path.exists(hit_path):
+ hits = np.load(hit_path)
+ print(f"GPU: {len(hits)} hits (from {hit_path})")
+ return len(hits)
+ else:
+ print("GPU: no hit.npy found")
+ return 0
+
+def compare_hits(g4_path, gpu_path):
+ """Compare G4 and GPU hit arrays."""
+ if not os.path.exists(g4_path):
+ print(f"G4 hits not found: {g4_path}")
+ return
+ if not gpu_path or not os.path.exists(gpu_path):
+ print(f"GPU hits not found")
+ return
+
+ g4 = np.load(g4_path)
+ gpu = np.load(gpu_path)
+
+ print(f"\n{'='*60}")
+ print(f"HIT COMPARISON")
+ print(f"{'='*60}")
+ print(f" G4 hits: {len(g4)}")
+ print(f" GPU hits: {len(gpu)}")
+
+ if len(g4) > 0 and len(gpu) > 0:
+ diff = len(gpu) - len(g4)
+ pct = 100 * diff / len(g4) if len(g4) > 0 else 0
+ sign = "+" if diff > 0 else ""
+ print(f" Diff: {sign}{diff} ({sign}{pct:.1f}%)")
+
+ # Position distributions
+ if len(g4) > 0:
+ g4_pos = g4[:, 0, :3]
+ print(f"\n G4 hit positions:")
+ print(f" x: [{g4_pos[:,0].min():.1f}, {g4_pos[:,0].max():.1f}] mm")
+ print(f" y: [{g4_pos[:,1].min():.1f}, {g4_pos[:,1].max():.1f}] mm")
+ print(f" z: [{g4_pos[:,2].min():.1f}, {g4_pos[:,2].max():.1f}] mm")
+
+ if len(gpu) > 0:
+ gpu_pos = gpu[:, 0, :3]
+ print(f"\n GPU hit positions:")
+ print(f" x: [{gpu_pos[:,0].min():.1f}, {gpu_pos[:,0].max():.1f}] mm")
+ print(f" y: [{gpu_pos[:,1].min():.1f}, {gpu_pos[:,1].max():.1f}] mm")
+ print(f" z: [{gpu_pos[:,2].min():.1f}, {gpu_pos[:,2].max():.1f}] mm")
+
+ # Wavelength distributions
+ if len(g4) > 0:
+ g4_wl = g4[:, 2, 3]
+ print(f"\n G4 wavelength: mean={g4_wl.mean():.1f} std={g4_wl.std():.1f} nm")
+ if len(gpu) > 0:
+ gpu_wl = gpu[:, 2, 3]
+ print(f" GPU wavelength: mean={gpu_wl.mean():.1f} std={gpu_wl.std():.1f} nm")
+
+ # Time distributions
+ if len(g4) > 0:
+ g4_t = g4[:, 0, 3]
+ print(f"\n G4 time: mean={g4_t.mean():.2f} max={g4_t.max():.2f} ns")
+ if len(gpu) > 0:
+ gpu_t = gpu[:, 0, 3]
+ print(f" GPU time: mean={gpu_t.mean():.2f} max={gpu_t.max():.2f} ns")
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Compare GPU vs G4 electron genstep simulation")
+ parser.add_argument("--gdml", default="apex.gdml", help="GDML geometry file")
+ parser.add_argument("--energy", type=float, default=1.0, help="Electron energy in MeV")
+ parser.add_argument("--nevents", type=int, default=10, help="Number of events")
+ parser.add_argument("--seed", type=int, default=42, help="Random seed")
+ parser.add_argument("--pos", default="0,0,100", help="Electron position x,y,z mm")
+ parser.add_argument("--dir", default="0,0,1", help="Electron direction x,y,z")
+ args = parser.parse_args()
+
+ # Run G4
+ g4_hits = run_g4(args.gdml, args.energy, args.nevents, args.seed, args.pos, args.dir)
+
+ # Compare
+ g4_path = "g4_genstep_hits.npy"
+ gpu_path = find_gpu_hits()
+
+ if os.path.exists(g4_path):
+ g4 = np.load(g4_path)
+ print(f"\n{'='*60}")
+ print(f"G4 RESULTS ({args.nevents} events, {args.energy} MeV electron)")
+ print(f"{'='*60}")
+ print(f" Total hits: {len(g4)}")
+ print(f" Hits/event: {len(g4)/args.nevents:.1f}")
+ if len(g4) > 0:
+ g4_wl = g4[:, 2, 3]
+ g4_pos = g4[:, 0, :3]
+ print(f" Wavelength: mean={g4_wl.mean():.1f} nm")
+ print(f" Hit y range: [{g4_pos[:,1].min():.1f}, {g4_pos[:,1].max():.1f}] mm")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/optiphy/ana/wls_diagnostic.py b/optiphy/ana/wls_diagnostic.py
new file mode 100644
index 000000000..6975983cb
--- /dev/null
+++ b/optiphy/ana/wls_diagnostic.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""
+wls_diagnostic.py : Detailed WLS wavelength distribution comparison GPU vs G4
+==============================================================================
+
+Compares wavelength and time distributions from GPU (opticks) and G4 hits,
+performs KS test, and checks per-pass WLS conversion probability.
+
+Usage::
+
+ python ana/wls_diagnostic.py [--input-wavelength 350]
+"""
+import sys
+import os
+import argparse
+import numpy as np
+
+
+def resolve_event_path(path):
+ if os.path.exists(os.path.join(path, "photon.npy")):
+ return path
+ a000 = os.path.join(path, "A000")
+ if os.path.exists(os.path.join(a000, "photon.npy")):
+ return a000
+ if os.path.isdir(path):
+ for d in sorted(os.listdir(path)):
+ dp = os.path.join(path, d)
+ if os.path.isdir(dp) and os.path.exists(os.path.join(dp, "photon.npy")):
+ return dp
+ return path
+
+
+FLAG_ENUM = {
+ 0x0004: "TORCH", 0x0008: "BULK_ABSORB", 0x0010: "BULK_REEMIT",
+ 0x0020: "BULK_SCATTER", 0x0040: "SURFACE_DETECT", 0x0080: "SURFACE_ABSORB",
+ 0x0100: "SURFACE_DREFLECT", 0x0200: "SURFACE_SREFLECT",
+ 0x0400: "BOUNDARY_REFLECT", 0x0800: "BOUNDARY_TRANSMIT",
+ 0x1000: "NAN_ABORT", 0x2000: "EFFICIENCY_COLLECT", 0x8000: "MISS",
+}
+
+
+def ks_test_2sample(a, b):
+ """Two-sample Kolmogorov-Smirnov test (no scipy dependency)."""
+ na, nb = len(a), len(b)
+ a_sorted = np.sort(a)
+ b_sorted = np.sort(b)
+ all_vals = np.sort(np.concatenate([a_sorted, b_sorted]))
+
+ cdf_a = np.searchsorted(a_sorted, all_vals, side='right') / na
+ cdf_b = np.searchsorted(b_sorted, all_vals, side='right') / nb
+ d_stat = np.max(np.abs(cdf_a - cdf_b))
+
+ # Approximate p-value (asymptotic)
+ n_eff = np.sqrt(na * nb / (na + nb))
+ lam = (n_eff + 0.12 + 0.11 / n_eff) * d_stat
+ # Kolmogorov distribution approximation
+ if lam < 0.001:
+ p_val = 1.0
+ else:
+ p_val = 2.0 * np.exp(-2.0 * lam * lam)
+ p_val = max(0.0, min(1.0, p_val))
+ return d_stat, p_val
+
+
+def print_header(title):
+ print()
+ print("=" * 74)
+ print(f" {title}")
+ print("=" * 74)
+
+
+def print_hit_summary(gpu_hits, g4_hits, n_photons, input_wl):
+ print_header("HIT COUNT SUMMARY")
+ ng, nc = len(gpu_hits), len(g4_hits)
+ print(f" Input photons: {n_photons:>10d} (wavelength = {input_wl:.0f} nm)")
+ print(f" GPU hits: {ng:>10d} ({100*ng/n_photons:.2f}%)")
+ print(f" G4 hits: {nc:>10d} ({100*nc/n_photons:.2f}%)")
+ if ng > 0 and nc > 0:
+ ratio = nc / ng
+ # Significance
+ p_pool = (ng + nc) / (2 * n_photons)
+ se = np.sqrt(2 * p_pool * (1 - p_pool) / n_photons)
+ z = abs(ng/n_photons - nc/n_photons) / se if se > 0 else 0
+ print(f" Ratio G4/GPU: {ratio:>10.4f}")
+ print(f" Z-score: {z:>10.1f} {'** SIGNIFICANT **' if z > 3 else '(within noise)'}")
+ print()
+
+
+def print_wavelength_comparison(gpu_wl, g4_wl):
+ print_header("WAVELENGTH DISTRIBUTION COMPARISON")
+
+ print(f"\n {'Statistic':<25s} {'GPU':>12s} {'G4':>12s} {'Diff':>12s}")
+ print(f" {'-'*25} {'-'*12} {'-'*12} {'-'*12}")
+
+ for name, fn in [("Mean (nm)", np.mean), ("Std (nm)", np.std),
+ ("Median (nm)", np.median), ("Min (nm)", np.min),
+ ("Max (nm)", np.max)]:
+ gv, cv = fn(gpu_wl), fn(g4_wl)
+ print(f" {name:<25s} {gv:12.2f} {cv:12.2f} {cv-gv:12.2f}")
+
+ # Percentiles
+ print()
+ for pct in [5, 25, 75, 95]:
+ gv = np.percentile(gpu_wl, pct)
+ cv = np.percentile(g4_wl, pct)
+ print(f" {'P%d (nm)' % pct:<25s} {gv:12.2f} {cv:12.2f} {cv-gv:12.2f}")
+
+ # KS test
+ d_stat, p_val = ks_test_2sample(gpu_wl, g4_wl)
+ print(f"\n KS statistic: {d_stat:.6f}")
+ print(f" KS p-value: {p_val:.2e}")
+ if p_val < 0.01:
+ print(" ** Wavelength distributions are SIGNIFICANTLY DIFFERENT **")
+ else:
+ print(" Wavelength distributions are statistically compatible")
+ print()
+
+
+def print_fine_histogram(gpu_wl, g4_wl, bin_width=10):
+ print_header(f"WAVELENGTH HISTOGRAM (bin={bin_width}nm)")
+
+ lo = min(gpu_wl.min(), g4_wl.min())
+ hi = max(gpu_wl.max(), g4_wl.max())
+ bins = np.arange(np.floor(lo / bin_width) * bin_width,
+ np.ceil(hi / bin_width) * bin_width + bin_width, bin_width)
+
+ gc, _ = np.histogram(gpu_wl, bins=bins)
+ cc, _ = np.histogram(g4_wl, bins=bins)
+
+ # Normalize to density (per nm per photon)
+ gpu_dens = gc / (len(gpu_wl) * bin_width)
+ g4_dens = cc / (len(g4_wl) * bin_width)
+
+ max_dens = max(gpu_dens.max(), g4_dens.max())
+ bar_w = 25
+
+ print(f"\n {'Bin (nm)':<14s} {'GPU':>8s} {'G4':>8s} {'GPU/G4':>7s} GPU|G4")
+ print(f" {'-'*14} {'-'*8} {'-'*8} {'-'*7} {'-'*51}")
+
+ for i in range(len(bins) - 1):
+ if gc[i] == 0 and cc[i] == 0:
+ continue
+ ratio_str = f"{gc[i]/cc[i]:.2f}" if cc[i] > 0 else " inf"
+ gb = "#" * int(gpu_dens[i] / max_dens * bar_w) if max_dens > 0 else ""
+ cb = "=" * int(g4_dens[i] / max_dens * bar_w) if max_dens > 0 else ""
+ print(f" {bins[i]:5.0f}-{bins[i+1]:5.0f} {gc[i]:8d} {cc[i]:8d} {ratio_str:>7s} {gb:<25s}|{cb:<25s}")
+ print()
+
+
+def print_time_comparison(gpu_t, g4_t):
+ print_header("TIME DISTRIBUTION COMPARISON")
+
+ print(f"\n {'Statistic':<25s} {'GPU':>12s} {'G4':>12s} {'Diff':>12s}")
+ print(f" {'-'*25} {'-'*12} {'-'*12} {'-'*12}")
+
+ for name, fn in [("Mean (ns)", np.mean), ("Std (ns)", np.std),
+ ("Median (ns)", np.median), ("Min (ns)", np.min),
+ ("Max (ns)", np.max)]:
+ gv, cv = fn(gpu_t), fn(g4_t)
+ print(f" {name:<25s} {gv:12.4f} {cv:12.4f} {cv-gv:12.4f}")
+
+ d_stat, p_val = ks_test_2sample(gpu_t, g4_t)
+ print(f"\n KS statistic: {d_stat:.6f}")
+ print(f" KS p-value: {p_val:.2e}")
+ if p_val < 0.01:
+ print(" ** Time distributions are SIGNIFICANTLY DIFFERENT **")
+ else:
+ print(" Time distributions are statistically compatible")
+ print()
+
+
+def print_gpu_outcomes(photon):
+ print_header("GPU PHOTON OUTCOMES (all photons)")
+ q3 = photon[:, 3, :].view(np.uint32)
+ flag = q3[:, 0] & 0xFFFF
+ n = len(flag)
+ vals, counts = np.unique(flag, return_counts=True)
+ order = np.argsort(-counts)
+
+ print(f"\n {'Flag':<22s} {'Count':>8s} {'%':>7s}")
+ print(f" {'-'*22} {'-'*8} {'-'*7}")
+ for idx in order:
+ f = vals[idx]
+ c = counts[idx]
+ name = FLAG_ENUM.get(f, f"0x{f:04x}")
+ print(f" {name:<22s} {c:8d} {100*c/n:6.2f}%")
+ print()
+
+
+def print_position_comparison(gpu_hits, g4_hits):
+ print_header("SPATIAL DISTRIBUTION")
+
+ gpu_pos = gpu_hits[:, 0, :3]
+ g4_pos = g4_hits[:, 0, :3]
+ gpu_r = np.sqrt(np.sum(gpu_pos**2, axis=1))
+ g4_r = np.sqrt(np.sum(g4_pos**2, axis=1))
+
+ print(f"\n {'Statistic':<25s} {'GPU':>12s} {'G4':>12s} {'Diff':>12s}")
+ print(f" {'-'*25} {'-'*12} {'-'*12} {'-'*12}")
+
+ for name, gv, cv in [
+ ("Mean radius (mm)", gpu_r.mean(), g4_r.mean()),
+ ("Mean X (mm)", gpu_pos[:, 0].mean(), g4_pos[:, 0].mean()),
+ ("Mean Y (mm)", gpu_pos[:, 1].mean(), g4_pos[:, 1].mean()),
+ ("Mean Z (mm)", gpu_pos[:, 2].mean(), g4_pos[:, 2].mean()),
+ ]:
+ print(f" {name:<25s} {gv:12.3f} {cv:12.3f} {cv-gv:12.3f}")
+ print()
+
+
+def print_energy_conservation(gpu_wl, g4_wl, input_wl):
+ print_header("ENERGY CONSERVATION CHECK")
+ gpu_viol = np.sum(gpu_wl < input_wl)
+ g4_viol = np.sum(g4_wl < input_wl)
+ print(f" Input wavelength: {input_wl:.0f} nm")
+ print(f" GPU hits with wl < input: {gpu_viol} / {len(gpu_wl)}")
+ print(f" G4 hits with wl < input: {g4_viol} / {len(g4_wl)}")
+ if gpu_viol == 0 and g4_viol == 0:
+ print(" ALL PASS: energy conservation satisfied")
+ else:
+ if gpu_viol > 0:
+ bad = gpu_wl[gpu_wl < input_wl]
+ print(f" GPU violations: min={bad.min():.1f}nm, mean={bad.mean():.1f}nm")
+ if g4_viol > 0:
+ bad = g4_wl[g4_wl < input_wl]
+ print(f" G4 violations: min={bad.min():.1f}nm, mean={bad.mean():.1f}nm")
+ print()
+
+
+def main():
+ parser = argparse.ArgumentParser(description=__doc__,
+ formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser.add_argument("gpu_path", help="GPU opticks event folder")
+ parser.add_argument("g4_hits", help="G4 hits file (g4_hits.npy)")
+ parser.add_argument("--input-wavelength", type=float, default=350.0,
+ help="Input photon wavelength in nm (default: 350)")
+ parser.add_argument("--bin-width", type=float, default=5.0,
+ help="Histogram bin width in nm (default: 5)")
+ args = parser.parse_args()
+
+ gpu_path = resolve_event_path(args.gpu_path)
+ hit_path = os.path.join(gpu_path, "hit.npy")
+ photon_path = os.path.join(gpu_path, "photon.npy")
+
+ if not os.path.exists(photon_path):
+ print(f"Error: photon.npy not found in {gpu_path}")
+ sys.exit(1)
+ if not os.path.exists(args.g4_hits):
+ print(f"Error: {args.g4_hits} not found")
+ sys.exit(1)
+
+ gpu_photon = np.load(photon_path)
+ gpu_hits = np.load(hit_path) if os.path.exists(hit_path) else np.zeros((0, 4, 4), dtype=np.float32)
+ g4_hits = np.load(args.g4_hits)
+ n_photons = len(gpu_photon)
+
+ print(f"\n GPU event path: {gpu_path}")
+ print(f" G4 hits file: {args.g4_hits}")
+
+ # Hit summary
+ print_hit_summary(gpu_hits, g4_hits, n_photons, args.input_wavelength)
+
+ if len(gpu_hits) == 0 or len(g4_hits) == 0:
+ print(" Cannot compare — one side has zero hits.")
+ return
+
+ gpu_wl = gpu_hits[:, 2, 3]
+ g4_wl = g4_hits[:, 2, 3]
+ gpu_t = gpu_hits[:, 0, 3]
+ g4_t = g4_hits[:, 0, 3]
+
+ # GPU outcomes
+ print_gpu_outcomes(gpu_photon)
+
+ # Energy conservation
+ print_energy_conservation(gpu_wl, g4_wl, args.input_wavelength)
+
+ # Wavelength comparison
+ print_wavelength_comparison(gpu_wl, g4_wl)
+ print_fine_histogram(gpu_wl, g4_wl, bin_width=args.bin_width)
+
+ # Time comparison
+ print_time_comparison(gpu_t, g4_t)
+
+ # Spatial
+ print_position_comparison(gpu_hits, g4_hits)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/qudarap/CMakeLists.txt b/qudarap/CMakeLists.txt
index 1529e1999..8a7228499 100644
--- a/qudarap/CMakeLists.txt
+++ b/qudarap/CMakeLists.txt
@@ -51,6 +51,8 @@ set(SOURCES
QScint.cc
QScint.cu
+ QWls.cc
+
QCerenkovIntegral.cc
QCerenkov.cc
QCerenkov.cu
@@ -120,6 +122,9 @@ SET(HEADERS
QScint.hh
qscint.h
+ QWls.hh
+ qwls.h
+
QCerenkovIntegral.hh
QCerenkov.hh
qcerenkov.h
diff --git a/qudarap/QSim.cc b/qudarap/QSim.cc
index bf8f77156..d3ed3879a 100644
--- a/qudarap/QSim.cc
+++ b/qudarap/QSim.cc
@@ -3,73 +3,71 @@
#include "SLOG.hh"
-#include "ssys.h"
-#include "sstamp.h"
-#include "spath.h"
#include "SProf.hh"
+#include "spath.h"
+#include "sstamp.h"
+#include "ssys.h"
#include "SComp.h"
+#include "SEvent.hh"
+#include "SEventConfig.hh"
#include "SEvt.hh"
#include "SSim.hh"
+#include "salloc.h"
#include "scuda.h"
#include "squad.h"
-#include "salloc.h"
-#include "SEvent.hh"
-#include "SEventConfig.hh"
-//#include "SCSGOptiX.h"
+// #include "SCSGOptiX.h"
#include "SSimulator.h"
#include "SGenstep.h"
#include "sslice.h"
#include "NP.hh"
-#include "QUDA_CHECK.h"
#include "QU.hh"
+#include "QUDA_CHECK.h"
+#include "qdebug.h"
#include "qrng.h"
#include "qsim.h"
-#include "qdebug.h"
#include "QBase.hh"
-#include "QEvt.hh"
-#include "QRng.hh"
-#include "QTex.hh"
-#include "QScint.hh"
-#include "QCerenkov.hh"
#include "QBnd.hh"
-#include "QProp.hh"
-#include "QMultiFilm.hh"
+#include "QCerenkov.hh"
+#include "QDebug.hh"
#include "QEvt.hh"
+#include "QMultiFilm.hh"
#include "QOptical.hh"
-#include "QSimLaunch.hh"
-#include "QDebug.hh"
#include "QPMT.hh"
+#include "QProp.hh"
+#include "QRng.hh"
+#include "QScint.hh"
+#include "QSimLaunch.hh"
+#include "QTex.hh"
+#include "QWls.hh"
#include "QSim.hh"
const plog::Severity QSim::LEVEL = SLOG::EnvLevel("QSim", "DEBUG");
-const bool QSim::REQUIRE_PMT = ssys::getenvbool(_QSim__REQUIRE_PMT);
-const int QSim::SAVE_IGS_EVENTID = ssys::getenvint(_QSim__SAVE_IGS_EVENTID,-1) ;
-const char* QSim::SAVE_IGS_PATH = ssys::getenvvar(_QSim__SAVE_IGS_PATH, "$TMP/.opticks/igs.npy");
-const bool QSim::CONCAT = ssys::getenvbool(_QSim__CONCAT);
-const bool QSim::ALLOC = ssys::getenvbool(_QSim__ALLOC);
+const bool QSim::REQUIRE_PMT = ssys::getenvbool(_QSim__REQUIRE_PMT);
+const int QSim::SAVE_IGS_EVENTID = ssys::getenvint(_QSim__SAVE_IGS_EVENTID, -1);
+const char *QSim::SAVE_IGS_PATH = ssys::getenvvar(_QSim__SAVE_IGS_PATH, "$TMP/.opticks/igs.npy");
+const bool QSim::CONCAT = ssys::getenvbool(_QSim__CONCAT);
+const bool QSim::ALLOC = ssys::getenvbool(_QSim__ALLOC);
-
-
-QSim* QSim::INSTANCE = nullptr ;
-QSim* QSim::Get(){ return INSTANCE ; }
-
-QSim* QSim::Create()
+QSim *QSim::INSTANCE = nullptr;
+QSim *QSim::Get()
{
- LOG_IF(fatal, INSTANCE != nullptr) << " a QSim INSTANCE already exists " ;
- assert( INSTANCE == nullptr ) ;
- return new QSim ;
+ return INSTANCE;
}
-
-
+QSim *QSim::Create()
+{
+ LOG_IF(fatal, INSTANCE != nullptr) << " a QSim INSTANCE already exists ";
+ assert(INSTANCE == nullptr);
+ return new QSim;
+}
/**
QSim::UploadComponents
@@ -115,137 +113,135 @@ This structure is used to allow separate testing.
**/
-void QSim::UploadComponents( const SSim* ssim )
+void QSim::UploadComponents(const SSim *ssim)
{
- LOG(LEVEL) << "[ ssim " << ssim ;
- if(getenv("QSim__UploadComponents_SIGINT")) std::raise(SIGINT);
-
- LOG(LEVEL) << "[ new QBase" ;
- QBase* base = new QBase ;
- LOG(LEVEL) << "] new QBase : latency here of about 0.3s from first device access, if latency of >1s need to start nvidia-persistenced " ;
+ LOG(LEVEL) << "[ ssim " << ssim;
+ if (getenv("QSim__UploadComponents_SIGINT"))
+ std::raise(SIGINT);
+
+ LOG(LEVEL) << "[ new QBase";
+ QBase *base = new QBase;
+ LOG(LEVEL) << "] new QBase : latency here of about 0.3s from first device access, if latency of >1s need to start "
+ "nvidia-persistenced ";
LOG(LEVEL) << base->desc();
-
- unsigned skipahead_event_offset = SEventConfig::EventSkipahead() ;
- LOG(LEVEL) << "[ new QRng skipahead_event_offset : " << skipahead_event_offset << " " << SEventConfig::kEventSkipahead ;
- QRng* rng = new QRng(skipahead_event_offset) ; // loads and uploads RNG
- LOG(LEVEL) << "] new QRng " << rng->desc() ;
+ unsigned skipahead_event_offset = SEventConfig::EventSkipahead();
+ LOG(LEVEL) << "[ new QRng skipahead_event_offset : " << skipahead_event_offset << " "
+ << SEventConfig::kEventSkipahead;
+ QRng *rng = new QRng(skipahead_event_offset); // loads and uploads RNG
+ LOG(LEVEL) << "] new QRng " << rng->desc();
LOG(LEVEL) << rng->desc();
- const NP* optical = ssim->get(snam::OPTICAL);
- const NP* bnd = ssim->get(snam::BND);
+ const NP *optical = ssim->get(snam::OPTICAL);
+ const NP *bnd = ssim->get(snam::BND);
- if( optical == nullptr && bnd == nullptr )
+ if (optical == nullptr && bnd == nullptr)
{
- LOG(error) << " optical and bnd null snam::OPTICAL " << snam::OPTICAL << " snam::BND " << snam::BND ;
+ LOG(error) << " optical and bnd null snam::OPTICAL " << snam::OPTICAL << " snam::BND " << snam::BND;
}
else
{
- // note that QOptical and QBnd are tightly coupled, perhaps add constraints to tie them together
- QOptical* qopt = new QOptical(optical);
+ // note that QOptical and QBnd are tightly coupled, perhaps add constraints to tie them together
+ QOptical *qopt = new QOptical(optical);
LOG(LEVEL) << qopt->desc();
- QBnd* qbnd = new QBnd(bnd); // boundary texture with standard domain, used for standard fast property lookup
+ QBnd *qbnd = new QBnd(bnd); // boundary texture with standard domain, used for standard fast property lookup
LOG(LEVEL) << qbnd->desc();
}
- QDebug* debug_ = new QDebug ;
- LOG(LEVEL) << debug_->desc() ;
+ QDebug *debug_ = new QDebug;
+ LOG(LEVEL) << debug_->desc();
- const NP* propcom = ssim->get(snam::PROPCOM);
- if( propcom )
+ const NP *propcom = ssim->get(snam::PROPCOM);
+ if (propcom)
{
- LOG(LEVEL) << "[ QProp " ;
- QProp* prop = new QProp(propcom) ;
+ LOG(LEVEL) << "[ QProp ";
+ QProp *prop = new QProp(propcom);
// property interpolation with per-property domains, eg used for Cerenkov RINDEX sampling
- LOG(LEVEL) << "] QProp " ;
+ LOG(LEVEL) << "] QProp ";
LOG(LEVEL) << prop->desc();
}
else
{
- LOG(LEVEL) << " propcom null, snam::PROPCOM " << snam::PROPCOM ;
+ LOG(LEVEL) << " propcom null, snam::PROPCOM " << snam::PROPCOM;
}
-
- const NP* icdf = ssim->get(snam::ICDF);
- if( icdf == nullptr )
+ const NP *icdf = ssim->get(snam::ICDF);
+ if (icdf == nullptr)
{
- LOG(error) << " icdf null, snam::ICDF " << snam::ICDF ;
+ LOG(error) << " icdf null, snam::ICDF " << snam::ICDF;
}
else
{
- unsigned hd_factor = 20u ; // 0,10,20
- QScint* scint = new QScint( icdf, hd_factor); // custom high-definition inverse CDF for scintillation generation
+ unsigned hd_factor = 20u; // 0,10,20
+ QScint *scint = new QScint(icdf, hd_factor); // custom high-definition inverse CDF for scintillation generation
LOG(LEVEL) << scint->desc();
}
+ const NP *wls_icdf = ssim->get(snam::WLS_ICDF);
+ const NP *wls_mat_map = ssim->get(snam::WLS_MAT_MAP);
+ if (wls_icdf == nullptr || wls_mat_map == nullptr)
+ {
+ LOG(LEVEL) << " wls_icdf or wls_mat_map null — no WLS materials in geometry ";
+ }
+ else
+ {
+ const NP *wls_tc = ssim->get(snam::WLS_TIME_CONSTANTS);
+ if (wls_tc)
+ {
+ unsigned hd_factor = 20u;
+ QWls *qwls_ = new QWls(wls_icdf, wls_mat_map, wls_tc, hd_factor);
+ LOG(LEVEL) << qwls_->desc();
+ }
+ else
+ {
+ LOG(error) << " wls_icdf and wls_mat_map present but wls_time_constants missing ";
+ }
+ }
// TODO: make this more like the others : acting on the available inputs rather than the mode
- bool is_simtrace = SEventConfig::IsRGModeSimtrace() ;
- if(is_simtrace == false )
+ bool is_simtrace = SEventConfig::IsRGModeSimtrace();
+ if (is_simtrace == false)
{
- QCerenkov* cerenkov = new QCerenkov ;
+ QCerenkov *cerenkov = new QCerenkov;
LOG(LEVEL) << cerenkov->desc();
}
else
{
- LOG(LEVEL) << " skip QCerenkov for simtrace running " ;
+ LOG(LEVEL) << " skip QCerenkov for simtrace running ";
}
+ const NPFold *spmt_f = ssim->get_spmt_f();
+ QPMT *qpmt = spmt_f ? new QPMT(spmt_f) : nullptr;
+ bool has_PMT = spmt_f != nullptr && qpmt != nullptr;
+ bool MISSING_PMT = REQUIRE_PMT == true && has_PMT == false;
+ LOG_IF(fatal, MISSING_PMT) << " MISSING_PMT " << " has_PMT " << (has_PMT ? "YES" : "NO ") << " REQUIRE_PMT "
+ << (REQUIRE_PMT ? "YES" : "NO ") << " MISSING_PMT " << (MISSING_PMT ? "YES" : "NO ")
+ << " spmt_f " << (spmt_f ? "YES" : "NO ") << " qpmt " << (qpmt ? "YES" : "NO ");
- const NPFold* spmt_f = ssim->get_spmt_f() ;
- QPMT* qpmt = spmt_f ? new QPMT(spmt_f) : nullptr ;
+ assert(MISSING_PMT == false);
+ if (MISSING_PMT)
+ std::raise(SIGINT);
- bool has_PMT = spmt_f != nullptr && qpmt != nullptr ;
- bool MISSING_PMT = REQUIRE_PMT == true && has_PMT == false ;
+ LOG(LEVEL) << QPMT::Desc() << std::endl
+ << " spmt_f " << (spmt_f ? "YES" : "NO ") << " qpmt " << (qpmt ? "YES" : "NO ");
- LOG_IF(fatal, MISSING_PMT )
- << " MISSING_PMT "
- << " has_PMT " << ( has_PMT ? "YES" : "NO " )
- << " REQUIRE_PMT " << ( REQUIRE_PMT ? "YES" : "NO " )
- << " MISSING_PMT " << ( MISSING_PMT ? "YES" : "NO " )
- << " spmt_f " << ( spmt_f ? "YES" : "NO " )
- << " qpmt " << ( qpmt ? "YES" : "NO " )
- ;
-
- assert(MISSING_PMT == false) ;
- if(MISSING_PMT) std::raise(SIGINT);
-
-
-
- LOG(LEVEL)
- << QPMT::Desc()
- << std::endl
- << " spmt_f " << ( spmt_f ? "YES" : "NO " )
- << " qpmt " << ( qpmt ? "YES" : "NO " )
- ;
-
-
-
- const NP* multifilm = ssim->get_extra(snam::MULTIFILM);
- if(multifilm == nullptr)
+ const NP *multifilm = ssim->get_extra(snam::MULTIFILM);
+ if (multifilm == nullptr)
{
- LOG(LEVEL) << " multifilm null, snam::MULTIFILM " << snam::MULTIFILM ;
+ LOG(LEVEL) << " multifilm null, snam::MULTIFILM " << snam::MULTIFILM;
}
else
{
- QMultiFilm* mul = new QMultiFilm( multifilm );
+ QMultiFilm *mul = new QMultiFilm(multifilm);
LOG(LEVEL) << mul->desc();
}
- LOG(LEVEL) << "] ssim " << ssim ;
-
-
-
+ LOG(LEVEL) << "] ssim " << ssim;
}
-
-
-
-
-
/**
QSim:::QSim
-------------
@@ -261,29 +257,15 @@ singleton components.
**/
QSim::QSim()
- :
- base(QBase::Get()),
- qev(new QEvt),
- sev(qev->sev),
- rng(QRng::Get()),
- scint(QScint::Get()),
- cerenkov(QCerenkov::Get()),
- bnd(QBnd::Get()),
- debug_(QDebug::Get()),
- prop(QProp::Get()),
- pmt(QPMT::Get()),
- multifilm(QMultiFilm::Get()),
- sim(nullptr),
- d_sim(nullptr),
- dbg(debug_ ? debug_->dbg : nullptr),
- d_dbg(debug_ ? debug_->d_dbg : nullptr),
- cx(nullptr)
+ : base(QBase::Get()), qev(new QEvt), sev(qev->sev), rng(QRng::Get()), scint(QScint::Get()), qwls(QWls::Get()),
+ cerenkov(QCerenkov::Get()), bnd(QBnd::Get()), debug_(QDebug::Get()), prop(QProp::Get()),
+ pmt(QPMT::Get()), multifilm(QMultiFilm::Get()), sim(nullptr), d_sim(nullptr),
+ dbg(debug_ ? debug_->dbg : nullptr), d_dbg(debug_ ? debug_->d_dbg : nullptr), cx(nullptr)
{
- LOG(LEVEL) << desc() ;
+ LOG(LEVEL) << desc();
init();
}
-
/**
QSim::init
------------
@@ -303,51 +285,43 @@ place (qsim.h) to add GPU side functionality.
**/
-
void QSim::init()
{
- sim = new qsim ;
- sim->base = base ? base->d_base : nullptr ;
- sim->evt = qev ? qev->getDevicePtr() : nullptr ;
- //sim->rng_state = rng ? rng->qr->uploaded_states : nullptr ;
- sim->rng = rng ? rng->d_qr : nullptr ;
+ sim = new qsim;
+ sim->base = base ? base->d_base : nullptr;
+ sim->evt = qev ? qev->getDevicePtr() : nullptr;
+ // sim->rng_state = rng ? rng->qr->uploaded_states : nullptr ;
+ sim->rng = rng ? rng->d_qr : nullptr;
+
+ sim->bnd = bnd ? bnd->d_qb : nullptr;
+ sim->multifilm = multifilm ? multifilm->d_multifilm : nullptr;
+ sim->cerenkov = cerenkov ? cerenkov->d_cerenkov : nullptr;
+ sim->scint = scint ? scint->d_scint : nullptr;
+ sim->wls = qwls ? qwls->d_wls : nullptr;
+ sim->pmt = pmt ? pmt->d_pmt : nullptr;
+
+ bool has_PMT = pmt != nullptr && sim->pmt != nullptr;
+ bool REQUIRE_PMT = ssys::getenvbool(_QSim__REQUIRE_PMT);
+ bool MISSING_PMT = REQUIRE_PMT == true && has_PMT == false;
- sim->bnd = bnd ? bnd->d_qb : nullptr ;
- sim->multifilm = multifilm ? multifilm->d_multifilm : nullptr ;
- sim->cerenkov = cerenkov ? cerenkov->d_cerenkov : nullptr ;
- sim->scint = scint ? scint->d_scint : nullptr ;
- sim->pmt = pmt ? pmt->d_pmt : nullptr ;
+ LOG(LEVEL) << " MISSING_PMT " << (MISSING_PMT ? "YES" : "NO ") << " has_PMT " << (has_PMT ? "YES" : "NO ")
+ << " QSim::pmt " << (pmt ? "YES" : "NO ") << " QSim::pmt->d_pmt " << (sim->pmt ? "YES" : "NO ") << " ["
+ << _QSim__REQUIRE_PMT << "] " << (REQUIRE_PMT ? "YES" : "NO ");
+ LOG_IF(fatal, MISSING_PMT) << " MISSING_PMT ABORT " << " MISSING_PMT " << (MISSING_PMT ? "YES" : "NO ")
+ << " has_PMT " << (has_PMT ? "YES" : "NO ") << " QSim::pmt " << (pmt ? "YES" : "NO ")
+ << " QSim::pmt->d_pmt " << (sim->pmt ? "YES" : "NO ") << " [" << _QSim__REQUIRE_PMT
+ << "] " << (REQUIRE_PMT ? "YES" : "NO ");
- bool has_PMT = pmt != nullptr && sim->pmt != nullptr ;
- bool REQUIRE_PMT = ssys::getenvbool(_QSim__REQUIRE_PMT);
- bool MISSING_PMT = REQUIRE_PMT == true && has_PMT == false ;
-
- LOG(LEVEL)
- << " MISSING_PMT " << ( MISSING_PMT ? "YES" : "NO " )
- << " has_PMT " << ( has_PMT ? "YES" : "NO " )
- << " QSim::pmt " << ( pmt ? "YES" : "NO " )
- << " QSim::pmt->d_pmt " << ( sim->pmt ? "YES" : "NO " )
- << " [" << _QSim__REQUIRE_PMT << "] " << ( REQUIRE_PMT ? "YES" : "NO " )
- ;
-
- LOG_IF(fatal, MISSING_PMT )
- << " MISSING_PMT ABORT "
- << " MISSING_PMT " << ( MISSING_PMT ? "YES" : "NO " )
- << " has_PMT " << ( has_PMT ? "YES" : "NO " )
- << " QSim::pmt " << ( pmt ? "YES" : "NO " )
- << " QSim::pmt->d_pmt " << ( sim->pmt ? "YES" : "NO " )
- << " [" << _QSim__REQUIRE_PMT << "] " << ( REQUIRE_PMT ? "YES" : "NO " )
- ;
-
- assert(MISSING_PMT == false) ;
- if(MISSING_PMT) std::raise(SIGINT);
-
- d_sim = QU::UploadArray(sim, 1, "QSim::init.sim" );
-
- INSTANCE = this ;
- LOG(LEVEL) << desc() ;
- LOG(LEVEL) << descComponents() ;
+ assert(MISSING_PMT == false);
+ if (MISSING_PMT)
+ std::raise(SIGINT);
+
+ d_sim = QU::UploadArray(sim, 1, "QSim::init.sim");
+
+ INSTANCE = this;
+ LOG(LEVEL) << desc();
+ LOG(LEVEL) << descComponents();
}
/**
@@ -357,12 +331,11 @@ QSim::setLauncher
Formerly used SCSGOptiX
**/
-void QSim::setLauncher(SSimulator* cx_ )
+void QSim::setLauncher(SSimulator *cx_)
{
- cx = cx_ ;
+ cx = cx_;
}
-
/**
QSim::post_launch
--------------------
@@ -378,7 +351,6 @@ void QSim::post_launch()
}
**/
-
/**
QSim::simulate
---------------
@@ -427,190 +399,159 @@ bool QSim::KEEP_SUBFOLD = ssys::getenvbool(QSim__simulate_KEEP_SUBFOLD);
double QSim::simulate(int eventID, bool reset_)
{
- SProf::SetTag(eventID, "A%0.3d_" ) ;
+ SProf::SetTag(eventID, "A%0.3d_");
- assert( SEventConfig::IsRGModeSimulate() );
+ assert(SEventConfig::IsRGModeSimulate());
- //cudaStream_t stream ; cudaStreamCreate(&stream);
- cudaStream_t stream = 0 ;
+ // cudaStream_t stream ; cudaStreamCreate(&stream);
+ cudaStream_t stream = 0;
+ int64_t tot_ph = 0;
- int64_t tot_ph = 0 ;
+ double tot_dt = 0.;
- double tot_dt = 0. ;
-
- int64_t tot_idt = 0 ;
- int64_t tot_gdt = 0 ;
+ int64_t tot_idt = 0;
+ int64_t tot_gdt = 0;
int64_t t_HEAD = SProf::Add("QSim__simulate_HEAD");
- LOG_IF(info, SEvt::LIFECYCLE) << "[ eventID " << eventID ;
- if( qev == nullptr ) return -1. ;
-
+ LOG_IF(info, SEvt::LIFECYCLE) << "[ eventID " << eventID;
+ if (qev == nullptr)
+ return -1.;
- sev->beginOfEvent(eventID); // set SEvt index and tees up frame gensteps for simtrace and input photon simulate running
+ sev->beginOfEvent(
+ eventID); // set SEvt index and tees up frame gensteps for simtrace and input photon simulate running
- NP* igs = sev->makeGenstepArrayFromVector();
+ NP *igs = sev->makeGenstepArrayFromVector();
MaybeSaveIGS(eventID, igs);
- std::vector igs_slice ;
- int64_t tot_ph_0 = SGenstep::GetGenstepSlices( igs_slice, igs, SEventConfig::MaxSlot() );
+ std::vector igs_slice;
+ int64_t tot_ph_0 = SGenstep::GetGenstepSlices(igs_slice, igs, SEventConfig::MaxSlot());
- //bool xxl = tot_ph_0 > SGenstep::MAX_SLOT_PER_SLICE ;
- bool xxl = tot_ph_0 > 100*M ;
+ // bool xxl = tot_ph_0 > SGenstep::MAX_SLOT_PER_SLICE ;
+ bool xxl = tot_ph_0 > 100 * M;
int num_slice = igs_slice.size();
- LOG(xxl ? info : LEVEL)
- << " eventID " << std::setw(6) << eventID
- << " igs " << ( igs ? igs->sstr() : "-" )
- << " tot_ph_0 " << tot_ph_0
- << " tot_ph_0/M " << tot_ph_0/M
- << " xxl " << ( xxl ? "YES" : "NO " )
- << " MaxSlot " << SEventConfig::MaxSlot()
- << " MaxSlot/M " << SEventConfig::MaxSlot()/M
- << " sslice::Desc(igs_slice)\n"
- << sslice::Desc(igs_slice)
- << " num_slice " << num_slice
- ;
-
+ LOG(xxl ? info : LEVEL) << " eventID " << std::setw(6) << eventID << " igs " << (igs ? igs->sstr() : "-")
+ << " tot_ph_0 " << tot_ph_0 << " tot_ph_0/M " << tot_ph_0 / M << " xxl "
+ << (xxl ? "YES" : "NO ") << " MaxSlot " << SEventConfig::MaxSlot() << " MaxSlot/M "
+ << SEventConfig::MaxSlot() / M << " sslice::Desc(igs_slice)\n"
+ << sslice::Desc(igs_slice) << " num_slice " << num_slice;
int64_t t_LBEG = SProf::Add("QSim__simulate_LBEG");
- for(int i=0 ; i < num_slice ; i++)
+ for (int i = 0; i < num_slice; i++)
{
SProf::Add("QSim__simulate_PRUP");
- const sslice& sl = igs_slice[i] ;
+ const sslice &sl = igs_slice[i];
- LOG(LEVEL) << sl.idx_desc(i) ;
+ LOG(LEVEL) << sl.idx_desc(i);
- int rc = qev->setGenstepUpload_NP(igs, &sl ) ;
- LOG_IF(error, rc != 0) << " QEvt::setGenstep ERROR : have qev but no gensteps collected : will skip cx.simulate " ;
-
- LOG_IF(info, ALLOC)
- << " [" << _QSim__ALLOC << "] "
- << " i " << std::setw(5) << i
- << " SEventConfig::ALLOC " << ( SEventConfig::ALLOC ? "YES" : "NO " )
- << ( SEventConfig::ALLOC ? SEventConfig::ALLOC->desc() : "-" )
- ;
+ int rc = qev->setGenstepUpload_NP(igs, &sl);
+ LOG_IF(error, rc != 0)
+ << " QEvt::setGenstep ERROR : have qev but no gensteps collected : will skip cx.simulate ";
+ LOG_IF(info, ALLOC) << " [" << _QSim__ALLOC << "] " << " i " << std::setw(5) << i << " SEventConfig::ALLOC "
+ << (SEventConfig::ALLOC ? "YES" : "NO ")
+ << (SEventConfig::ALLOC ? SEventConfig::ALLOC->desc() : "-");
SProf::Add("QSim__simulate_PREL");
- sev->t_PreLaunch = sstamp::Now() ;
+ sev->t_PreLaunch = sstamp::Now();
- double dt = rc == 0 && cx != nullptr ? cx->simulate_launch() : -1. ; //SSimulator protocol
+ double dt = rc == 0 && cx != nullptr ? cx->simulate_launch() : -1.; // SSimulator protocol
- sev->t_PostLaunch = sstamp::Now() ;
- sev->t_Launch = dt ;
+ sev->t_PostLaunch = sstamp::Now();
+ sev->t_Launch = dt;
- tot_idt += ( sev->t_PostLaunch - sev->t_PreLaunch ) ;
- tot_dt += dt ;
- tot_ph += sl.ph_count ;
+ tot_idt += (sev->t_PostLaunch - sev->t_PreLaunch);
+ tot_dt += dt;
+ tot_ph += sl.ph_count;
- LOG( xxl ? info : LEVEL )
- << " eventID " << eventID
- << " xxl " << ( xxl ? "YES" : "NO " )
- << " i " << std::setw(4) << i
- << " dt " << std::setw(11) << std::fixed << std::setprecision(6) << dt
- << " slice " << sl.idx_desc(i)
- ;
+ LOG(xxl ? info : LEVEL) << " eventID " << eventID << " xxl " << (xxl ? "YES" : "NO ") << " i " << std::setw(4)
+ << i << " dt " << std::setw(11) << std::fixed << std::setprecision(6) << dt << " slice "
+ << sl.idx_desc(i);
int64_t t_POST = SProf::Add("QSim__simulate_POST");
- sev->gather(); // gather into *fold* just added to *topfold*
+ sev->gather(); // gather into *fold* just added to *topfold*
int64_t t_DOWN = SProf::Add("QSim__simulate_DOWN");
- tot_gdt += ( t_DOWN - t_POST ) ;
+ tot_gdt += (t_DOWN - t_POST);
}
-
- size_t max_slot_M = SEventConfig::MaxSlot()/M;
- std::string anno = SProf::Annotation("slice",num_slice, "max_slot_M", max_slot_M);
+ size_t max_slot_M = SEventConfig::MaxSlot() / M;
+ std::string anno = SProf::Annotation("slice", num_slice, "max_slot_M", max_slot_M);
int64_t t_LEND = SProf::Add("QSim__simulate_LEND", anno.c_str());
- std::stringstream ss ;
- std::ostream* out = CONCAT ? &ss : nullptr ;
+ std::stringstream ss;
+ std::ostream *out = CONCAT ? &ss : nullptr;
int concat_rc = sev->topfold->concat(out);
- LOG_IF(info, CONCAT) << ss.str() ;
- LOG_IF(fatal, concat_rc != 0) << " sev->topfold->concat FAILED " ;
+ LOG_IF(info, CONCAT) << ss.str();
+ LOG_IF(fatal, concat_rc != 0) << " sev->topfold->concat FAILED ";
assert(concat_rc == 0);
bool has_hlm = sev->topfold->has_key(SComp::HITLITEMERGED_);
- bool has_hm = sev->topfold->has_key(SComp::HITMERGED_);
- bool do_final_merge = num_slice > 1 && ( has_hlm || has_hm ) ;
- LOG(LEVEL)
- << " num_slice " << num_slice
- << " has_hm " << ( has_hm ? "YES" : "NO " )
- << " has_hlm " << ( has_hlm ? "YES" : "NO " )
- << " do_final_merge " << ( do_final_merge ? "YES" : "NO " )
- ;
- if(do_final_merge) simulate_final_merge(tot_ph, stream);
+ bool has_hm = sev->topfold->has_key(SComp::HITMERGED_);
+ bool do_final_merge = num_slice > 1 && (has_hlm || has_hm);
+ LOG(LEVEL) << " num_slice " << num_slice << " has_hm " << (has_hm ? "YES" : "NO ") << " has_hlm "
+ << (has_hlm ? "YES" : "NO ") << " do_final_merge " << (do_final_merge ? "YES" : "NO ");
+ if (do_final_merge)
+ simulate_final_merge(tot_ph, stream);
-
- if(!KEEP_SUBFOLD) sev->topfold->clear_subfold();
+ if (!KEEP_SUBFOLD)
+ sev->topfold->clear_subfold();
int64_t t_PCAT = SProf::Add("QSim__simulate_PCAT");
- int tot_ht = sev->getNumHit() ; // NB from fold, so requires hits array gathering to be configured to get non-zero
- std::string counts = sev->getCounts(); // collect counts before reset
-
- LOG_IF(info, SEvt::MINIMAL)
- << " eventID " << eventID
- << " tot_dt " << std::setw(11) << std::fixed << std::setprecision(6) << tot_dt
- << " tot_ph " << std::setw(10) << tot_ph
- << " tot_ph/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_ph)/float(M)
- << " tot_ht " << std::setw(10) << tot_ht
- << " tot_ht/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_ht)/float(M)
- << " tot_ht/tot_ph " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_ht)/float(tot_ph)
- << " reset_ " << ( reset_ ? "YES" : "NO " )
- ;
+ int tot_ht = sev->getNumHit(); // NB from fold, so requires hits array gathering to be configured to get non-zero
+ std::string counts = sev->getCounts(); // collect counts before reset
+ LOG_IF(info, SEvt::MINIMAL) << " eventID " << eventID << " tot_dt " << std::setw(11) << std::fixed
+ << std::setprecision(6) << tot_dt << " tot_ph " << std::setw(10) << tot_ph
+ << " tot_ph/M " << std::setw(10) << std::fixed << std::setprecision(6)
+ << float(tot_ph) / float(M) << " tot_ht " << std::setw(10) << tot_ht << " tot_ht/M "
+ << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_ht) / float(M)
+ << " tot_ht/tot_ph " << std::setw(10) << std::fixed << std::setprecision(6)
+ << float(tot_ht) / float(tot_ph) << " reset_ " << (reset_ ? "YES" : "NO ");
- assert( tot_ph == tot_ph_0 );
+ assert(tot_ph == tot_ph_0);
- int64_t t_BRES = SProf::Add("QSim__simulate_BRES", counts.c_str() );
- if(reset_) reset(eventID) ;
+ int64_t t_BRES = SProf::Add("QSim__simulate_BRES", counts.c_str());
+ if (reset_)
+ reset(eventID);
- int64_t t_TAIL = SProf::Add("QSim__simulate_TAIL");
+ int64_t t_TAIL = SProf::Add("QSim__simulate_TAIL");
SProf::Write(); // per-event write, so have something in case of crash
- LOG_IF(info, SEvt::MINTIME) << "\n"
- << SEvt::SEvt__MINTIME
- << "\n"
- << " (TAIL - HEAD)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float( t_TAIL - t_HEAD )/M
- << " (head to tail of QSim::simulate method) "
- << "\n"
- << " (LEND - LBEG)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float( t_LEND - t_LBEG )/M
- << " (multilaunch loop begin to end) "
- << "\n"
- << " (PCAT - LEND)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float( t_PCAT - t_LEND )/M
- << " (topfold concat and clear subfold) "
+ LOG_IF(info, SEvt::MINTIME)
<< "\n"
- << " (TAIL - BRES)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float( t_TAIL - t_BRES )/M
- << " (QSim::reset which saves hits) "
- << "\n"
- << " tot_idt/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_idt)/M
- << " (sum of kernel execution int64_t stamp differences in microseconds)"
- << "\n"
- << " tot_dt " << std::setw(10) << std::fixed << std::setprecision(6) << tot_dt
- << " int(tot_dt*M) " << std::setw(10) << int64_t(tot_dt*M)
- << " (sum of kernel execution double chrono stamp differences in seconds, and scaled to ms) "
- << "\n"
- << " tot_gdt/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_gdt)/M
- << " (sum of SEvt::gather int64_t stamp differences in microseconds)"
- << "\n"
- ;
-
- return tot_dt ;
+ << SEvt::SEvt__MINTIME << "\n"
+ << " (TAIL - HEAD)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(t_TAIL - t_HEAD) / M
+ << " (head to tail of QSim::simulate method) " << "\n"
+ << " (LEND - LBEG)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(t_LEND - t_LBEG) / M
+ << " (multilaunch loop begin to end) " << "\n"
+ << " (PCAT - LEND)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(t_PCAT - t_LEND) / M
+ << " (topfold concat and clear subfold) " << "\n"
+ << " (TAIL - BRES)/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(t_TAIL - t_BRES) / M
+ << " (QSim::reset which saves hits) " << "\n"
+ << " tot_idt/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_idt) / M
+ << " (sum of kernel execution int64_t stamp differences in microseconds)" << "\n"
+ << " tot_dt " << std::setw(10) << std::fixed << std::setprecision(6) << tot_dt << " int(tot_dt*M) "
+ << std::setw(10) << int64_t(tot_dt * M)
+ << " (sum of kernel execution double chrono stamp differences in seconds, and scaled to ms) " << "\n"
+ << " tot_gdt/M " << std::setw(10) << std::fixed << std::setprecision(6) << float(tot_gdt) / M
+ << " (sum of SEvt::gather int64_t stamp differences in microseconds)" << "\n";
+
+ return tot_dt;
}
-
/**
QSim::simulate_final_merge
---------------------------
@@ -633,58 +574,49 @@ TODO: use QEvt::FinalMerge_async once that makes sense
void QSim::simulate_final_merge(int64_t tot_ph, cudaStream_t stream)
{
bool has_hlm = sev->topfold->has_key(SComp::HITLITEMERGED_);
- bool has_hm = sev->topfold->has_key(SComp::HITMERGED_);
+ bool has_hm = sev->topfold->has_key(SComp::HITMERGED_);
- if( has_hlm )
+ if (has_hlm)
{
- const NP* hlm = sev->topfold->get(SComp::HITLITEMERGED_);
- NP* fin = QEvt::FinalMerge(hlm, stream);
+ const NP *hlm = sev->topfold->get(SComp::HITLITEMERGED_);
+ NP *fin = QEvt::FinalMerge(hlm, stream);
- float hlm_frac = float(hlm->num_items())/float(tot_ph) ;
- float fin_frac = float(fin->num_items())/float(hlm->num_items()) ;
+ float hlm_frac = float(hlm->num_items()) / float(tot_ph);
+ float fin_frac = float(fin->num_items()) / float(hlm->num_items());
- std::stringstream ss ;
- ss
- << " tot_ph " << tot_ph
- << " hlm " << ( hlm ? hlm->sstr() : "-" )
- << " fin " << ( fin ? fin->sstr() : "-" )
- << " hlm/tot " << std::setw(7) << std::fixed << std::setprecision(4) << hlm_frac
- << " fin/hlm " << std::setw(7) << std::fixed << std::setprecision(4) << fin_frac
- ;
+ std::stringstream ss;
+ ss << " tot_ph " << tot_ph << " hlm " << (hlm ? hlm->sstr() : "-") << " fin " << (fin ? fin->sstr() : "-")
+ << " hlm/tot " << std::setw(7) << std::fixed << std::setprecision(4) << hlm_frac << " fin/hlm "
+ << std::setw(7) << std::fixed << std::setprecision(4) << fin_frac;
std::string note = ss.str();
- fin->set_meta("QSim__simulate_final_merge", note );
+ fin->set_meta("QSim__simulate_final_merge", note);
- sev->topfold->set(SComp::HITLITEMERGED_, fin );
+ sev->topfold->set(SComp::HITLITEMERGED_, fin);
- LOG(info) << note ;
+ LOG(info) << note;
}
- if( has_hm )
+ if (has_hm)
{
- const NP* hm = sev->topfold->get(SComp::HITMERGED_);
- NP* fi = QEvt::FinalMerge(hm, stream);
+ const NP *hm = sev->topfold->get(SComp::HITMERGED_);
+ NP *fi = QEvt::FinalMerge(hm, stream);
- float hm_frac = float(hm->num_items())/float(tot_ph) ;
- float fi_frac = float(fi->num_items())/float(hm->num_items()) ;
+ float hm_frac = float(hm->num_items()) / float(tot_ph);
+ float fi_frac = float(fi->num_items()) / float(hm->num_items());
- std::stringstream ss ;
- ss
- << " tot_ph " << tot_ph
- << " hm " << ( hm ? hm->sstr() : "-" )
- << " fi " << ( fi ? fi->sstr() : "-" )
- << " hm/tot " << std::setw(7) << std::fixed << std::setprecision(4) << hm_frac
- << " fi/hm " << std::setw(7) << std::fixed << std::setprecision(4) << fi_frac
- ;
+ std::stringstream ss;
+ ss << " tot_ph " << tot_ph << " hm " << (hm ? hm->sstr() : "-") << " fi " << (fi ? fi->sstr() : "-")
+ << " hm/tot " << std::setw(7) << std::fixed << std::setprecision(4) << hm_frac << " fi/hm " << std::setw(7)
+ << std::fixed << std::setprecision(4) << fi_frac;
std::string note = ss.str();
- fi->set_meta("QSim__simulate_final_merge", note );
+ fi->set_meta("QSim__simulate_final_merge", note);
- sev->topfold->set(SComp::HITMERGED_, fi );
- LOG(info) << note ;
+ sev->topfold->set(SComp::HITMERGED_, fi);
+ LOG(info) << note;
}
}
-
/**
QSim::simulate
----------------
@@ -707,36 +639,30 @@ Thus is used from language crossing stack::
**/
-
-NP* QSim::simulate(const NP* gs, int eventID )
+NP *QSim::simulate(const NP *gs, int eventID)
{
bool eventID_expected = eventID > -1;
- if(!eventID_expected) std::cerr << "QSim::simulate gs lacks needed eventID metadata [" << eventID << "]\n" ;
+ if (!eventID_expected)
+ std::cerr << "QSim::simulate gs lacks needed eventID metadata [" << eventID << "]\n";
assert(eventID_expected);
- assert( sev == SEvt::Get_EGPU() );
+ assert(sev == SEvt::Get_EGPU());
sev->addGenstep(gs);
- bool reset_ = false ;
+ bool reset_ = false;
double tot_dt = simulate(eventID, reset_);
- const NP* _ht = sev->getHit();
- NP* ht = _ht ? _ht->copy() : nullptr ; // copy global hits from SEvt before reset
+ const NP *_ht = sev->getHit();
+ NP *ht = _ht ? _ht->copy() : nullptr; // copy global hits from SEvt before reset
ht->set_meta("QSim__simulate_tot_dt", tot_dt);
- LOG(info)
- << " eventID " << std::setw(6) << eventID
- << " gs " << ( gs ? gs->sstr() : "-" )
- << " ht " << ( ht ? ht->sstr() : "-" )
- << " tot_dt " << std::fixed << std::setw(10) << std::setprecision(6) << tot_dt
- ;
+ LOG(info) << " eventID " << std::setw(6) << eventID << " gs " << (gs ? gs->sstr() : "-") << " ht "
+ << (ht ? ht->sstr() : "-") << " tot_dt " << std::fixed << std::setw(10) << std::setprecision(6) << tot_dt;
reset(eventID);
- return ht ;
+ return ht;
}
-
-
/**
QSim::MaybeSaveIGS
--------------------
@@ -765,27 +691,21 @@ Try manually reducing slots to see if memory limits are the cause::
**/
-void QSim::MaybeSaveIGS(int eventID, NP* igs) // static
+void QSim::MaybeSaveIGS(int eventID, NP *igs) // static
{
- bool igs_null = igs == nullptr ;
- const char* igs_path = SAVE_IGS_PATH ? spath::Resolve(SAVE_IGS_PATH) : nullptr ;
- bool save_igs = igs && SAVE_IGS_EVENTID == eventID && igs_path ;
- LOG(LEVEL)
- << " eventID " << eventID
- << " igs " << ( igs ? igs->sstr() : "-" )
- << " igs_null " << ( igs_null ? "YES" : "NO " )
- << " [" << _QSim__SAVE_IGS_EVENTID << "] " << SAVE_IGS_EVENTID
- << " [" << _QSim__SAVE_IGS_PATH << "] " << ( SAVE_IGS_PATH ? SAVE_IGS_PATH : "-" )
- << " igs_path [" << ( igs_path ? igs_path : "-" ) << "]"
- << " save_igs " << ( save_igs ? "YES" : "NO " )
- ;
-
- if(!save_igs) return ;
+ bool igs_null = igs == nullptr;
+ const char *igs_path = SAVE_IGS_PATH ? spath::Resolve(SAVE_IGS_PATH) : nullptr;
+ bool save_igs = igs && SAVE_IGS_EVENTID == eventID && igs_path;
+ LOG(LEVEL) << " eventID " << eventID << " igs " << (igs ? igs->sstr() : "-") << " igs_null "
+ << (igs_null ? "YES" : "NO ") << " [" << _QSim__SAVE_IGS_EVENTID << "] " << SAVE_IGS_EVENTID << " ["
+ << _QSim__SAVE_IGS_PATH << "] " << (SAVE_IGS_PATH ? SAVE_IGS_PATH : "-") << " igs_path ["
+ << (igs_path ? igs_path : "-") << "]" << " save_igs " << (save_igs ? "YES" : "NO ");
+
+ if (!save_igs)
+ return;
igs->save(igs_path);
}
-
-
/**
QSim::getPhotonSlotOffset
---------------------------
@@ -810,14 +730,11 @@ or equal the number of states uploaded.
**/
-
-
unsigned long long QSim::get_photon_slot_offset() const
{
- return qev->get_photon_slot_offset() ;
+ return qev->get_photon_slot_offset();
}
-
/**
QSim::reset
------------
@@ -838,12 +755,10 @@ void QSim::reset(int eventID)
SProf::Add("QSim__reset_HEAD");
qev->clear();
sev->endOfEvent(eventID);
- LOG_IF(info, SEvt::LIFECYCLE) << "] eventID " << eventID ;
+ LOG_IF(info, SEvt::LIFECYCLE) << "] eventID " << eventID;
SProf::Add("QSim__reset_TAIL");
}
-
-
/**
QSim::simtrace
---------------
@@ -853,30 +768,26 @@ Collected genstep are uploaded and the CSGOptiX kernel is launched to generate a
**/
-
double QSim::simtrace(int eventID)
{
- assert( SEventConfig::IsRGModeSimtrace() );
-
+ assert(SEventConfig::IsRGModeSimtrace());
sev->beginOfEvent(eventID);
- NP* igs = sev->makeGenstepArrayFromVector();
+ NP *igs = sev->makeGenstepArrayFromVector();
- LOG_IF(fatal, igs==nullptr)
- << " igs NULL "
- << " sev.descGenstepArrayFromVector " << sev->descGenstepArrayFromVector()
- ;
+ LOG_IF(fatal, igs == nullptr) << " igs NULL " << " sev.descGenstepArrayFromVector "
+ << sev->descGenstepArrayFromVector();
assert(igs);
- int rc = qev->setGenstepUpload_NP(igs) ;
+ int rc = qev->setGenstepUpload_NP(igs);
- LOG_IF(error, rc != 0) << " QEvt::setGenstep ERROR : no gensteps collected : will skip cx.simtrace " ;
+ LOG_IF(error, rc != 0) << " QEvt::setGenstep ERROR : no gensteps collected : will skip cx.simtrace ";
- sev->t_PreLaunch = sstamp::Now() ;
- double dt = rc == 0 && cx != nullptr ? cx->simtrace_launch() : -1. ;
- sev->t_PostLaunch = sstamp::Now() ;
- sev->t_Launch = dt ;
+ sev->t_PreLaunch = sstamp::Now();
+ double dt = rc == 0 && cx != nullptr ? cx->simtrace_launch() : -1.;
+ sev->t_PostLaunch = sstamp::Now();
+ sev->t_Launch = dt;
// see ~/o/notes/issues/cxt_min_simtrace_revival.rst
sev->gather();
@@ -886,94 +797,80 @@ double QSim::simtrace(int eventID)
sev->endOfEvent(eventID);
- return dt ;
+ return dt;
}
-
-qsim* QSim::getDevicePtr() const
+qsim *QSim::getDevicePtr() const
{
- return d_sim ;
+ return d_sim;
}
-
char QSim::getScintTexFilterMode() const
{
- return scint->tex->getFilterMode() ;
+ return scint->tex->getFilterMode();
}
std::string QSim::desc() const
{
- std::stringstream ss ;
- ss << "QSim::desc"
- << std::endl
- << " this 0x" << std::hex << std::uint64_t(this) << std::dec
- << " INSTANCE 0x" << std::hex << std::uint64_t(INSTANCE) << std::dec
- << " QEvt.hh:qev 0x" << std::hex << std::uint64_t(qev) << std::dec
- << " qsim.h:sim 0x" << std::hex << std::uint64_t(sim) << std::dec
- ;
+ std::stringstream ss;
+ ss << "QSim::desc" << std::endl
+ << " this 0x" << std::hex << std::uint64_t(this) << std::dec << " INSTANCE 0x" << std::hex
+ << std::uint64_t(INSTANCE) << std::dec << " QEvt.hh:qev 0x" << std::hex << std::uint64_t(qev) << std::dec
+ << " qsim.h:sim 0x" << std::hex << std::uint64_t(sim) << std::dec;
std::string s = ss.str();
- return s ;
+ return s;
}
std::string QSim::descFull() const
{
- std::stringstream ss ;
- ss
- << std::endl
- << "QSim::descFull"
- << std::endl
- << " this 0x" << std::hex << std::uint64_t(this) << std::dec
- << " INSTANCE 0x" << std::hex << std::uint64_t(INSTANCE) << std::dec
- << " QEvt.hh:qev 0x" << std::hex << std::uint64_t(qev) << std::dec
- << " qsim.h:sim 0x" << std::hex << std::uint64_t(sim) << std::dec
- << " qsim.h:d_sim 0x" << std::hex << std::uint64_t(d_sim) << std::dec
- //<< " sim->rng_state 0x" << std::hex << std::uint64_t(sim->rng_state) << std::dec // tending to SEGV on some systems
- << " sim->base 0x" << std::hex << std::uint64_t(sim->base) << std::dec
- << " sim->bnd 0x" << std::hex << std::uint64_t(sim->bnd) << std::dec
- << " sim->scint 0x" << std::hex << std::uint64_t(sim->scint) << std::dec
- << " sim->cerenkov 0x" << std::hex << std::uint64_t(sim->cerenkov) << std::dec
- ;
+ std::stringstream ss;
+ ss << std::endl
+ << "QSim::descFull" << std::endl
+ << " this 0x" << std::hex << std::uint64_t(this) << std::dec << " INSTANCE 0x" << std::hex
+ << std::uint64_t(INSTANCE) << std::dec << " QEvt.hh:qev 0x" << std::hex << std::uint64_t(qev) << std::dec
+ << " qsim.h:sim 0x" << std::hex << std::uint64_t(sim) << std::dec << " qsim.h:d_sim 0x" << std::hex
+ << std::uint64_t(d_sim)
+ << std::dec
+ //<< " sim->rng_state 0x" << std::hex << std::uint64_t(sim->rng_state) << std::dec // tending to SEGV on some
+ // systems
+ << " sim->base 0x" << std::hex << std::uint64_t(sim->base) << std::dec << " sim->bnd 0x" << std::hex
+ << std::uint64_t(sim->bnd) << std::dec << " sim->scint 0x" << std::hex << std::uint64_t(sim->scint) << std::dec
+ << " sim->cerenkov 0x" << std::hex << std::uint64_t(sim->cerenkov) << std::dec;
std::string s = ss.str();
- return s ;
+ return s;
}
std::string QSim::descComponents() const
{
- std::stringstream ss ;
+ std::stringstream ss;
ss << std::endl
- << "QSim::descComponents"
- << std::endl
- << " (QBase)base " << ( base ? "YES" : "NO " ) << std::endl
- << " (QEvt)qev " << ( qev ? "YES" : "NO " ) << std::endl
- << " (SEvt)sev " << ( sev ? "YES" : "NO " ) << std::endl
- << " (QRng)rng " << ( rng ? "YES" : "NO " ) << std::endl
- << " (QScint)scint " << ( scint ? "YES" : "NO " ) << std::endl
- << " (QCerenkov)cerenkov " << ( cerenkov ? "YES" : "NO " ) << std::endl
- << " (QBnd)bnd " << ( bnd ? "YES" : "NO " ) << std::endl
- << " (QOptical)optical " << ( optical ? "YES" : "NO " ) << std::endl
- << " (QDebug)debug_ " << ( debug_ ? "YES" : "NO " ) << std::endl
- << " (QProp)prop " << ( prop ? "YES" : "NO " ) << std::endl
- << " (QPMT)pmt " << ( pmt ? "YES" : "NO " ) << std::endl
- << " (QMultiFilm)multifilm " << ( multifilm ? "YES" : "NO " ) << std::endl
- << " (qsim)sim " << ( sim ? "YES" : "NO " ) << std::endl
- << " (qsim)d_sim " << ( d_sim ? "YES" : "NO " ) << std::endl
- << " (qdebug)dbg " << ( dbg ? "YES" : "NO " ) << std::endl
- << " (qdebug)d_dbg " << ( d_dbg ? "YES" : "NO " ) << std::endl
- ;
+ << "QSim::descComponents" << std::endl
+ << " (QBase)base " << (base ? "YES" : "NO ") << std::endl
+ << " (QEvt)qev " << (qev ? "YES" : "NO ") << std::endl
+ << " (SEvt)sev " << (sev ? "YES" : "NO ") << std::endl
+ << " (QRng)rng " << (rng ? "YES" : "NO ") << std::endl
+ << " (QScint)scint " << (scint ? "YES" : "NO ") << std::endl
+ << " (QCerenkov)cerenkov " << (cerenkov ? "YES" : "NO ") << std::endl
+ << " (QBnd)bnd " << (bnd ? "YES" : "NO ") << std::endl
+ << " (QOptical)optical " << (optical ? "YES" : "NO ") << std::endl
+ << " (QDebug)debug_ " << (debug_ ? "YES" : "NO ") << std::endl
+ << " (QProp)prop " << (prop ? "YES" : "NO ") << std::endl
+ << " (QPMT)pmt " << (pmt ? "YES" : "NO ") << std::endl
+ << " (QMultiFilm)multifilm " << (multifilm ? "YES" : "NO ") << std::endl
+ << " (qsim)sim " << (sim ? "YES" : "NO ") << std::endl
+ << " (qsim)d_sim " << (d_sim ? "YES" : "NO ") << std::endl
+ << " (qdebug)dbg " << (dbg ? "YES" : "NO ") << std::endl
+ << " (qdebug)d_dbg " << (d_dbg ? "YES" : "NO ") << std::endl;
std::string s = ss.str();
- return s ;
+ return s;
}
-
-
-
-
-void QSim::configureLaunch(unsigned width, unsigned height )
+void QSim::configureLaunch(unsigned width, unsigned height)
{
QU::ConfigureLaunch(numBlocks, threadsPerBlock, width, height);
}
-void QSim::configureLaunch2D(unsigned width, unsigned height )
+void QSim::configureLaunch2D(unsigned width, unsigned height)
{
QU::ConfigureLaunch2D(numBlocks, threadsPerBlock, width, height);
}
@@ -988,20 +885,11 @@ void QSim::configureLaunch1D(unsigned num, unsigned threads_per_block)
QU::ConfigureLaunch1D(numBlocks, threadsPerBlock, num, threads_per_block);
}
-
std::string QSim::descLaunch() const
{
return QU::DescLaunch(numBlocks, threadsPerBlock);
}
-
-
-
-
-
-
-
-
/**
QSim::rng_sequence mass production with multiple launches...
--------------------------------------------------------------
@@ -1009,9 +897,8 @@ QSim::rng_sequence mass production with multiple launches...
The output files are split too::
epsilon:opticks blyth$ np.py *.npy
- a : TRngBufTest_0.npy : (10000, 16, 16) : 8f9b27c9416a0121574730baa742b5c9 : 20210715-1227
- epsilon:opticks blyth$ du -h TRngBufTest_0.npy
- 20M TRngBufTest_0.npy
+ a : TRngBufTest_0.npy : (10000, 16, 16) :
+8f9b27c9416a0121574730baa742b5c9 : 20210715-1227 epsilon:opticks blyth$ du -h TRngBufTest_0.npy 20M TRngBufTest_0.npy
In [6]: (16*16*4*2*10000)/1e6
Out[6]: 20.48
@@ -1024,10 +911,9 @@ Upping to 1M would be 100x 20M = 2000M 2GB
**/
-
template
-extern void QSim_rng_sequence( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, T* seq, unsigned ni, unsigned nj, unsigned id_offset );
-
+extern void QSim_rng_sequence(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, T *seq, unsigned ni, unsigned nj,
+ unsigned id_offset);
/**
QSim::rng_sequence generate randoms in single CUDA launch
@@ -1047,25 +933,22 @@ skipahead : used curand skipahead offsets depending on sim->evt->index and OPTIC
**/
-template
-void QSim::rng_sequence( T* seq, unsigned ni_tranche, unsigned nv, unsigned id_offset )
+template void QSim::rng_sequence(T *seq, unsigned ni_tranche, unsigned nv, unsigned id_offset)
{
- configureLaunch(ni_tranche, 1 );
+ configureLaunch(ni_tranche, 1);
- unsigned num_rng = ni_tranche*nv ;
+ unsigned num_rng = ni_tranche * nv;
- const char* label = "QSim::rng_sequence:num_rng" ;
+ const char *label = "QSim::rng_sequence:num_rng";
- T* d_seq = QU::device_alloc(num_rng, label );
+ T *d_seq = QU::device_alloc(num_rng, label);
- QSim_rng_sequence( numBlocks, threadsPerBlock, d_sim, d_seq, ni_tranche, nv, id_offset );
+ QSim_rng_sequence(numBlocks, threadsPerBlock, d_sim, d_seq, ni_tranche, nv, id_offset);
- QU::copy_device_to_host_and_free( seq, d_seq, num_rng, label );
+ QU::copy_device_to_host_and_free(seq, d_seq, num_rng, label);
}
-
-
-const char* QSim::PREFIX = "rng_sequence" ;
+const char *QSim::PREFIX = "rng_sequence";
/**
QSim::rng_sequence
@@ -1085,77 +968,47 @@ Default *dir* is $TMP/QSimTest/rng_sequence leading to npy paths like::
**/
template
-void QSim::rng_sequence( const char* dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size )
+void QSim::rng_sequence(const char *dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size)
{
- assert( ni >= ni_tranche_size && ni % ni_tranche_size == 0 ); // total size *ni* must be integral multiple of *ni_tranche_size*
- unsigned num_tranche = ni/ni_tranche_size ;
- unsigned nv = nj*nk ;
-
- unsigned size = ni_tranche_size*nv ; // number of randoms to be generated in each launch
- std::string reldir = QU::rng_sequence_reldir(PREFIX, ni, nj, nk, ni_tranche_size ) ;
-
- LOG(info)
- << " ni " << ni
- << " ni_tranche_size " << ni_tranche_size
- << " num_tranche " << num_tranche
- << " reldir " << reldir.c_str()
- << " nj " << nj
- << " nk " << nk
- << " nv(nj*nk) " << nv
- << " size(ni_tranche_size*nv) " << size
- << " typecode " << QU::typecode()
- ;
+ assert(ni >= ni_tranche_size &&
+ ni % ni_tranche_size == 0); // total size *ni* must be integral multiple of *ni_tranche_size*
+ unsigned num_tranche = ni / ni_tranche_size;
+ unsigned nv = nj * nk;
+
+ unsigned size = ni_tranche_size * nv; // number of randoms to be generated in each launch
+ std::string reldir = QU::rng_sequence_reldir(PREFIX, ni, nj, nk, ni_tranche_size);
+ LOG(info) << " ni " << ni << " ni_tranche_size " << ni_tranche_size << " num_tranche " << num_tranche << " reldir "
+ << reldir.c_str() << " nj " << nj << " nk " << nk << " nv(nj*nk) " << nv << " size(ni_tranche_size*nv) "
+ << size << " typecode " << QU::typecode();
// NB seq array memory gets reused for each launch and saved to different paths
- NP* seq = NP::Make(ni_tranche_size, nj, nk) ;
- T* seq_values = seq->values();
+ NP *seq = NP::Make(ni_tranche_size, nj, nk);
+ T *seq_values = seq->values();
NP::INT seq_nv = seq->num_values();
+ LOG(info) << " seq " << (seq ? seq->sstr() : "-") << " seq_values " << seq_values << " seq_nv " << seq_nv
+ << " seq_values[0] " << seq_values[0] << " seq_values[seq_nv-1] " << seq_values[seq_nv - 1];
- LOG(info)
- << " seq " << ( seq ? seq->sstr() : "-" )
- << " seq_values " << seq_values
- << " seq_nv " << seq_nv
- << " seq_values[0] " << seq_values[0]
- << " seq_values[seq_nv-1] " << seq_values[seq_nv-1]
- ;
-
-
-
- for(unsigned t=0 ; t < num_tranche ; t++)
+ for (unsigned t = 0; t < num_tranche; t++)
{
// *id_offset* controls which rng_state/RNG to use
- unsigned id_offset = ni_tranche_size*t ;
- std::string name = QU::rng_sequence_name(PREFIX, ni_tranche_size, nj, nk, id_offset ) ;
+ unsigned id_offset = ni_tranche_size * t;
+ std::string name = QU::rng_sequence_name(PREFIX, ni_tranche_size, nj, nk, id_offset);
- std::cout
- << std::setw(3) << t
- << std::setw(10) << id_offset
- << std::setw(100) << name.c_str()
- << std::endl
- ;
+ std::cout << std::setw(3) << t << std::setw(10) << id_offset << std::setw(100) << name.c_str() << std::endl;
- rng_sequence( seq_values, ni_tranche_size, nv, id_offset );
+ rng_sequence(seq_values, ni_tranche_size, nv, id_offset);
- const char* path = spath::Resolve(dir, reldir.c_str(), name.c_str() );
+ const char *path = spath::Resolve(dir, reldir.c_str(), name.c_str());
seq->save(path);
}
}
-
-
-template void QSim::rng_sequence( const char* dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size );
-template void QSim::rng_sequence( const char* dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size );
-
-
-
-
-
-
-
-
-
+template void QSim::rng_sequence(const char *dir, unsigned ni, unsigned nj, unsigned nk,
+ unsigned ni_tranche_size);
+template void QSim::rng_sequence(const char *dir, unsigned ni, unsigned nj, unsigned nk,
+ unsigned ni_tranche_size);
/**
QSim::scint_wavelength
@@ -1167,95 +1020,87 @@ the typical values of 10 or 20 which depend on the buffer creation.
**/
-extern void QSim_scint_wavelength( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, float* wavelength, unsigned num_wavelength );
+extern void QSim_scint_wavelength(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, float *wavelength,
+ unsigned num_wavelength);
-NP* QSim::scint_wavelength(unsigned num_wavelength, unsigned& hd_factor )
+NP *QSim::scint_wavelength(unsigned num_wavelength, unsigned &hd_factor)
{
bool qsim_disable_hd = ssys::getenvbool("QSIM_DISABLE_HD");
- hd_factor = qsim_disable_hd ? 0u : scint->tex->getHDFactor() ;
+ hd_factor = qsim_disable_hd ? 0u : scint->tex->getHDFactor();
// HMM: perhaps get this from sim rather than occupying an argument slot
- LOG(LEVEL) << "[" << " qsim_disable_hd " << qsim_disable_hd << " hd_factor " << hd_factor ;
+ LOG(LEVEL) << "[" << " qsim_disable_hd " << qsim_disable_hd << " hd_factor " << hd_factor;
- configureLaunch(num_wavelength, 1 );
+ configureLaunch(num_wavelength, 1);
- float* d_wavelength = QU::device_alloc(num_wavelength, "QSim::scint_wavelength/num_wavelength");
+ float *d_wavelength = QU::device_alloc(num_wavelength, "QSim::scint_wavelength/num_wavelength");
- QSim_scint_wavelength(numBlocks, threadsPerBlock, d_sim, d_wavelength, num_wavelength );
+ QSim_scint_wavelength(numBlocks, threadsPerBlock, d_sim, d_wavelength, num_wavelength);
- NP* w = NP::Make(num_wavelength) ;
+ NP *w = NP::Make(num_wavelength);
- QU::copy_device_to_host_and_free( (float*)w->bytes(), d_wavelength, num_wavelength, "QSim::scint_wavelength" );
+ QU::copy_device_to_host_and_free((float *)w->bytes(), d_wavelength, num_wavelength,
+ "QSim::scint_wavelength");
- LOG(LEVEL) << "]" ;
+ LOG(LEVEL) << "]";
- return w ;
+ return w;
}
+extern void QSim_RandGaussQ_shoot(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, float *v, unsigned num_v);
-extern void QSim_RandGaussQ_shoot( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, float* v, unsigned num_v );
-
-NP* QSim::RandGaussQ_shoot(unsigned num_v )
+NP *QSim::RandGaussQ_shoot(unsigned num_v)
{
- const char* label = "QSim::RandGaussQ_shoot/num" ;
- configureLaunch(num_v, 1 );
+ const char *label = "QSim::RandGaussQ_shoot/num";
+ configureLaunch(num_v, 1);
std::cout << label << " " << num_v << std::endl;
- float* d_v = QU::device_alloc(num_v, label );
+ float *d_v = QU::device_alloc(num_v, label);
- QSim_RandGaussQ_shoot(numBlocks, threadsPerBlock, d_sim, d_v, num_v );
+ QSim_RandGaussQ_shoot(numBlocks, threadsPerBlock, d_sim, d_v, num_v);
cudaDeviceSynchronize();
- NP* v = NP::Make(num_v) ;
- QU::copy_device_to_host_and_free( (float*)v->bytes(), d_v, num_v, label );
+ NP *v = NP::Make(num_v);
+ QU::copy_device_to_host_and_free((float *)v->bytes(), d_v, num_v, label);
- return v ;
+ return v;
}
-
-
-
-void QSim::dump_wavelength( float* wavelength, unsigned num_wavelength, unsigned edgeitems )
+void QSim::dump_wavelength(float *wavelength, unsigned num_wavelength, unsigned edgeitems)
{
LOG(LEVEL);
- for(unsigned i=0 ; i < num_wavelength ; i++)
+ for (unsigned i = 0; i < num_wavelength; i++)
{
- if( i < edgeitems || i > num_wavelength - edgeitems)
+ if (i < edgeitems || i > num_wavelength - edgeitems)
{
- std::cout
- << std::setw(10) << i
- << std::setw(10) << std::fixed << std::setprecision(3) << wavelength[i]
- << std::endl
- ;
+ std::cout << std::setw(10) << i << std::setw(10) << std::fixed << std::setprecision(3) << wavelength[i]
+ << std::endl;
}
}
}
+extern void QSim_dbg_gs_generate(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, qdebug *dbg, sphoton *photon,
+ unsigned num_photon, unsigned type);
-extern void QSim_dbg_gs_generate(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, qdebug* dbg, sphoton* photon, unsigned num_photon, unsigned type ) ;
-
-
-NP* QSim::dbg_gs_generate(unsigned num_photon, unsigned type )
+NP *QSim::dbg_gs_generate(unsigned num_photon, unsigned type)
{
- assert( type == SCINT_GENERATE || type == CERENKOV_GENERATE );
+ assert(type == SCINT_GENERATE || type == CERENKOV_GENERATE);
- configureLaunch( num_photon, 1 );
- sphoton* d_photon = QU::device_alloc(num_photon, "QSim::dbg_gs_generate:num_photon") ;
+ configureLaunch(num_photon, 1);
+ sphoton *d_photon = QU::device_alloc(num_photon, "QSim::dbg_gs_generate:num_photon");
QU::device_memset(d_photon, 0, num_photon);
- QSim_dbg_gs_generate(numBlocks, threadsPerBlock, d_sim, d_dbg, d_photon, num_photon, type );
+ QSim_dbg_gs_generate(numBlocks, threadsPerBlock, d_sim, d_dbg, d_photon, num_photon, type);
- NP* p = NP::Make(num_photon, 4, 4);
- const char* label = "QSim::dbg_gs_generate" ;
+ NP *p = NP::Make(num_photon, 4, 4);
+ const char *label = "QSim::dbg_gs_generate";
- QU::copy_device_to_host_and_free( (sphoton*)p->bytes(), d_photon, num_photon, label );
- return p ;
+ QU::copy_device_to_host_and_free((sphoton *)p->bytes(), d_photon, num_photon, label);
+ return p;
}
-
-
-extern void QSim_generate_photon(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim ) ;
+extern void QSim_generate_photon(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim);
/**
QSim::generate_photon
@@ -1263,92 +1108,71 @@ QSim::generate_photon
**/
-
void QSim::generate_photon()
{
- LOG(LEVEL) << "[" ;
+ LOG(LEVEL) << "[";
- unsigned num_photon = qev->getNumPhoton() ;
- LOG(info) << " num_photon " << num_photon ;
+ unsigned num_photon = qev->getNumPhoton();
+ LOG(info) << " num_photon " << num_photon;
- LOG_IF(fatal, num_photon == 0 )
- << " num_photon zero : MUST QEvt::setGenstep before QSim::generate_photon "
- ;
+ LOG_IF(fatal, num_photon == 0) << " num_photon zero : MUST QEvt::setGenstep before QSim::generate_photon ";
- assert( num_photon > 0 );
- assert( d_sim );
+ assert(num_photon > 0);
+ assert(d_sim);
- configureLaunch( num_photon, 1 );
+ configureLaunch(num_photon, 1);
- LOG(info) << "QSim_generate_photon... " ;
+ LOG(info) << "QSim_generate_photon... ";
- QSim_generate_photon(numBlocks, threadsPerBlock, d_sim );
+ QSim_generate_photon(numBlocks, threadsPerBlock, d_sim);
- LOG(LEVEL) << "]" ;
+ LOG(LEVEL) << "]";
}
+extern void QSim_fill_state_0(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, quad6 *state, unsigned num_state,
+ qdebug *dbg);
-
-
-
-
-extern void QSim_fill_state_0(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad6* state, unsigned num_state, qdebug* dbg );
-
-void QSim::fill_state_0(quad6* state, unsigned num_state)
+void QSim::fill_state_0(quad6 *state, unsigned num_state)
{
- assert( d_sim );
- assert( d_dbg );
-
- quad6* d_state = QU::device_alloc(num_state, "QSim::fill_state_0:num_state") ;
+ assert(d_sim);
+ assert(d_dbg);
+ quad6 *d_state = QU::device_alloc(num_state, "QSim::fill_state_0:num_state");
- unsigned threads_per_block = 32 ;
- configureLaunch1D( num_state, threads_per_block );
+ unsigned threads_per_block = 32;
+ configureLaunch1D(num_state, threads_per_block);
- LOG(info)
- << " num_state " << num_state
- << " threads_per_block " << threads_per_block
- << " descLaunch " << descLaunch()
- ;
+ LOG(info) << " num_state " << num_state << " threads_per_block " << threads_per_block << " descLaunch "
+ << descLaunch();
- QSim_fill_state_0(numBlocks, threadsPerBlock, d_sim, d_state, num_state, d_dbg );
+ QSim_fill_state_0(numBlocks, threadsPerBlock, d_sim, d_state, num_state, d_dbg);
- const char* label = "QSim::fill_state_0" ;
- QU::copy_device_to_host_and_free( state, d_state, num_state, label );
+ const char *label = "QSim::fill_state_0";
+ QU::copy_device_to_host_and_free(state, d_state, num_state, label);
}
+extern void QSim_fill_state_1(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, sstate *state, unsigned num_state,
+ qdebug *dbg);
-extern void QSim_fill_state_1(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, sstate* state, unsigned num_state, qdebug* dbg );
-
-void QSim::fill_state_1(sstate* state, unsigned num_state)
+void QSim::fill_state_1(sstate *state, unsigned num_state)
{
- assert( d_sim );
- assert( d_dbg );
+ assert(d_sim);
+ assert(d_dbg);
- sstate* d_state = QU::device_alloc(num_state, "QSim::fill_state_1:num_state") ;
+ sstate *d_state = QU::device_alloc(num_state, "QSim::fill_state_1:num_state");
- unsigned threads_per_block = 64 ;
- configureLaunch1D( num_state, threads_per_block );
+ unsigned threads_per_block = 64;
+ configureLaunch1D(num_state, threads_per_block);
- LOG(info)
- << " num_state " << num_state
- << " threads_per_block " << threads_per_block
- << " descLaunch " << descLaunch()
- ;
+ LOG(info) << " num_state " << num_state << " threads_per_block " << threads_per_block << " descLaunch "
+ << descLaunch();
- QSim_fill_state_1(numBlocks, threadsPerBlock, d_sim, d_state, num_state, d_dbg );
+ QSim_fill_state_1(numBlocks, threadsPerBlock, d_sim, d_state, num_state, d_dbg);
- const char* label = "QSim::fill_state_1" ;
- QU::copy_device_to_host_and_free( state, d_state, num_state, label );
+ const char *label = "QSim::fill_state_1";
+ QU::copy_device_to_host_and_free(state, d_state, num_state, label);
}
-
-
-
-
-
-
-
/**
extern QSim_quad_launch
--------------------------
@@ -1357,43 +1181,39 @@ This function is implemented in QSim.cu and it used by *quad_launch_generate*
**/
-extern void QSim_quad_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad* q, unsigned num_quad, qdebug* dbg, unsigned type );
-
-
+extern void QSim_quad_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, quad *q, unsigned num_quad, qdebug *dbg,
+ unsigned type);
-NP* QSim::quad_launch_generate(unsigned num_quad, unsigned type )
+NP *QSim::quad_launch_generate(unsigned num_quad, unsigned type)
{
- assert( d_sim );
- assert( d_dbg );
+ assert(d_sim);
+ assert(d_dbg);
- const char* label = "QSim::quad_launch_generate:num_quad" ;
+ const char *label = "QSim::quad_launch_generate:num_quad";
- quad* d_q = QU::device_alloc(num_quad, label ) ;
+ quad *d_q = QU::device_alloc(num_quad, label);
- unsigned threads_per_block = 512 ;
- configureLaunch1D( num_quad, threads_per_block );
+ unsigned threads_per_block = 512;
+ configureLaunch1D(num_quad, threads_per_block);
- QSim_quad_launch(numBlocks, threadsPerBlock, d_sim, d_q, num_quad, d_dbg, type );
+ QSim_quad_launch(numBlocks, threadsPerBlock, d_sim, d_q, num_quad, d_dbg, type);
- NP* q = NP::Make( num_quad, 4 );
- quad* qq = (quad*)q->bytes();
+ NP *q = NP::Make(num_quad, 4);
+ quad *qq = (quad *)q->bytes();
- QU::copy_device_to_host_and_free( qq, d_q, num_quad, label );
+ QU::copy_device_to_host_and_free(qq, d_q, num_quad, label);
- if( type == QGEN_SMEAR_NORMAL_SIGMA_ALPHA || type == QGEN_SMEAR_NORMAL_POLISH )
+ if (type == QGEN_SMEAR_NORMAL_SIGMA_ALPHA || type == QGEN_SMEAR_NORMAL_POLISH)
{
- q->set_meta("normal", scuda::serialize(dbg->normal) );
- q->set_meta("direction", scuda::serialize(dbg->direction) );
- q->set_meta("value", dbg->value );
- q->set_meta("valuename", type == QGEN_SMEAR_NORMAL_SIGMA_ALPHA ? "sigma_alpha" : "polish" );
+ q->set_meta("normal", scuda::serialize(dbg->normal));
+ q->set_meta("direction", scuda::serialize(dbg->direction));
+ q->set_meta("value", dbg->value);
+ q->set_meta("valuename", type == QGEN_SMEAR_NORMAL_SIGMA_ALPHA ? "sigma_alpha" : "polish");
}
- return q ;
+ return q;
}
-
-
-
/**
extern QSim_photon_launch
--------------------------
@@ -1402,8 +1222,8 @@ This function is implemented in QSim.cu and it used by BOTH *photon_launch_gener
**/
-extern void QSim_photon_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, sphoton* photon, unsigned num_photon, qdebug* dbg, unsigned type );
-
+extern void QSim_photon_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, sphoton *photon, unsigned num_photon,
+ qdebug *dbg, unsigned type);
/**
QSim::photon_launch_generate
@@ -1414,32 +1234,29 @@ then downloads the generated photons into the host array. Contrast with *photon_
**/
-NP* QSim::photon_launch_generate(unsigned num_photon, unsigned type )
+NP *QSim::photon_launch_generate(unsigned num_photon, unsigned type)
{
- assert( d_sim );
- assert( d_dbg );
+ assert(d_sim);
+ assert(d_dbg);
- const char* label = "QSim::photon_launch_generate:num_photon" ;
+ const char *label = "QSim::photon_launch_generate:num_photon";
- sphoton* d_photon = QU::device_alloc(num_photon, label ) ;
+ sphoton *d_photon = QU::device_alloc(num_photon, label);
QU::device_memset(d_photon, 0, num_photon);
- unsigned threads_per_block = 512 ;
- configureLaunch1D( num_photon, threads_per_block );
+ unsigned threads_per_block = 512;
+ configureLaunch1D(num_photon, threads_per_block);
- QSim_photon_launch(numBlocks, threadsPerBlock, d_sim, d_photon, num_photon, d_dbg, type );
+ QSim_photon_launch(numBlocks, threadsPerBlock, d_sim, d_photon, num_photon, d_dbg, type);
- NP* p = NP::Make(num_photon, 4, 4);
- sphoton* photon = (sphoton*)p->bytes() ;
+ NP *p = NP::Make(num_photon, 4, 4);
+ sphoton *photon = (sphoton *)p->bytes();
- QU::copy_device_to_host_and_free( photon, d_photon, num_photon, label );
+ QU::copy_device_to_host_and_free(photon, d_photon, num_photon, label);
- return p ;
+ return p;
}
-
-
-
/**
QSim::photon_launch_mutate
---------------------------
@@ -1448,45 +1265,35 @@ This uploads the photon array provided, mutates it and then downloads the change
**/
-void QSim::photon_launch_mutate(sphoton* photon, unsigned num_photon, unsigned type )
+void QSim::photon_launch_mutate(sphoton *photon, unsigned num_photon, unsigned type)
{
- assert( d_sim );
- assert( d_dbg );
+ assert(d_sim);
+ assert(d_dbg);
- const char* label_0 = "QSim::photon_launch_mutate/d_photon" ;
- sphoton* d_photon = QU::UploadArray(photon, num_photon, label_0 );
+ const char *label_0 = "QSim::photon_launch_mutate/d_photon";
+ sphoton *d_photon = QU::UploadArray(photon, num_photon, label_0);
- unsigned DEBUG_NUM_PHOTON = ssys::getenvunsigned(_QSim__photon_launch_mutate_DEBUG_NUM_PHOTON, 0 );
- bool DEBUG_NUM_PHOTON_valid = DEBUG_NUM_PHOTON > 0 && DEBUG_NUM_PHOTON <= num_photon ;
- unsigned u_num_photon = DEBUG_NUM_PHOTON_valid ? DEBUG_NUM_PHOTON : num_photon ;
- bool SKIP_LAUNCH = ssys::getenvbool(_QSim__photon_launch_mutate_SKIP_LAUNCH) ;
+ unsigned DEBUG_NUM_PHOTON = ssys::getenvunsigned(_QSim__photon_launch_mutate_DEBUG_NUM_PHOTON, 0);
+ bool DEBUG_NUM_PHOTON_valid = DEBUG_NUM_PHOTON > 0 && DEBUG_NUM_PHOTON <= num_photon;
+ unsigned u_num_photon = DEBUG_NUM_PHOTON_valid ? DEBUG_NUM_PHOTON : num_photon;
+ bool SKIP_LAUNCH = ssys::getenvbool(_QSim__photon_launch_mutate_SKIP_LAUNCH);
- LOG_IF( error, DEBUG_NUM_PHOTON_valid || true )
- << _QSim__photon_launch_mutate_DEBUG_NUM_PHOTON
- << " DEBUG_NUM_PHOTON " << DEBUG_NUM_PHOTON
- << " num_photon " << num_photon
- << " u_num_photon " << u_num_photon
- << _QSim__photon_launch_mutate_SKIP_LAUNCH
- << " " << ( SKIP_LAUNCH ? "YES" : "NO " )
- ;
+ LOG_IF(error, DEBUG_NUM_PHOTON_valid || true)
+ << _QSim__photon_launch_mutate_DEBUG_NUM_PHOTON << " DEBUG_NUM_PHOTON " << DEBUG_NUM_PHOTON << " num_photon "
+ << num_photon << " u_num_photon " << u_num_photon << _QSim__photon_launch_mutate_SKIP_LAUNCH << " "
+ << (SKIP_LAUNCH ? "YES" : "NO ");
-
- if( SKIP_LAUNCH == false )
+ if (SKIP_LAUNCH == false)
{
- unsigned threads_per_block = 512 ;
- configureLaunch1D( u_num_photon, threads_per_block );
- QSim_photon_launch(numBlocks, threadsPerBlock, d_sim, d_photon, u_num_photon, d_dbg, type );
+ unsigned threads_per_block = 512;
+ configureLaunch1D(u_num_photon, threads_per_block);
+ QSim_photon_launch(numBlocks, threadsPerBlock, d_sim, d_photon, u_num_photon, d_dbg, type);
}
-
- const char* label_1 = "QSim::photon_launch_mutate" ;
- QU::copy_device_to_host_and_free( photon, d_photon, u_num_photon, label_1 );
+ const char *label_1 = "QSim::photon_launch_mutate";
+ QU::copy_device_to_host_and_free(photon, d_photon, u_num_photon, label_1);
}
-
-
-
-
/**
QSim::UploadFakePRD (formerly "UploadMockPRD" )
----------------------------------------------------
@@ -1494,33 +1301,28 @@ QSim::UploadFakePRD (formerly "UploadMockPRD" )
Caution this returns a device pointer.
**/
-quad2* QSim::UploadFakePRD(const NP* ip, const NP* prd) // static
+quad2 *QSim::UploadFakePRD(const NP *ip, const NP *prd) // static
{
assert(ip);
- int num_ip = ip->shape[0] ;
- assert( num_ip > 0 );
+ int num_ip = ip->shape[0];
+ assert(num_ip > 0);
- assert( prd->has_shape( num_ip, -1, 2, 4 ) ); // TODO: evt->max_record checking
- assert( prd->shape.size() == 4 && prd->shape[2] == 2 && prd->shape[3] == 4 );
- int num_prd = prd->shape[0]*prd->shape[1] ;
+ assert(prd->has_shape(num_ip, -1, 2, 4)); // TODO: evt->max_record checking
+ assert(prd->shape.size() == 4 && prd->shape[2] == 2 && prd->shape[3] == 4);
+ int num_prd = prd->shape[0] * prd->shape[1];
- LOG(LEVEL)
- << "["
- << " num_ip " << num_ip
- << " num_prd " << num_prd
- << " prd " << prd->sstr()
- ;
+ LOG(LEVEL) << "[" << " num_ip " << num_ip << " num_prd " << num_prd << " prd " << prd->sstr();
- const char* label = "QSim::UploadFakePRD/d_prd" ;
- quad2* d_prd = QU::UploadArray( (quad2*)prd->bytes(), num_prd, label );
+ const char *label = "QSim::UploadFakePRD/d_prd";
+ quad2 *d_prd = QU::UploadArray((quad2 *)prd->bytes(), num_prd, label);
// prd is non-standard so it is appropriate to adhoc upload here
- return d_prd ;
+ return d_prd;
}
#if !defined(PRODUCTION)
-extern void QSim_fake_propagate_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad2* prd );
+extern void QSim_fake_propagate_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, quad2 *prd);
#endif
/**
@@ -1543,7 +1345,7 @@ using common QEvt functionality
**/
-void QSim::fake_propagate( const NP* prd, unsigned type )
+void QSim::fake_propagate(const NP *prd, unsigned type)
{
#if defined(PRODUCTION)
(void)prd;
@@ -1551,126 +1353,105 @@ void QSim::fake_propagate( const NP* prd, unsigned type )
LOG(fatal) << "QSim::fake_propagate is disabled in PRODUCTION builds";
std::raise(SIGINT);
#else
- const NP* ip = sev->getInputPhoton();
- int num_ip = ip ? ip->shape[0] : 0 ;
- assert( num_ip > 0 );
+ const NP *ip = sev->getInputPhoton();
+ int num_ip = ip ? ip->shape[0] : 0;
+ assert(num_ip > 0);
- quad2* d_prd = UploadFakePRD(ip, prd) ;
+ quad2 *d_prd = UploadFakePRD(ip, prd);
- NP* igs = sev->makeGenstepArrayFromVector();
+ NP *igs = sev->makeGenstepArrayFromVector();
int rc = qev->setGenstepUpload_NP(igs);
- assert( rc == 0 );
- if(rc!=0) std::raise(SIGINT);
+ assert(rc == 0);
+ if (rc != 0)
+ std::raise(SIGINT);
- sev->add_array("prd0", prd );
+ sev->add_array("prd0", prd);
// NB SEvt::beginOfEvent calls SEvt/clear so this addition
// must be after that to succeed in being added to SEvt saved arrays
int num_photon = qev->getNumPhoton();
- bool consistent_num_photon = num_photon == num_ip ;
+ bool consistent_num_photon = num_photon == num_ip;
LOG_IF(fatal, !consistent_num_photon)
- << "["
- << " num_ip " << num_ip
- << " QEvt::getNumPhoton " << num_photon
- << " consistent_num_photon " << ( consistent_num_photon ? "YES" : "NO " )
- << " prd " << prd->sstr()
- ;
+ << "[" << " num_ip " << num_ip << " QEvt::getNumPhoton " << num_photon << " consistent_num_photon "
+ << (consistent_num_photon ? "YES" : "NO ") << " prd " << prd->sstr();
assert(consistent_num_photon);
- assert( qev->upload_count > 0 );
+ assert(qev->upload_count > 0);
- unsigned threads_per_block = 512 ;
- configureLaunch1D( num_photon, threads_per_block );
+ unsigned threads_per_block = 512;
+ configureLaunch1D(num_photon, threads_per_block);
- QSim_fake_propagate_launch(numBlocks, threadsPerBlock, d_sim, d_prd );
+ QSim_fake_propagate_launch(numBlocks, threadsPerBlock, d_sim, d_prd);
cudaDeviceSynchronize();
-
- LOG(LEVEL) << "]" ;
+ LOG(LEVEL) << "]";
#endif
}
+extern void QSim_boundary_lookup_all(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, quad *lookup, unsigned width,
+ unsigned height);
-
-extern void QSim_boundary_lookup_all( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, quad* lookup, unsigned width, unsigned height );
-
-NP* QSim::boundary_lookup_all(unsigned width, unsigned height )
+NP *QSim::boundary_lookup_all(unsigned width, unsigned height)
{
- LOG(LEVEL) << "[" ;
- assert( bnd );
- assert( width <= getBoundaryTexWidth() );
- assert( height <= getBoundaryTexHeight() );
+ LOG(LEVEL) << "[";
+ assert(bnd);
+ assert(width <= getBoundaryTexWidth());
+ assert(height <= getBoundaryTexHeight());
- unsigned num_lookup = width*height ;
- LOG(LEVEL)
- << " width " << width
- << " height " << height
- << " num_lookup " << num_lookup
- ;
+ unsigned num_lookup = width * height;
+ LOG(LEVEL) << " width " << width << " height " << height << " num_lookup " << num_lookup;
+ configureLaunch(width, height);
- configureLaunch(width, height );
+ const char *label = "QSim::boundary_lookup_all:num_lookup";
- const char* label = "QSim::boundary_lookup_all:num_lookup" ;
+ quad *d_lookup = QU::device_alloc(num_lookup, label);
+ QSim_boundary_lookup_all(numBlocks, threadsPerBlock, d_sim, d_lookup, width, height);
- quad* d_lookup = QU::device_alloc(num_lookup, label ) ;
- QSim_boundary_lookup_all(numBlocks, threadsPerBlock, d_sim, d_lookup, width, height );
+ assert(height % 8 == 0);
+ unsigned num_bnd = height / 8;
- assert( height % 8 == 0 );
- unsigned num_bnd = height/8 ;
+ NP *l = NP::Make(num_bnd, 4, 2, width, 4);
+ QU::copy_device_to_host_and_free((quad *)l->bytes(), d_lookup, num_lookup, label);
- NP* l = NP::Make( num_bnd, 4, 2, width, 4 );
- QU::copy_device_to_host_and_free( (quad*)l->bytes(), d_lookup, num_lookup, label );
-
- LOG(LEVEL) << "]" ;
-
- return l ;
+ LOG(LEVEL) << "]";
+ return l;
}
-extern void QSim_boundary_lookup_line( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, quad* lookup, float* domain, unsigned num_lookup, unsigned line, unsigned k );
+extern void QSim_boundary_lookup_line(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, quad *lookup, float *domain,
+ unsigned num_lookup, unsigned line, unsigned k);
-
-NP* QSim::boundary_lookup_line( float* domain, unsigned num_lookup, unsigned line, unsigned k )
+NP *QSim::boundary_lookup_line(float *domain, unsigned num_lookup, unsigned line, unsigned k)
{
- LOG(LEVEL)
- << "["
- << " num_lookup " << num_lookup
- << " line " << line
- << " k " << k
- ;
-
- configureLaunch(num_lookup, 1 );
+ LOG(LEVEL) << "[" << " num_lookup " << num_lookup << " line " << line << " k " << k;
- float* d_domain = QU::device_alloc(num_lookup, "QSim::boundary_lookup_line:num_lookup") ;
+ configureLaunch(num_lookup, 1);
- QU::copy_host_to_device( d_domain, domain, num_lookup );
+ float *d_domain = QU::device_alloc(num_lookup, "QSim::boundary_lookup_line:num_lookup");
- const char* label = "QSim::boundary_lookup_line:num_lookup" ;
+ QU::copy_host_to_device(d_domain, domain, num_lookup);
- quad* d_lookup = QU::device_alloc(num_lookup, label ) ;
+ const char *label = "QSim::boundary_lookup_line:num_lookup";
- QSim_boundary_lookup_line(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, num_lookup, line, k );
+ quad *d_lookup = QU::device_alloc(num_lookup, label);
+ QSim_boundary_lookup_line(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, num_lookup, line, k);
- NP* l = NP::Make( num_lookup, 4 );
+ NP *l = NP::Make(num_lookup, 4);
- QU::copy_device_to_host_and_free( (quad*)l->bytes(), d_lookup, num_lookup, label );
+ QU::copy_device_to_host_and_free((quad *)l->bytes(), d_lookup, num_lookup, label);
- QU::device_free( d_domain );
+ QU::device_free(d_domain);
- LOG(LEVEL) << "]" ;
+ LOG(LEVEL) << "]";
- return l ;
+ return l;
}
-
-
-
-
/**
QSim::prop_lookup
--------------------
@@ -1681,59 +1462,43 @@ below *prop_lookup_onebyone*
**/
-
template
-extern void QSim_prop_lookup( dim3 numBlocks, dim3 threadsPerBlock, qsim* d_sim, T* lookup, const T* domain, unsigned domain_width, unsigned* pids, unsigned num_pids );
+extern void QSim_prop_lookup(dim3 numBlocks, dim3 threadsPerBlock, qsim *d_sim, T *lookup, const T *domain,
+ unsigned domain_width, unsigned *pids, unsigned num_pids);
template
-void QSim::prop_lookup( T* lookup, const T* domain, unsigned domain_width, const std::vector& pids )
+void QSim::prop_lookup(T *lookup, const T *domain, unsigned domain_width, const std::vector &pids)
{
- unsigned num_pids = pids.size() ;
- unsigned num_lookup = num_pids*domain_width ;
- LOG(LEVEL)
- << "["
- << " num_pids " << num_pids
- << " domain_width " << domain_width
- << " num_lookup " << num_lookup
- ;
+ unsigned num_pids = pids.size();
+ unsigned num_lookup = num_pids * domain_width;
+ LOG(LEVEL) << "[" << " num_pids " << num_pids << " domain_width " << domain_width << " num_lookup " << num_lookup;
- configureLaunch(domain_width, num_pids );
+ configureLaunch(domain_width, num_pids);
- unsigned* d_pids = QU::device_alloc(num_pids, "QSim::prop_lookup:num_pids") ;
- T* d_domain = QU::device_alloc(domain_width, "QSim::prop_lookup:domain_width") ;
- T* d_lookup = QU::device_alloc(num_lookup , "QSim::prop_lookup:num_lookup") ;
+ unsigned *d_pids = QU::device_alloc(num_pids, "QSim::prop_lookup:num_pids");
+ T *d_domain = QU::device_alloc(domain_width, "QSim::prop_lookup:domain_width");
+ T *d_lookup = QU::device_alloc(num_lookup, "QSim::prop_lookup:num_lookup");
- QU::copy_host_to_device( d_domain, domain, domain_width );
- QU::copy_host_to_device( d_pids, pids.data(), num_pids );
+ QU::copy_host_to_device(d_domain, domain, domain_width);
+ QU::copy_host_to_device(d_pids, pids.data(), num_pids);
- QSim_prop_lookup(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, domain_width, d_pids, num_pids );
+ QSim_prop_lookup(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, domain_width, d_pids, num_pids);
- QU::copy_device_to_host_and_free( lookup, d_lookup, num_lookup );
- QU::device_free( d_domain );
- QU::device_free( d_pids );
+ QU::copy_device_to_host_and_free(lookup, d_lookup, num_lookup);
+ QU::device_free(d_domain);
+ QU::device_free(d_pids);
- LOG(LEVEL) << "]" ;
+ LOG(LEVEL) << "]";
}
-
-
/**
Hmm doing lookups like this is a very common pattern, could do with
a sub context to carry the pieces to simplify doing that.
**/
template
-extern void QSim_prop_lookup_one(
- dim3 numBlocks,
- dim3 threadsPerBlock,
- qsim* sim,
- T* lookup,
- const T* domain,
- unsigned domain_width,
- unsigned num_pids,
- unsigned pid,
- unsigned ipid
-);
+extern void QSim_prop_lookup_one(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, T *lookup, const T *domain,
+ unsigned domain_width, unsigned num_pids, unsigned pid, unsigned ipid);
/**
QSim::prop_lookup_onebyone
@@ -1749,203 +1514,155 @@ On device uses::
**/
template
-void QSim::prop_lookup_onebyone( T* lookup, const T* domain, unsigned domain_width, const std::vector& pids )
+void QSim::prop_lookup_onebyone(T *lookup, const T *domain, unsigned domain_width, const std::vector &pids)
{
- unsigned num_pids = pids.size() ;
- unsigned num_lookup = num_pids*domain_width ;
- LOG(LEVEL)
- << "["
- << " num_pids " << num_pids
- << " domain_width " << domain_width
- << " num_lookup " << num_lookup
- ;
+ unsigned num_pids = pids.size();
+ unsigned num_lookup = num_pids * domain_width;
+ LOG(LEVEL) << "[" << " num_pids " << num_pids << " domain_width " << domain_width << " num_lookup " << num_lookup;
- configureLaunch(domain_width, 1 );
+ configureLaunch(domain_width, 1);
- T* d_domain = QU::device_alloc(domain_width, "QSim::prop_lookup_onebyone:domain_width") ;
- QU::copy_host_to_device( d_domain, domain, domain_width );
+ T *d_domain = QU::device_alloc(domain_width, "QSim::prop_lookup_onebyone:domain_width");
+ QU::copy_host_to_device(d_domain, domain, domain_width);
- const char* label = "QSim::prop_lookup_onebyone:num_lookup" ;
+ const char *label = "QSim::prop_lookup_onebyone:num_lookup";
- T* d_lookup = QU::device_alloc(num_lookup, label ) ;
+ T *d_lookup = QU::device_alloc(num_lookup, label);
// separate launches for each pid
- for(unsigned ipid=0 ; ipid < num_pids ; ipid++)
+ for (unsigned ipid = 0; ipid < num_pids; ipid++)
{
- unsigned pid = pids[ipid] ;
- QSim_prop_lookup_one(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, domain_width, num_pids, pid, ipid );
+ unsigned pid = pids[ipid];
+ QSim_prop_lookup_one(numBlocks, threadsPerBlock, d_sim, d_lookup, d_domain, domain_width, num_pids, pid,
+ ipid);
}
- QU::copy_device_to_host_and_free( lookup, d_lookup, num_lookup, label );
+ QU::copy_device_to_host_and_free(lookup, d_lookup, num_lookup, label);
- QU::device_free( d_domain );
+ QU::device_free(d_domain);
- LOG(LEVEL) << "]" ;
+ LOG(LEVEL) << "]";
}
+template void QSim::prop_lookup_onebyone(float *, const float *, unsigned, const std::vector &);
+template void QSim::prop_lookup_onebyone(double *, const double *, unsigned, const std::vector &);
-template void QSim::prop_lookup_onebyone( float*, const float* , unsigned, const std::vector& );
-template void QSim::prop_lookup_onebyone( double*, const double* , unsigned, const std::vector& );
-
-
+extern void QSim_multifilm_lookup_all(dim3 numBlocks, dim3 threadsPerBlock, qsim *sim, quad2 *sample, quad2 *result,
+ unsigned width, unsigned height);
-
-
-
-extern void QSim_multifilm_lookup_all( dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad2* sample, quad2* result, unsigned width, unsigned height );
-
-void QSim::multifilm_lookup_all( quad2 * sample , quad2 * result , unsigned width, unsigned height )
+void QSim::multifilm_lookup_all(quad2 *sample, quad2 *result, unsigned width, unsigned height)
{
- LOG(LEVEL) << "[" ;
- unsigned num_lookup = width*height ;
- unsigned size = num_lookup ;
+ LOG(LEVEL) << "[";
+ unsigned num_lookup = width * height;
+ unsigned size = num_lookup;
- LOG(LEVEL)
- << " width " << width
- << " height " << height
- << " num_lookup " << num_lookup
- << " size "<(size, "QSim::multifilm_lookup_all:size" ) ;
+ // const float * c_sample = sample;
+ quad2 *d_sample = QU::device_alloc(size, "QSim::multifilm_lookup_all:size");
- const char* label = "QSim::multifilm_lookup_all:size" ;
+ const char *label = "QSim::multifilm_lookup_all:size";
- quad2* d_result = QU::device_alloc(size, label ) ;
- LOG(LEVEL)
- <<" copy_host_to_device( d_sample, sample , size) before";
- QU::copy_host_to_device( d_sample, sample , size);
- LOG(LEVEL)
- <<" copy_host_to_device( d_sample, sample , size) after";
+ quad2 *d_result = QU::device_alloc(size, label);
+ LOG(LEVEL) << " copy_host_to_device( d_sample, sample , size) before";
+ QU::copy_host_to_device(d_sample, sample, size);
+ LOG(LEVEL) << " copy_host_to_device( d_sample, sample , size) after";
- QSim_multifilm_lookup_all(numBlocks, threadsPerBlock, d_sim, d_sample, d_result, width, height );
- QU::copy_device_to_host_and_free( result , d_result , size, label );
+ QSim_multifilm_lookup_all(numBlocks, threadsPerBlock, d_sim, d_sample, d_result, width, height);
+ QU::copy_device_to_host_and_free(result, d_result, size, label);
QU::device_free(d_sample);
cudaDeviceSynchronize();
- LOG(LEVEL) << "]" ;
+ LOG(LEVEL) << "]";
}
-
-
-
unsigned QSim::getBoundaryTexWidth() const
{
- return bnd->tex->width ;
+ return bnd->tex->width;
}
unsigned QSim::getBoundaryTexHeight() const
{
- return bnd->tex->height ;
+ return bnd->tex->height;
}
-const NP* QSim::getBoundaryTexSrc() const
+const NP *QSim::getBoundaryTexSrc() const
{
- return bnd->src ;
+ return bnd->src;
}
-void QSim::dump_photon( quad4* photon, unsigned num_photon, const char* opt_, unsigned edgeitems )
+void QSim::dump_photon(quad4 *photon, unsigned num_photon, const char *opt_, unsigned edgeitems)
{
LOG(LEVEL);
- std::string opt = opt_ ;
+ std::string opt = opt_;
- bool f0 = opt.find("f0") != std::string::npos ;
- bool f1 = opt.find("f1") != std::string::npos ;
- bool f2 = opt.find("f2") != std::string::npos ;
- bool f3 = opt.find("f3") != std::string::npos ;
+ bool f0 = opt.find("f0") != std::string::npos;
+ bool f1 = opt.find("f1") != std::string::npos;
+ bool f2 = opt.find("f2") != std::string::npos;
+ bool f3 = opt.find("f3") != std::string::npos;
- bool i0 = opt.find("i0") != std::string::npos ;
- bool i1 = opt.find("i1") != std::string::npos ;
- bool i2 = opt.find("i2") != std::string::npos ;
- bool i3 = opt.find("i3") != std::string::npos ;
+ bool i0 = opt.find("i0") != std::string::npos;
+ bool i1 = opt.find("i1") != std::string::npos;
+ bool i2 = opt.find("i2") != std::string::npos;
+ bool i3 = opt.find("i3") != std::string::npos;
- int wi = 7 ;
- int pr = 2 ;
+ int wi = 7;
+ int pr = 2;
- for(unsigned i=0 ; i < num_photon ; i++)
+ for (unsigned i = 0; i < num_photon; i++)
{
- if( i < edgeitems || i > num_photon - edgeitems)
+ if (i < edgeitems || i > num_photon - edgeitems)
{
- const quad4& p = photon[i] ;
-
- std::cout
- << std::setw(wi) << i
- ;
-
- if(f0) std::cout
- << " f0 "
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q0.f.x
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q0.f.y
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q0.f.z
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q0.f.w
- ;
-
- if(f1) std::cout
- << " f1 "
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q1.f.x
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q1.f.y
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q1.f.z
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q1.f.w
- ;
-
- if(f2) std::cout
- << " f2 "
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q2.f.x
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q2.f.y
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q2.f.z
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q2.f.w
- ;
-
- if(f3) std::cout
- << " f3 "
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q3.f.x
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q3.f.y
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q3.f.z
- << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q3.f.w
- ;
-
- if(i0) std::cout
- << " i0 "
- << std::setw(wi) << p.q0.i.x
- << std::setw(wi) << p.q0.i.y
- << std::setw(wi) << p.q0.i.z
- << std::setw(wi) << p.q0.i.w
- ;
-
- if(i1) std::cout
- << " i1 "
- << std::setw(wi) << p.q1.i.x
- << std::setw(wi) << p.q1.i.y
- << std::setw(wi) << p.q1.i.z
- << std::setw(wi) << p.q1.i.w
- ;
-
- if(i2) std::cout
- << " i2 "
- << std::setw(wi) << p.q2.i.x
- << std::setw(wi) << p.q2.i.y
- << std::setw(wi) << p.q2.i.z
- << std::setw(wi) << p.q2.i.w
- ;
-
- if(i3) std::cout
- << " i3 "
- << std::setw(wi) << p.q3.i.x
- << std::setw(wi) << p.q3.i.y
- << std::setw(wi) << p.q3.i.z
- << std::setw(wi) << p.q3.i.w
- ;
-
- std::cout
- << std::endl
- ;
+ const quad4 &p = photon[i];
+
+ std::cout << std::setw(wi) << i;
+
+ if (f0)
+ std::cout << " f0 " << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q0.f.x << std::setw(wi)
+ << std::fixed << std::setprecision(pr) << p.q0.f.y << std::setw(wi) << std::fixed
+ << std::setprecision(pr) << p.q0.f.z << std::setw(wi) << std::fixed << std::setprecision(pr)
+ << p.q0.f.w;
+
+ if (f1)
+ std::cout << " f1 " << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q1.f.x << std::setw(wi)
+ << std::fixed << std::setprecision(pr) << p.q1.f.y << std::setw(wi) << std::fixed
+ << std::setprecision(pr) << p.q1.f.z << std::setw(wi) << std::fixed << std::setprecision(pr)
+ << p.q1.f.w;
+
+ if (f2)
+ std::cout << " f2 " << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q2.f.x << std::setw(wi)
+ << std::fixed << std::setprecision(pr) << p.q2.f.y << std::setw(wi) << std::fixed
+ << std::setprecision(pr) << p.q2.f.z << std::setw(wi) << std::fixed << std::setprecision(pr)
+ << p.q2.f.w;
+
+ if (f3)
+ std::cout << " f3 " << std::setw(wi) << std::fixed << std::setprecision(pr) << p.q3.f.x << std::setw(wi)
+ << std::fixed << std::setprecision(pr) << p.q3.f.y << std::setw(wi) << std::fixed
+ << std::setprecision(pr) << p.q3.f.z << std::setw(wi) << std::fixed << std::setprecision(pr)
+ << p.q3.f.w;
+
+ if (i0)
+ std::cout << " i0 " << std::setw(wi) << p.q0.i.x << std::setw(wi) << p.q0.i.y << std::setw(wi)
+ << p.q0.i.z << std::setw(wi) << p.q0.i.w;
+
+ if (i1)
+ std::cout << " i1 " << std::setw(wi) << p.q1.i.x << std::setw(wi) << p.q1.i.y << std::setw(wi)
+ << p.q1.i.z << std::setw(wi) << p.q1.i.w;
+
+ if (i2)
+ std::cout << " i2 " << std::setw(wi) << p.q2.i.x << std::setw(wi) << p.q2.i.y << std::setw(wi)
+ << p.q2.i.z << std::setw(wi) << p.q2.i.w;
+
+ if (i3)
+ std::cout << " i3 " << std::setw(wi) << p.q3.i.x << std::setw(wi) << p.q3.i.y << std::setw(wi)
+ << p.q3.i.z << std::setw(wi) << p.q3.i.w;
+
+ std::cout << std::endl;
}
}
}
-
/**
QSim::Desc
------------
@@ -1960,10 +1677,10 @@ Dump flags with::
ssys_test
**/
-std::string QSim::Desc(char delim) // static
+std::string QSim::Desc(char delim) // static
{
- std::stringstream ss ;
- ss << ( delim == ',' ? "" : "QSim::Desc\n" )
+ std::stringstream ss;
+ ss << (delim == ',' ? "" : "QSim::Desc\n")
#ifdef CONFIG_Debug
<< "CONFIG_Debug"
#else
@@ -2041,17 +1758,12 @@ std::string QSim::Desc(char delim) // static
#else
<< "NOT-RNG_PHILITEOX"
#endif
- << delim
- ;
- std::string str = ss.str() ;
- return str ;
+ << delim;
+ std::string str = ss.str();
+ return str;
}
-
-
-std::string QSim::Switches() // static
+std::string QSim::Switches() // static
{
return Desc(',');
}
-
-
diff --git a/qudarap/QSim.hh b/qudarap/QSim.hh
index 5d9c38471..16e0f81a6 100644
--- a/qudarap/QSim.hh
+++ b/qudarap/QSim.hh
@@ -1,8 +1,8 @@
#pragma once
+#include
#include
#include
-#include
#include "QUDARAP_API_EXPORT.hh"
#include "plog/Severity.h"
@@ -23,194 +23,182 @@ HMM : MOST OF THIS API IS FOR TESTING ONLY : TODO: Move lots to QSimTest perhap
**/
-struct NP ;
-struct SSim ;
-struct SEvt ;
+struct NP;
+struct SSim;
+struct SEvt;
-template struct QTex ;
-template struct QBuf ;
-template struct QProp ;
-template struct QPMT ;
+template struct QTex;
+template struct QBuf;
+template struct QProp;
+template struct QPMT;
-struct qsim ;
+struct qsim;
-struct QBase ;
-struct QEvt ;
-struct QRng ;
-struct QScint ;
-struct QCerenkov ;
-struct QBnd ;
+struct QBase;
+struct QEvt;
+struct QRng;
+struct QScint;
+struct QWls;
+struct QCerenkov;
+struct QBnd;
struct QMultiFilm;
-struct QOptical ;
-struct QEvt ;
-struct QDebug ;
+struct QOptical;
+struct QEvt;
+struct QDebug;
-struct qdebug ;
-struct sstate ;
+struct qdebug;
+struct sstate;
-struct quad4 ;
-struct quad2 ;
-struct sphoton ;
-union quad ;
+struct quad4;
+struct quad2;
+struct sphoton;
+union quad;
-struct SSimulator ;
+struct SSimulator;
struct QUDARAP_API QSim
{
- static constexpr const int64_t M = 1000000 ;
- static constexpr const int64_t G = 1000000000 ;
-
- static const plog::Severity LEVEL ;
- static const char* PREFIX ;
- static QSim* INSTANCE ;
- static QSim* Get();
- static QSim* Create();
+ static constexpr const int64_t M = 1000000;
+ static constexpr const int64_t G = 1000000000;
- static void UploadComponents(const SSim* ssim);
+ static const plog::Severity LEVEL;
+ static const char *PREFIX;
+ static QSim *INSTANCE;
+ static QSim *Get();
+ static QSim *Create();
- const QBase* base ;
- QEvt* qev ;
- SEvt* sev ;
+ static void UploadComponents(const SSim *ssim);
- const QRng* rng ;
- const QScint* scint ;
- const QCerenkov* cerenkov ;
- const QBnd* bnd ;
- const QOptical* optical ;
- const QDebug* debug_ ;
+ const QBase *base;
+ QEvt *qev;
+ SEvt *sev;
- const QProp* prop ;
- const QPMT* pmt ;
- const QMultiFilm* multifilm ;
+ const QRng *rng;
+ const QScint *scint;
+ const QWls *qwls;
+ const QCerenkov *cerenkov;
+ const QBnd *bnd;
+ const QOptical *optical;
+ const QDebug *debug_;
- qsim* sim ;
- qsim* d_sim ;
+ const QProp *prop;
+ const QPMT *pmt;
+ const QMultiFilm *multifilm;
- qdebug* dbg ;
- qdebug* d_dbg ;
+ qsim *sim;
+ qsim *d_sim;
- SSimulator* cx ;
+ qdebug *dbg;
+ qdebug *d_dbg;
+ SSimulator *cx;
- dim3 numBlocks ;
- dim3 threadsPerBlock ;
+ dim3 numBlocks;
+ dim3 threadsPerBlock;
-private:
+ private:
QSim();
void init();
- static constexpr const char* _QSim__REQUIRE_PMT = "QSim__REQUIRE_PMT" ;
- static const bool REQUIRE_PMT;
-
- static constexpr const char* _QSim__SAVE_IGS_EVENTID = "QSim__SAVE_IGS_EVENTID" ;
- static const int SAVE_IGS_EVENTID ;
-
- static constexpr const char* _QSim__SAVE_IGS_PATH = "QSim__SAVE_IGS_PATH" ;
- static const char* SAVE_IGS_PATH ;
+ static constexpr const char *_QSim__REQUIRE_PMT = "QSim__REQUIRE_PMT";
+ static const bool REQUIRE_PMT;
- static constexpr const char* _QSim__CONCAT = "QSim__CONCAT" ;
- static const bool CONCAT ;
+ static constexpr const char *_QSim__SAVE_IGS_EVENTID = "QSim__SAVE_IGS_EVENTID";
+ static const int SAVE_IGS_EVENTID;
- static constexpr const char* _QSim__ALLOC = "QSim__ALLOC" ;
- static const bool ALLOC ;
+ static constexpr const char *_QSim__SAVE_IGS_PATH = "QSim__SAVE_IGS_PATH";
+ static const char *SAVE_IGS_PATH;
+ static constexpr const char *_QSim__CONCAT = "QSim__CONCAT";
+ static const bool CONCAT;
-public:
- void setLauncher(SSimulator* cx_ );
+ static constexpr const char *_QSim__ALLOC = "QSim__ALLOC";
+ static const bool ALLOC;
- static constexpr const char* QSim__simulate_KEEP_SUBFOLD = "QSim__simulate_KEEP_SUBFOLD" ;
- static bool KEEP_SUBFOLD ;
+ public:
+ void setLauncher(SSimulator *cx_);
- double simulate(int eventID, bool reset_ ); // via cx launch
- void simulate_final_merge(int64_t tot_ph, cudaStream_t stream);
+ static constexpr const char *QSim__simulate_KEEP_SUBFOLD = "QSim__simulate_KEEP_SUBFOLD";
+ static bool KEEP_SUBFOLD;
+ double simulate(int eventID, bool reset_); // via cx launch
+ void simulate_final_merge(int64_t tot_ph, cudaStream_t stream);
+ NP *simulate(const NP *gs, int eventID); // higher level API for use from CSGOptiXService.h
- NP* simulate(const NP* gs, int eventID ); // higher level API for use from CSGOptiXService.h
+ static void MaybeSaveIGS(int eventID, NP *igs);
- static void MaybeSaveIGS(int eventID, NP* igs);
+ unsigned long long get_photon_slot_offset() const;
- unsigned long long get_photon_slot_offset() const ;
-
- void reset( int eventID);
+ void reset(int eventID);
double simtrace(int eventID);
-
- qsim* getDevicePtr() const ;
- std::string desc() const ;
- std::string descFull() const ;
- std::string descComponents() const ;
-
+ qsim *getDevicePtr() const;
+ std::string desc() const;
+ std::string descFull() const;
+ std::string descComponents() const;
// TODO: relocate non-essential methods into tests or elsewhere
- char getScintTexFilterMode() const ;
+ char getScintTexFilterMode() const;
void configureLaunch16();
- void configureLaunch( unsigned width, unsigned height );
- void configureLaunch2D( unsigned width, unsigned height );
+ void configureLaunch(unsigned width, unsigned height);
+ void configureLaunch2D(unsigned width, unsigned height);
void configureLaunch1D(unsigned num, unsigned threads_per_block);
- std::string descLaunch() const ;
-
+ std::string descLaunch() const;
- template
- void rng_sequence( dim3 numblocks, dim3 threadsPerBlock, qsim* d_sim, T* d_seq, unsigned ni_tranche, unsigned nv, unsigned ioffset );
+ template
+ void rng_sequence(dim3 numblocks, dim3 threadsPerBlock, qsim *d_sim, T *d_seq, unsigned ni_tranche, unsigned nv,
+ unsigned ioffset);
- template
- void rng_sequence( T* seq, unsigned ni, unsigned nj, unsigned ioffset );
+ template void rng_sequence(T *seq, unsigned ni, unsigned nj, unsigned ioffset);
- template
- void rng_sequence( const char* dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size );
+ template
+ void rng_sequence(const char *dir, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size);
+ NP *scint_wavelength(unsigned num_wavelength, unsigned &hd_factor);
- NP* scint_wavelength( unsigned num_wavelength, unsigned& hd_factor );
-
- NP* RandGaussQ_shoot(unsigned num_v );
-
+ NP *RandGaussQ_shoot(unsigned num_v);
// NP* cerenkov_wavelength_rejection_sampled( unsigned num_wavelength );
- void dump_wavelength( float* wavelength, unsigned num_wavelength, unsigned edgeitems=10 );
-
-
- NP* dbg_gs_generate(unsigned num_photon, unsigned type );
+ void dump_wavelength(float *wavelength, unsigned num_wavelength, unsigned edgeitems = 10);
+ NP *dbg_gs_generate(unsigned num_photon, unsigned type);
- void dump_photon( quad4* photon, unsigned num_photon, const char* opt="f0,f1,f2,i3", unsigned egdeitems=10 );
+ void dump_photon(quad4 *photon, unsigned num_photon, const char *opt = "f0,f1,f2,i3", unsigned egdeitems = 10);
void generate_photon();
- void fill_state_0(quad6* state, unsigned num_state);
- void fill_state_1(sstate* state, unsigned num_state);
+ void fill_state_0(quad6 *state, unsigned num_state);
+ void fill_state_1(sstate *state, unsigned num_state);
- NP* quad_launch_generate(unsigned num_quad, unsigned type );
- NP* photon_launch_generate(unsigned num_photon, unsigned type );
+ NP *quad_launch_generate(unsigned num_quad, unsigned type);
+ NP *photon_launch_generate(unsigned num_photon, unsigned type);
- static constexpr const char* _QSim__photon_launch_mutate_DEBUG_NUM_PHOTON = "QSim__photon_launch_mutate_DEBUG_NUM_PHOTON" ;
- static constexpr const char* _QSim__photon_launch_mutate_SKIP_LAUNCH = "QSim__photon_launch_mutate_SKIP_LAUNCH" ;
- void photon_launch_mutate( sphoton* photon, unsigned num_photon, unsigned type );
+ static constexpr const char *_QSim__photon_launch_mutate_DEBUG_NUM_PHOTON =
+ "QSim__photon_launch_mutate_DEBUG_NUM_PHOTON";
+ static constexpr const char *_QSim__photon_launch_mutate_SKIP_LAUNCH = "QSim__photon_launch_mutate_SKIP_LAUNCH";
+ void photon_launch_mutate(sphoton *photon, unsigned num_photon, unsigned type);
+ static quad2 *UploadFakePRD(const NP *ip, const NP *prd);
+ void fake_propagate(const NP *prd, unsigned type);
- static quad2* UploadFakePRD(const NP* ip, const NP* prd);
- void fake_propagate(const NP* prd, unsigned type );
+ unsigned getBoundaryTexWidth() const;
+ unsigned getBoundaryTexHeight() const;
+ const NP *getBoundaryTexSrc() const;
- unsigned getBoundaryTexWidth() const ;
- unsigned getBoundaryTexHeight() const ;
- const NP* getBoundaryTexSrc() const ;
+ NP *boundary_lookup_all(unsigned width, unsigned height);
+ NP *boundary_lookup_line(float *domain, unsigned num_lookup, unsigned line, unsigned k);
- NP* boundary_lookup_all( unsigned width, unsigned height ) ;
- NP* boundary_lookup_line( float* domain, unsigned num_lookup, unsigned line, unsigned k ) ;
+ template
+ void prop_lookup(T *lookup, const T *domain, unsigned domain_width, const std::vector &pids);
+ template
+ void prop_lookup_onebyone(T *lookup, const T *domain, unsigned domain_width, const std::vector &pids);
- template
- void prop_lookup( T* lookup, const T* domain, unsigned domain_width, const std::vector& pids ) ;
+ void multifilm_lookup_all(quad2 *sample, quad2 *result, unsigned width, unsigned height);
- template
- void prop_lookup_onebyone( T* lookup, const T* domain, unsigned domain_width, const std::vector& pids ) ;
-
- void multifilm_lookup_all( quad2* sample , quad2* result , unsigned width, unsigned height );
-
- static std::string Desc(char delim='\n');
+ static std::string Desc(char delim = '\n');
static std::string Switches();
};
-
-
diff --git a/qudarap/QU.cc b/qudarap/QU.cc
index 97aacf985..82fde9e2c 100644
--- a/qudarap/QU.cc
+++ b/qudarap/QU.cc
@@ -3,9 +3,9 @@
#include "NP.hh"
#include "SLOG.hh"
-#include "spath.h"
-#include "sdirectory.h"
#include "scuda.h"
+#include "sdirectory.h"
+#include "spath.h"
#include "squad.h"
#include "ssys.h"
@@ -17,100 +17,91 @@
#include "sphoton.h"
#include "sphotonlite.h"
-#include "sevent.h"
-#include "salloc.h"
#include "SEventConfig.hh"
+#include "salloc.h"
+#include "sevent.h"
-#include "QUDA_CHECK.h"
#include "QU.hh"
+#include "QUDA_CHECK.h"
#include "curand_kernel.h"
#include "qrng.h"
#include "qsim.h"
#include "qbase.h"
-#include "qprop.h"
-#include "qpmt.h"
-#include "qdebug.h"
-#include "qscint.h"
#include "qcerenkov.h"
#include "qcurandwrap.h"
-#include "scurandref.h"
+#include "qdebug.h"
#include "qmultifilm.h"
+#include "qpmt.h"
+#include "qprop.h"
+#include "qscint.h"
+#include "qwls.h"
+#include "scurandref.h"
-
-const plog::Severity QU::LEVEL = SLOG::EnvLevel("QU", "DEBUG") ;
+const plog::Severity QU::LEVEL = SLOG::EnvLevel("QU", "DEBUG");
bool QU::MEMCHECK = ssys::getenvbool(_MEMCHECK);
-salloc* QU::alloc = nullptr ;
-
+salloc *QU::alloc = nullptr;
-void QU::alloc_add(const char* label, uint64_t num_items, uint64_t sizeof_item ) // static
+void QU::alloc_add(const char *label, uint64_t num_items, uint64_t sizeof_item) // static
{
- if(!alloc) alloc = SEventConfig::ALLOC ;
- if(alloc ) alloc->add(label, num_items, sizeof_item );
+ if (!alloc)
+ alloc = SEventConfig::ALLOC;
+ if (alloc)
+ alloc->add(label, num_items, sizeof_item);
}
-
-template
-char QU::typecode()
+template char QU::typecode()
{
- char c = '?' ;
- switch(sizeof(T))
+ char c = '?';
+ switch (sizeof(T))
{
- case 4: c = 'f' ; break ;
- case 8: c = 'd' ; break ;
+ case 4:
+ c = 'f';
+ break;
+ case 8:
+ c = 'd';
+ break;
}
- return c ;
+ return c;
}
-template char QU::typecode() ;
-template char QU::typecode() ;
-
+template char QU::typecode();
+template char QU::typecode();
template
-std::string QU::rng_sequence_name(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ioffset ) // static
+std::string QU::rng_sequence_name(const char *prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ioffset) // static
{
- std::stringstream ss ;
- ss << prefix
- << "_" << QU::typecode()
- << "_ni" << ni
- << "_nj" << nj
- << "_nk" << nk
- << "_ioffset" << std::setw(6) << std::setfill('0') << ioffset
- << ".npy"
- ;
+ std::stringstream ss;
+ ss << prefix << "_" << QU::typecode() << "_ni" << ni << "_nj" << nj << "_nk" << nk << "_ioffset" << std::setw(6)
+ << std::setfill('0') << ioffset << ".npy";
std::string name = ss.str();
- return name ;
+ return name;
}
-template std::string QU::rng_sequence_name(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ioffset ) ;
-template std::string QU::rng_sequence_name(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ioffset ) ;
-
-
+template std::string QU::rng_sequence_name(const char *prefix, unsigned ni, unsigned nj, unsigned nk,
+ unsigned ioffset);
+template std::string QU::rng_sequence_name(const char *prefix, unsigned ni, unsigned nj, unsigned nk,
+ unsigned ioffset);
template
-std::string QU::rng_sequence_reldir(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size ) // static
+std::string QU::rng_sequence_reldir(const char *prefix, unsigned ni, unsigned nj, unsigned nk,
+ unsigned ni_tranche_size) // static
{
- std::stringstream ss ;
- ss << prefix
- << "_" << QU::typecode()
- << "_ni" << ni
- << "_nj" << nj
- << "_nk" << nk
- << "_tranche" << ni_tranche_size
- ;
+ std::stringstream ss;
+ ss << prefix << "_" << QU::typecode() << "_ni" << ni << "_nj" << nj << "_nk" << nk << "_tranche"
+ << ni_tranche_size;
std::string reldir = ss.str();
- return reldir ;
+ return reldir;
}
-template std::string QU::rng_sequence_reldir(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size ) ;
-template std::string QU::rng_sequence_reldir(const char* prefix, unsigned ni, unsigned nj, unsigned nk, unsigned ni_tranche_size ) ;
-
-
-
+template std::string QU::rng_sequence_reldir(const char *prefix, unsigned ni, unsigned nj, unsigned nk,
+ unsigned ni_tranche_size);
+template std::string QU::rng_sequence_reldir(const char *prefix, unsigned ni, unsigned nj, unsigned nk,
+ unsigned ni_tranche_size);
/**
QU::UploadArray
@@ -120,61 +111,52 @@ Allocate on device and copy from host to device
**/
-template
-T* QU::UploadArray(const T* array, unsigned num_items, const char* label ) // static
+template T *QU::UploadArray(const T *array, unsigned num_items, const char *label) // static
{
- size_t size = num_items*sizeof(T) ;
-
- LOG(LEVEL)
- << " num_items " << num_items
- << " size " << size
- << " label " << ( label ? label : "-" )
- ;
+ size_t size = num_items * sizeof(T);
- LOG_IF(info, MEMCHECK)
- << " num_items " << num_items
- << " size " << size
- << " label " << ( label ? label : "-" )
- ;
+ LOG(LEVEL) << " num_items " << num_items << " size " << size << " label " << (label ? label : "-");
+ LOG_IF(info, MEMCHECK) << " num_items " << num_items << " size " << size << " label " << (label ? label : "-");
- alloc_add( label, num_items, sizeof(T) ) ;
+ alloc_add(label, num_items, sizeof(T));
- T* d_array = nullptr ;
- QUDA_CHECK( cudaMalloc(reinterpret_cast( &d_array ), size ));
- QUDA_CHECK( cudaMemcpy(reinterpret_cast( d_array ), array, size, cudaMemcpyHostToDevice ));
- return d_array ;
+ T *d_array = nullptr;
+ QUDA_CHECK(cudaMalloc(reinterpret_cast(&d_array), size));
+ QUDA_CHECK(cudaMemcpy(reinterpret_cast(d_array), array, size, cudaMemcpyHostToDevice));
+ return d_array;
}
-
// IF NEED THESE FROM REMOVE PKG WILL NEED TO QUDARAP_API
-template float* QU::UploadArray(const float* array, unsigned num_items, const char* label ) ;
-template double* QU::UploadArray(const double* array, unsigned num_items, const char* label) ;
-template unsigned* QU::UploadArray(const unsigned* array, unsigned num_items, const char* label) ;
-template int* QU::UploadArray(const int* array, unsigned num_items, const char* label) ;
-template quad4* QU::UploadArray(const quad4* array, unsigned num_items, const char* label) ;
-template sphoton* QU::UploadArray(const sphoton* array, unsigned num_items, const char* label) ;
-template sphotonlite* QU::UploadArray(const sphotonlite* array, unsigned num_items, const char* label) ;
-template quad2* QU::UploadArray(const quad2* array, unsigned num_items, const char* label) ;
-template XORWOW* QU::UploadArray(const XORWOW* array, unsigned num_items, const char* label) ;
-template Philox* QU::UploadArray(const Philox* array, unsigned num_items, const char* label) ;
-template qcurandwrap* QU::UploadArray>(const qcurandwrap* array, unsigned num_items, const char* label) ;
-template scurandref