trinity/strategy.c at master · kernelslacker/trinity · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
 * Multi-strategy syscall-picker rotation: arm-selection policies.
 *
 * Two arm-selection policies are wired in here: a fixed round-robin
 * (the original picker) and a UCB1 bandit picker.  Both consume the
 * per-strategy edge attribution recorded in
 * shm->pc_edge_calls_by_strategy[]; the bandit treats those call
 * counts as the reward signal and biases future window picks toward
 * arms producing new-edge calls the fastest, while still occasionally
 * exploring the others so a temporarily-stuck arm doesn't starve
 * forever.  The parallel pc_edge_count_by_strategy[] series (real
 * bucket counts) is surfaced in dump_strategy_stats() as a diagnostic
 * but does not currently feed the learner.
 *
 * Picker mode is selected once at parse_args() time via --strategy
 * and stashed in shm->picker_mode so every child agrees on the
 * policy.  The CAS-winning child at a rotation boundary calls
 * pick_next_strategy() and bandit_record_pull(), runs the bandit
 * math itself, and writes the outcome back to shm->current_strategy.
 *
 * UCB1 is tractable here because the arm count is tiny (NR_STRATEGIES
 * is a handful -- three today, see enum strategy_t) and the picker only
 * runs once per STRATEGY_WINDOW (~100 sec at 10K iter/sec).
 * Floating-point sqrt and log inside the picker are noise relative to
 * the work done in the window itself.
 */

#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "child.h"		/* struct childdata */
#include "cmp_hints.h"		/* cmp_hints_shm */
#include "cred_throttle.h"	/* cred_class_for_nr, CRED_CLASS_NR */
#include "debug.h"
#include "kcov.h"		/* KCOV_CMP_RECORDS_MAX, kcov_shm */
#include "params.h"
#include "rnd.h"
#include "shm.h"
#include "stats.h"		/* stats_log_write */
#include "stats_ring.h"		/* parent_stats */
#include "strategy.h"
#include "syscall.h"		/* MAX_NR_SYSCALL */
#include "tables.h"		/* syscalls, max_nr_syscalls */
#include "utils.h"

/* Same KCOV_CMP_CONST bit cmp_hints.c uses; from uapi/linux/kcov.h. */
#define KCOV_CMP_CONST  (1U << 0)
#define WORDS_PER_CMP   4

/*
 * Set by parse_args() before init_shm() runs, then propagated into
 * shm->picker_mode where every child reads it on the rotation path.
 * Not declared in a header — only params.c writes it and only
 * init_shm() reads it.
 *
 * Default is PICKER_BANDIT_UCB1: the plateau-intervention layer in
 * select_next_strategy() is bandit-gated, and the mode-aware default
 * for explorer_children (see clamp_default_explorer_children() in
 * params.c) only allocates the strategy-independent explorer pool
 * under bandit mode.  Defaulting to round-robin made both of those
 * adaptive paths silently inert -- a plateau under the round-robin
 * default would flip kcov_shm->plateau_active with nothing on the
 * picker side to respond.  --strategy=round-robin (alias rr) and
 * --strategy=bandit (aliases ucb1, bandit-ucb1) remain available as
 * explicit overrides.
 */
enum picker_mode_t picker_mode_arg = PICKER_BANDIT_UCB1;

/*
 * UCB1 exploration constant.  Standard derivation gives c = sqrt(2);
 * we leave it tunable because reward magnitudes (edges-per-window)
 * are not in [0,1] and the right exploration weight depends on the
 * normalisation choice.  Rewards are normalised by the largest
 * mean-reward observed across arms (see ucb1_score), so a c near 1
 * keeps the exploration term comparable to the exploit term.
 */
#define UCB1_EXPLORATION_C 1.41421356  /* sqrt(2) */

/*
 * EMA discount for the recent_pulls_x1000[]/recent_reward_x1000[]
 * counters defined in shm.h.  Each non-intervention window multiplies
 * every arm's counter by gamma = 1 - alpha, then increments the
 * active arm by 1.0 / window_reward.  Alpha = 0.05 gives a half-life
 * of log(0.5)/log(0.95) ≈ 13.5 windows -- around 22 minutes of fleet
 * wall time at the ~100 sec/window cadence, comfortably inside the
 * 10-30 window design target and short enough that the picker
 * recovers within a few rotations when an arm's yield collapses.
 *
 * BANDIT_EMA_SCALE is the fixed-point divisor: counters are stored
 * as their real value times 1000 so the integer math stays exact for
 * the alpha=0.05 step.
 */
#define BANDIT_EMA_ALPHA_X1000  50UL
#define BANDIT_EMA_SCALE        1000UL

/*
 * Decay one fixed-point counter by gamma = 1 - alpha.  Operates on
 * the parts-per-thousand value: 950/1000 stays integer and the
 * multiplication can't overflow because the inputs cap at
 * ~SCALE/alpha * (typical per-window reward) = a few million for
 * recent_pulls_x1000 (≤ 20000) and on the order of 1e9 for
 * recent_reward_x1000 (bounded by the headroom of unsigned long).
 */
static inline unsigned long bandit_ema_decay(unsigned long x_x1000)
{
	return (x_x1000 * (BANDIT_EMA_SCALE - BANDIT_EMA_ALPHA_X1000)
		+ BANDIT_EMA_SCALE / 2) /
	       BANDIT_EMA_SCALE;
}

/*
 * Translate the --strategy=NAME argument into a picker_mode_t.
 * Recognises the human-friendly aliases ("round-robin", "rr",
 * "bandit", "ucb1", "bandit-ucb1").  Returns false on unknown
 * input so the caller can emit a tidy error before exiting.
 */
bool parse_picker_mode(const char *name, enum picker_mode_t *out)
{
	if (name == NULL || out == NULL)
		return false;

	if (strcmp(name, "round-robin") == 0 ||
	    strcmp(name, "rr") == 0) {
		*out = PICKER_ROUND_ROBIN;
		return true;
	}
	if (strcmp(name, "bandit") == 0 ||
	    strcmp(name, "ucb1") == 0 ||
	    strcmp(name, "bandit-ucb1") == 0) {
		*out = PICKER_BANDIT_UCB1;
		return true;
	}
	return false;
}

const char *picker_mode_name(enum picker_mode_t mode)
{
	switch (mode) {
	case PICKER_ROUND_ROBIN:	return "round-robin";
	case PICKER_BANDIT_UCB1:	return "bandit-ucb1";
	}
	return "unknown";
}

const char *strategy_name(int arm)
{
	switch (arm) {
	case STRATEGY_HEURISTIC:		return "HEURISTIC";
	case STRATEGY_RANDOM:			return "RANDOM";
	case STRATEGY_COVERAGE_FRONTIER:	return "COVERAGE_FRONTIER";
	default:				return "?";
	}
}

bool is_strategy_eligible(int arm)
{
	if (arm < 0 || arm >= NR_STRATEGIES)
		return false;
	return true;
}

/*
 * Record the just-finished window: bump pull count for the arm that
 * was active, add its edge yield plus the CMP-novelty term to the
 * cumulative reward, and update the diagnostic CMP-share running sum.
 * Called by the CAS-winning child during maybe_rotate_strategy(),
 * which serialises with the picker — so picker-side reads of
 * bandit_pulls[] / bandit_reward_calls[] / bandit_reward_pc_edge_count[]
 * / bandit_cmp_share_sum_x1000[] inside pick_next_strategy() are
 * covered by the strategy-switch store's release semantics on the
 * next CAS winner.
 *
 * dump_strategy_stats() is parent-side and runs outside the CAS
 * protocol, so it can race a child that is mid-update here.  To keep
 * its per-arm reads from tearing against these writes, the writes use
 * __atomic_fetch_add(..., RELAXED) and dump_strategy_stats() pairs
 * them with __atomic_load_n(..., RELAXED).  Relaxed is sufficient: the
 * dump is a diagnostic snapshot with no ordering requirement against
 * other shm fields.  The stats counter bump uses an atomic add for the
 * same reason.
 *
 * pc_edge_calls is the per-window pc_edge_calls_by_strategy delta
 * (calls with >=1 new edge).  pc_edge_count is the parallel
 * pc_edge_count_by_strategy delta (real bucket-edge bits flipped).
 * cmp_new_constants is the per-window bandit_cmp_new_constants delta.
 *
 * The learner-facing combined reward is
 *   pc_edge_calls + cmp_new_constants / CMP_BANDIT_REWARD_WEIGHT_RECIPROCAL
 * (integer division — sub-weight CMP residues round to zero so a
 * window with a handful of novel constants doesn't perturb the
 * headline PC signal).  The pc_edge_count delta accumulates into the
 * parallel diagnostic reward (no cmp term folded in) so the operator
 * can compare what the real-count reward would have looked like.
 */
void bandit_record_pull(int arm, enum strategy_selection_reason reason,
			unsigned long pc_edge_calls,
			unsigned long pc_edge_count,
			unsigned long cmp_new_constants,
			unsigned long warn_fires,
			bool was_chaos)
{
	unsigned long cmp_term;
	unsigned long trans_term;
	unsigned long total;
	unsigned int chaos_idx;
	int i;

	if (arm < 0 || arm >= NR_STRATEGIES)
		return;
	if (reason < 0 || reason >= NR_SELECTION_REASONS)
		return;

	cmp_term = cmp_new_constants / CMP_BANDIT_REWARD_WEIGHT_RECIPROCAL;

	/* Transition-reward window delta.  Read from shm->stats directly
	 * rather than threaded through a new parameter so the function
	 * signature (and its declaration in include/strategy.h) stays
	 * unchanged.  The transition window-start snapshot is reseeded
	 * during the rotation handler in maybe_rotate_strategy (random-
	 * syscall.c) at the same RELAXED cadence as the pc_edge_*_at_
	 * window_start pair; computing the delta here is symmetric to how
	 * maybe_rotate_strategy computes pc_edge_calls / pc_edge_count
	 * before calling in.
	 *
	 * trans_term divides the raw transition delta by TRANSITION_BANDIT_
	 * REWARD_WEIGHT_RECIPROCAL (= 4, a 0.25 secondary weight matching
	 * the CMP-term shape) and is added only under COMBINED mode.  The
	 * per-call cap that prevents one pathological trace from
	 * monopolizing the window lives at the source bump in random-
	 * syscall.c (TRANSITION_PER_CALL_REWARD_CAP); no additional cap
	 * is needed here.  Under SHADOW_ONLY/OFF trans_term stays zero so
	 * the bandit reward total is byte-identical to the pre-knob
	 * baseline. */
	if (__atomic_load_n(&kcov_transition_reward_mode,
			    __ATOMIC_RELAXED) ==
	    KCOV_TRANSITION_REWARD_COMBINED) {
		unsigned long trans_now = __atomic_load_n(
			&shm->stats.transition_edge_count_by_strategy[arm],
			__ATOMIC_RELAXED);
		unsigned long trans_start = __atomic_load_n(
			&shm->stats.transition_edge_count_at_window_start,
			__ATOMIC_RELAXED);
		unsigned long trans_delta = (trans_now >= trans_start) ?
					     (trans_now - trans_start) : 0UL;

		trans_term = trans_delta /
			     TRANSITION_BANDIT_REWARD_WEIGHT_RECIPROCAL;
	} else {
		trans_term = 0;
	}

	total = pc_edge_calls + cmp_term + trans_term;

	/* Always bucket the just-finished window by (arm, reason) before
	 * any cohort-gated learner update.  These matrices are diagnostic
	 * (the picker does not score against them) so they capture every
	 * window including SR_PLATEAU_FORCE -- intervention reward is the
	 * exact signal a future plateau-rescue classifier wants to read
	 * back, and excluding it here would silently zero the cohort the
	 * classifier is meant to study. */
	__atomic_fetch_add(&shm->bandit_pulls_by_reason[arm][reason], 1UL,
			   __ATOMIC_RELAXED);
	__atomic_fetch_add(&shm->bandit_reward_calls_by_reason[arm][reason],
			   total, __ATOMIC_RELAXED);
	__atomic_fetch_add(
		&shm->bandit_reward_pc_edge_count_by_reason[arm][reason],
		pc_edge_count, __ATOMIC_RELAXED);

	/* SR_PLATEAU_FORCE windows skip the learner-facing updates: an
	 * intervention window ran STRATEGY_RANDOM because every arm was
	 * stalled, which is structurally different from "RANDOM scored
	 * best under UCB" (the bandit had no input on the pick).  Folding
	 * the forced window into bandit_pulls[] / bandit_reward_calls[] /
	 * the recent_*_x1000 EMA contaminates the learner so the bandit
	 * can't tell policy-chosen RANDOM windows from forced RANDOM
	 * windows once the plateau clears.  The caller-side guard that
	 * used to gate this call moved in here so the by-reason bucketing
	 * above stays unconditional. */
	if (reason == SR_PLATEAU_FORCE)
		return;

	/* Per-arm x chaos-state cohort attribution -- chaos-mode V2.  Sits
	 * below the SR_PLATEAU_FORCE early-return so the by_chaos cohort
	 * sums reconcile with the flat bandit_pulls[] / bandit_reward_calls[]
	 * totals updated immediately below: an intervention window that
	 * lands inside a chaos window must drop from both cohorts together,
	 * otherwise sum_arm sum_chaos bandit_pulls_by_chaos[arm][c] runs
	 * ahead of sum_arm bandit_pulls[arm] by the SR_PLATEAU_FORCE count
	 * and the operator-facing cohort ratio drifts.
	 *
	 * was_chaos is sampled by the caller in maybe_rotate_strategy
	 * before cmp_hints_chaos_tick advances the schedule, so it reflects
	 * the chaos state in effect across the just-finished window rather
	 * than the state for the upcoming one.  The warn_fires delta is the
	 * caller-computed kmsg_warn_fires_now - kmsg_warn_fires_at_window_
	 * start delta for the same window.  Observation only -- no consumer
	 * in select_next_strategy or ucb1_score reads these arrays; V3
	 * action mode will wire them into the picker once the significance
	 * gate from the design doc clears. */
	chaos_idx = was_chaos ? 1u : 0u;
	__atomic_fetch_add(&shm->bandit_pulls_by_chaos[arm][chaos_idx], 1UL,
			   __ATOMIC_RELAXED);
	__atomic_fetch_add(&shm->bandit_reward_calls_by_chaos[arm][chaos_idx],
			   total, __ATOMIC_RELAXED);
	__atomic_fetch_add(&shm->bandit_warn_fires_by_chaos[arm][chaos_idx],
			   warn_fires, __ATOMIC_RELAXED);

	__atomic_fetch_add(&shm->bandit_pulls[arm], 1UL, __ATOMIC_RELAXED);
	__atomic_fetch_add(&shm->bandit_reward_calls[arm], total,
			   __ATOMIC_RELAXED);
	__atomic_fetch_add(&shm->bandit_reward_pc_edge_count[arm],
			   pc_edge_count, __ATOMIC_RELAXED);

	/* Discounted "recent" series update.  Decay every arm's counters
	 * by gamma = 1 - alpha first, THEN credit the active arm with one
	 * effective pull and the window's reward.  Decaying all arms (not
	 * just the pulled one) is what keeps the UCB1 explore-term
	 * denominator meaningful under discounting -- an arm that stops
	 * being picked must see its effective sample count shrink so the
	 * picker eventually re-tries it.  Single writer (CAS-serialised
	 * rotation path) so plain reads of the old values are safe;
	 * RELEASE stores back so the parent-side dump's RELAXED loads see
	 * complete values rather than torn intermediates.  This update
	 * runs alongside the lifetime fields above; ucb1_score() reads the
	 * recent series for both exploit and explore terms (D-UCB), while
	 * cold-start in pick_next_strategy() reads the lifetime by-reason
	 * aggregate (sum across r of bandit_pulls_by_reason[i][r]) -- "never
	 * observed by any path, including SR_PLATEAU_FORCE" rather than
	 * "not observed lately" -- and the lifetime fields also back
	 * dump_strategy_stats. */
	for (i = 0; i < NR_STRATEGIES; i++) {
		unsigned long p = __atomic_load_n(&shm->recent_pulls_x1000[i],
						  __ATOMIC_RELAXED);
		unsigned long r = __atomic_load_n(&shm->recent_reward_x1000[i],
						  __ATOMIC_RELAXED);

		__atomic_store_n(&shm->recent_pulls_x1000[i],
				 bandit_ema_decay(p), __ATOMIC_RELAXED);
		__atomic_store_n(&shm->recent_reward_x1000[i],
				 bandit_ema_decay(r), __ATOMIC_RELAXED);
	}
	__atomic_fetch_add(&shm->recent_pulls_x1000[arm], BANDIT_EMA_SCALE,
			   __ATOMIC_RELAXED);
	__atomic_fetch_add(&shm->recent_reward_x1000[arm],
			   total * BANDIT_EMA_SCALE, __ATOMIC_RELAXED);

	if (cmp_term == 0)
		return;

	__atomic_fetch_add(&shm->stats.bandit_cmp_reward_added, 1UL,
			   __ATOMIC_RELAXED);

	/* Per-arm running sum of cmp_term's share of the combined reward,
	 * scaled to parts per thousand.  Total is non-zero here because
	 * cmp_term > 0.  Averaged at end-of-run to surface the empirical
	 * CMP weighting per arm so the 0.25 constant can be tuned. */
	__atomic_fetch_add(&shm->bandit_cmp_share_sum_x1000[arm],
			   (cmp_term * 1000UL) / total,
			   __ATOMIC_RELAXED);
}

/*
 * Per-syscall comparison-constant novelty bloom.
 *
 * Two complementary 64-bit hashes derive ten-bit indices into a
 * 1024-bit (128-byte) bloom filter per syscall.  The multipliers are
 * splitmix64-style high-entropy odd constants; XOR-folding the upper
 * half into the index defeats the multiplier's tendency to leave low
 * bits weakly mixed when val itself has small magnitude (true for
 * many comparison constants the kernel checks against -- length
 * limits, error codes, magic numbers in the first few KiB of address
 * space).  Two hashes is the textbook efficient point for k below
 * ~3% FPR at the loads expected here (a few hundred distinct
 * constants per syscall over the K-window decay).
 */
static inline uint32_t cmp_bloom_h1(unsigned long val)
{
	uint64_t x = (uint64_t)val * 0x9e3779b97f4a7c15ULL;
	x ^= x >> 32;
	return (uint32_t)(x & 0x3FF);
}

static inline uint32_t cmp_bloom_h2(unsigned long val)
{
	uint64_t x = (uint64_t)val * 0xbf58476d1ce4e5b9ULL;
	x ^= x >> 32;
	x ^= x >> 16;
	return (uint32_t)(x & 0x3FF);
}

/*
 * Same "boring constant" filter cmp_hints.c uses (interesting_value).
 * Kept private here so the two filters can drift independently if it
 * turns out the novelty signal benefits from a different threshold
 * than the hint-substitution pool.
 */
static bool cmp_novelty_interesting(unsigned long val)
{
	if (val == 0 || val == 1)
		return false;
	if (val == (unsigned long) -1)
		return false;
	if (val < 4)
		return false;
	return true;
}

/*
 * Lazy bloom decay.  If the entry's window_tag is more than
 * CMP_NOVELTY_DECAY_WINDOWS rotations behind the current rotation
 * counter, zero the bloom and republish the tag.  CAS protects against
 * multiple children racing to clear the same entry — the loser sees
 * the new tag on the second load and skips the clear.  Children that
 * race the clear and observe a half-zeroed bloom may miss-credit a
 * constant as novel; that is benign for a noisy diagnostic counter
 * and self-correcting on the next observation.
 */
static void cmp_bloom_maybe_decay(struct cmp_novelty_entry *e,
				  uint32_t now)
{
	uint32_t tag = __atomic_load_n(&e->window_tag, __ATOMIC_RELAXED);

	if (now - tag <= CMP_NOVELTY_DECAY_WINDOWS)
		return;

	if (!__atomic_compare_exchange_n(&e->window_tag, &tag, now,
					 false,
					 __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		return;

	memset((void *)e->bloom, 0, sizeof(e->bloom));
}

/*
 * Test-and-set one bloom bit.  Returns true if the bit was previously
 * clear (the constant is novel along this hash).  Bits are atomic at
 * byte granularity; the caller treats "either hash bit was clear" as
 * "constant is novel".
 */
static bool cmp_bloom_set(uint8_t *bloom, uint32_t bit)
{
	uint8_t mask = (uint8_t)(1U << (bit & 7));
	uint8_t prev = __atomic_fetch_or(&bloom[bit >> 3], mask,
					 __ATOMIC_RELAXED);

	return (prev & mask) == 0;
}

unsigned long bandit_cmp_observe(unsigned long *trace_buf, unsigned int nr,
				bool do32, bool is_explorer,
				int strategy_at_pick)
{
	struct cmp_novelty_entry *e;
	unsigned long count, i;
	unsigned long novel = 0;
	uint32_t now;

	if (trace_buf == NULL || nr >= MAX_NR_SYSCALL)
		return 0;

	count = __atomic_load_n(&trace_buf[0], __ATOMIC_RELAXED);
	if (count == 0)
		return 0;
	if (count > KCOV_CMP_RECORDS_MAX)
		count = KCOV_CMP_RECORDS_MAX;

	e = &shm->cmp_novelty[nr][do32 ? 1 : 0];
	now = (uint32_t)__atomic_load_n(&shm->bandit_window_count,
					__ATOMIC_RELAXED);
	cmp_bloom_maybe_decay(e, now);

	for (i = 0; i < count; i++) {
		unsigned long type = trace_buf[1 + i * WORDS_PER_CMP];
		unsigned long arg1 = trace_buf[1 + i * WORDS_PER_CMP + 1];
		bool novel_here;

		if (!(type & KCOV_CMP_CONST))
			continue;

		/* arg1 is the compile-time constant under KCOV_CMP_CONST
		 * (clang/gcc's __sanitizer_cov_trace_const_cmpN convention);
		 * arg2 is the runtime value the kernel compared it against
		 * and would just credit the bandit for novelty in the
		 * fuzzer's own input distribution. */
		if (!cmp_novelty_interesting(arg1))
			continue;

		novel_here = cmp_bloom_set(e->bloom, cmp_bloom_h1(arg1));
		novel_here = cmp_bloom_set(e->bloom, cmp_bloom_h2(arg1)) ||
			     novel_here;
		if (novel_here)
			novel++;
	}

	if (novel == 0)
		return 0;

	/* Per-arm bandit attribution is gated separately from the return
	 * value: explorer-pool children run a different strategy than
	 * whatever the bandit picked, so crediting their CMP novelty into
	 * bandit_cmp_new_constants[] would misattribute their work and bias
	 * the bandit's reward calculation.  Out-of-range strategy_at_pick
	 * (including the -1 sentinel for pre-first-pick) likewise skips
	 * attribution.  The bloom updates above and the `novel` return
	 * still propagate so the global novelty horizon stays consistent
	 * across the fleet and so callers can use CMP novelty from
	 * explorers as a corpus-save signal.
	 *
	 * Attribute to the arm that PICKED the syscall, snapshotted in
	 * set_syscall_nr().  Re-reading shm->current_strategy here would
	 * misattribute any call whose syscall started under one arm and
	 * completed under another (rotation lands mid-syscall) -- frequent
	 * for long or blocking syscalls. */
	if (!is_explorer &&
	    strategy_at_pick >= 0 && strategy_at_pick < NR_STRATEGIES) {
		__atomic_fetch_add(
			&shm->bandit_cmp_new_constants[strategy_at_pick],
			novel, __ATOMIC_RELAXED);
	}

	return novel;
}

/*
 * Per-syscall frontier-edge ring accessors.
 *
 * The ring is a fixed-width window of FRONTIER_DECAY_WINDOWS slots per
 * syscall; the slot currently being filled is (frontier_slot &
 * (FRONTIER_DECAY_WINDOWS - 1)).  Producers (kcov_collect on the
 * new-edge branch) atomic-add into the current slot.  The rotation hook
 * advances the slot index and zeroes the slot it just moved into, so
 * sums across the ring give the trailing K-window frontier count for
 * each syscall -- effectively a sliding window with discrete decay.
 *
 * FRONTIER_DECAY_WINDOWS is currently 8 (see strategy.h); the AND-mask
 * approach assumes it stays a power of two -- enforced by the
 * static_assert below so a future change to a non-pot value fails at
 * compile time rather than silently producing wrong slot indices.
 */
_Static_assert((FRONTIER_DECAY_WINDOWS &
		(FRONTIER_DECAY_WINDOWS - 1)) == 0,
	       "FRONTIER_DECAY_WINDOWS must be a power of two");

void frontier_record_new_edge(unsigned int nr)
{
	uint32_t slot;
	unsigned long w;
	unsigned int cached;

	if (nr >= MAX_NR_SYSCALL)
		return;

	slot = __atomic_load_n(&shm->frontier_slot, __ATOMIC_RELAXED) &
	       (FRONTIER_DECAY_WINDOWS - 1);
	__atomic_fetch_add(&shm->frontier_history[nr][slot], 1U,
			   __ATOMIC_RELAXED);
	__atomic_fetch_add(&shm->frontier_recent_count_cached[nr], 1U,
			   __ATOMIC_RELAXED);

	/* RedQueen-source PC-edge attribution.  When the call that produced
	 * this new edge was a replay of a corpus entry whose args were
	 * originally captured under in_reexec (i.e. the RedQueen re-exec
	 * path harvested those args), credit the win to the dedicated
	 * rq_sourced_pcedge_wins_per_syscall[] counter so the periodic
	 * dump can report PC-edge conversion of RedQueen-sourced saves
	 * separately from the bulk per-strategy attribution.  Observability
	 * only -- no selection / reward code reads this counter.  RELAXED
	 * add-fetch matches the surrounding accounting. */
	{
		struct childdata *cc = this_child();

		if (cc != NULL && cc->replay_rq_sourced)
			__atomic_fetch_add(
				&shm->stats.rq_sourced_pcedge_wins_per_syscall[nr],
				1UL, __ATOMIC_RELAXED);

		/* errno-gradient-save conversion counter.  Sibling of the
		 * rq_sourced bump above for the errno-source provenance lane.
		 * Bumped only when the call that produced this PC win was a
		 * replay of a corpus entry whose errno_sourced flag was set --
		 * i.e. a downstream PC-edge win from an errno-gradient save.
		 * Observability only; cumulative-diagnostic semantics matches
		 * the rest of the strategy.c accounting. */
		if (cc != NULL && cc->replay_errno_sourced)
			__atomic_fetch_add(
				&shm->stats.errno_sourced_pcedge_wins_per_syscall[nr],
				1UL, __ATOMIC_RELAXED);
	}

	/* SHADOW-ONLY silent-streak reset.  This function is the canonical
	 * per-syscall new-edge productive-event hook -- already called from
	 * kcov_collect()'s found_new branch when a syscall's call has
	 * produced at least one fresh bucket bit, which is also the path
	 * that contributes the positive local_distinct_pcs delta the
	 * coverage-frontier picker treats as the "productive" signal.
	 * Resetting the silent-streak counter here therefore reuses the
	 * existing per-syscall productive-edge collection site -- no new
	 * collection path is added.
	 *
	 * The counter and the global frontier_shadow_decay_candidates it
	 * edge-triggers in random-syscall.c's silent-regime accept site
	 * are observability-only: no live selection or scoring code reads
	 * them, so the reset cannot perturb the picker distribution. */
	__atomic_store_n(
		&shm->stats.frontier_silent_streak_per_syscall[nr],
		0UL, __ATOMIC_RELAXED);

	/* SHADOW-ONLY no-novelty baseline snapshot, paired with the streak
	 * reset above.  Snapshots the current value of the two non-PC-edge
	 * novelty signals (per-syscall CMP-pool inserts and per-syscall
	 * SUCCESS-bucket errno count) so the silent-regime accept site can
	 * detect whether either fired during the next silent streak via a
	 * cheap current-vs-baseline equality test.  A streak reset here is
	 * the right moment to refresh the baselines: by construction this
	 * call also bumped per_syscall_edges, so PC-edge novelty IS the
	 * reset event and the no-novelty UNLESS clause is being re-armed
	 * for the next streak.
	 *
	 * Same observability-only contract as the streak counter above --
	 * no selection or scoring code reads these baselines, only the
	 * shadow decay predicate at the silent-regime accept site does.
	 * kcov_shm NULL-checked because frontier_record_new_edge is
	 * unreachable without a coverage trace under collection, but the
	 * guard matches the pattern other kcov_shm consumers follow. */
	if (kcov_shm != NULL) {
		unsigned long cmp_snap, errno_snap;

		cmp_snap = __atomic_load_n(
			&kcov_shm->per_syscall_cmp_inserts[nr],
			__ATOMIC_RELAXED);
		errno_snap = __atomic_load_n(
			&kcov_shm->per_syscall_errno[nr][ERRNO_BUCKET_SUCCESS],
			__ATOMIC_RELAXED);
		__atomic_store_n(
			&shm->stats.frontier_silent_cmp_baseline[nr],
			cmp_snap, __ATOMIC_RELAXED);
		__atomic_store_n(
			&shm->stats.frontier_silent_errno_success_baseline[nr],
			errno_snap, __ATOMIC_RELAXED);
	}

	/* Ratchet the cached max upward if this bump pushed nr's recent
	 * count past it.  No CAS: a racing producer that also raises the
	 * max can clobber our store with its (also-correct) value, and a
	 * racing rotation will overwrite with the authoritative recompute.
	 * Both outcomes leave the cache within one window's slack. */
	w = frontier_recent_count(nr);
	if (w > UINT_MAX)
		w = UINT_MAX;
	cached = __atomic_load_n(&shm->frontier_max_weight_cached,
				 __ATOMIC_RELAXED);
	if ((unsigned int)w > cached)
		__atomic_store_n(&shm->frontier_max_weight_cached,
				 (unsigned int)w, __ATOMIC_RELAXED);
}

/*
 * Transition-discovery sibling of frontier_record_new_edge().  Bumps
 * the same per-syscall frontier-edge ring + cached max + silent-streak
 * reset triple, treating a transition-slot flip as evidence that the
 * syscall is currently producing fresh control-flow coverage.  The
 * canonical signal pattern this commit promotes is "PC-edge discovery
 * plateaued while transition discovery still moves" -- so under
 * COMBINED mode the frontier ring needs to be pushed up for syscalls
 * producing transitions but no fresh PC bucket bits, otherwise the
 * silent-regime picker steers away from exactly the syscalls that are
 * still earning the post-plateau coverage.
 *
 * Caller-side gates (in random-syscall.c at the kcov_collect call
 * site) handle the kcov_transition_reward_mode == COMBINED and
 * !child->is_explorer and !child->kcov.remote_mode filters before
 * invoking, so this function only sees calls that should bump the
 * ring.  The RedQueen-source PC-edge attribution branch in
 * frontier_record_new_edge() is deliberately omitted here -- that
 * counter measures PC-edge wins from RedQueen-sourced corpus replays
 * and a transition discovery is a different signal.  The silent-streak
 * reset DOES apply: a syscall producing transitions has demonstrably
 * been productive this window, which is the streak's reset semantics
 * (frontier_silent_streak_per_syscall is a "consecutive cold windows"
 * counter, agnostic to PC vs transition).
 */
void frontier_record_transition_edge(unsigned int nr)
{
	uint32_t slot;
	unsigned long w;
	unsigned int cached;

	if (nr >= MAX_NR_SYSCALL)
		return;

	slot = __atomic_load_n(&shm->frontier_slot, __ATOMIC_RELAXED) &
	       (FRONTIER_DECAY_WINDOWS - 1);
	__atomic_fetch_add(&shm->frontier_history[nr][slot], 1U,
			   __ATOMIC_RELAXED);
	__atomic_fetch_add(&shm->frontier_recent_count_cached[nr], 1U,
			   __ATOMIC_RELAXED);

	__atomic_store_n(
		&shm->stats.frontier_silent_streak_per_syscall[nr],
		0UL, __ATOMIC_RELAXED);

	/* SHADOW-ONLY no-novelty baseline snapshot.  Mirror of the snapshot
	 * pair in frontier_record_new_edge(): a transition-edge discovery
	 * is also a productive event for the streak's purposes, so the
	 * decay predicate's UNLESS-clause baselines re-arm here too.  Same
	 * observability-only contract; see the matching block in
	 * frontier_record_new_edge() for full rationale. */
	if (kcov_shm != NULL) {
		unsigned long cmp_snap, errno_snap;

		cmp_snap = __atomic_load_n(
			&kcov_shm->per_syscall_cmp_inserts[nr],
			__ATOMIC_RELAXED);
		errno_snap = __atomic_load_n(
			&kcov_shm->per_syscall_errno[nr][ERRNO_BUCKET_SUCCESS],
			__ATOMIC_RELAXED);
		__atomic_store_n(
			&shm->stats.frontier_silent_cmp_baseline[nr],
			cmp_snap, __ATOMIC_RELAXED);
		__atomic_store_n(
			&shm->stats.frontier_silent_errno_success_baseline[nr],
			errno_snap, __ATOMIC_RELAXED);
	}

	w = frontier_recent_count(nr);
	if (w > UINT_MAX)
		w = UINT_MAX;
	cached = __atomic_load_n(&shm->frontier_max_weight_cached,
				 __ATOMIC_RELAXED);
	if ((unsigned int)w > cached)
		__atomic_store_n(&shm->frontier_max_weight_cached,
				 (unsigned int)w, __ATOMIC_RELAXED);
}

unsigned long frontier_recent_count(unsigned int nr)
{
	if (nr >= MAX_NR_SYSCALL)
		return 0;

	return __atomic_load_n(&shm->frontier_recent_count_cached[nr],
			       __ATOMIC_RELAXED);
}

/*
 * Errno-plateau decay predicate for the coverage-frontier picker's
 * silent-regime accept site.  See the FRONTIER_ERRNO_PLATEAU_* contract
 * in include/strategy.h for the design rationale; the four novelty-
 * restore lanes (PC edge / transition / CMP / new-errno) are all the
 * predicate ever needs because the underlying counters are monotonic --
 * any productive event flips the predicate permanently false for that
 * syscall without requiring a per-syscall reset hook in frontier_
 * record_new_edge / frontier_record_transition_edge.
 *
 * Reads are RELAXED.  A torn or stale snapshot only shifts the predicate
 * by at most one call's worth of evidence, well inside the slack the
 * outer accept/retry loop already tolerates, and the inequality tests
 * cannot misclassify across the threshold by more than one increment.
 *
 * Returns false when kcov_shm is NULL or nr is out of range so the
 * caller's accept gate degrades to the historical accept distribution
 * rather than wedging on a NULL deref -- matches the kcov-less fallback
 * frontier_cold_weight already takes.
 */
bool frontier_errno_plateau_should_decay(unsigned int nr, bool do32)
{
	unsigned long calls, edges, cmp_inserts, transition_edges;
	unsigned long max_failure_bucket = 0;
	unsigned int bucket;

	if (kcov_shm == NULL || nr >= MAX_NR_SYSCALL)
		return false;

	/* Coordinate with the landed --cred-throttle gate: credential-class
	 * syscalls have their own EPERM/EINVAL dominance throttle keyed on
	 * cred_class_* counters in cred_throttle.c, and a credential-class
	 * pick already burns its rejection budget on that gate.  Excluding
	 * the set here keeps a credential syscall from being decayed by both
	 * gates in lock-step -- the cred-throttle reject lands first inside
	 * set_syscall_nr_coverage_frontier, so this predicate only ever sees
	 * a credential pick that the throttle already let through. */
	if (cred_class_for_nr(nr, do32) != CRED_CLASS_NR)
		return false;

	calls = __atomic_load_n(&kcov_shm->per_syscall_calls[nr],
				__ATOMIC_RELAXED);
	if (calls < FRONTIER_ERRNO_PLATEAU_MIN_CALLS)
		return false;

	/* PC-edge novelty lane.  per_syscall_edges has call-count semantics
	 * (see include/kcov.h): one bump per call that discovered at least
	 * one fresh bucket bit.  A non-zero value means the syscall has been
	 * productive in PC-coverage terms at least once across its lifetime,
	 * so the decay must release.  Counter is monotonic non-decreasing,
	 * so once edges > 0 the predicate is permanently false for nr. */
	edges = __atomic_load_n(&kcov_shm->per_syscall_edges[nr],
				__ATOMIC_RELAXED);
	if (edges > 0)
		return false;

	/* CMP novelty lane.  per_syscall_cmp_inserts counts fresh inserts
	 * and evict-replaces in cmp_hints' per-syscall pool (dedup-refresh
	 * hits do not count, matching the global counter's semantics).  A
	 * syscall producing CMP signal without PC-edge progress is still
	 * earning post-plateau coverage of a different shape, so don't
	 * decay it.  Monotonic non-decreasing same as the edges counter. */
	cmp_inserts = __atomic_load_n(&kcov_shm->per_syscall_cmp_inserts[nr],
				      __ATOMIC_RELAXED);
	if (cmp_inserts > 0)
		return false;

	/* Transition-novelty lane.  per_syscall_transition_edges_real_local
	 * counts local-mode trace transition-slot first-flips (a new
	 * (prev_canon_pc, cur_canon_pc) ordering observed) for this syscall.
	 * Like the CMP lane: a syscall flipping new transition slots is
	 * earning control-flow coverage the PC bitmap misses, so the decay
	 * must release.  Monotonic non-decreasing. */
	transition_edges = __atomic_load_n(
		&kcov_shm->per_syscall_transition_edges_real_local[nr],
		__ATOMIC_RELAXED);
	if (transition_edges > 0)
		return false;

	/* New-errno novelty lane.  Scan the 7 non-SUCCESS buckets and find
	 * the dominant one.  SUCCESS is excluded from the dominance test
	 * because a syscall returning SUCCESS but bumping no PC edges is
	 * still exercising kernel state worth probing -- the decay targets
	 * syscalls whose calls the kernel is REJECTING with a single fixed
	 * errno.  Any new bucket (including a late SUCCESS) bumps per_
	 * syscall_calls and dilutes the dominant failure ratio below
	 * DOM_PCT, restoring the syscall to full sampling. */
	for (bucket = ERRNO_BUCKET_SUCCESS + 1;
	     bucket < ERRNO_BUCKET_NR; bucket++) {
		unsigned long c = __atomic_load_n(
			&kcov_shm->per_syscall_errno[nr][bucket],
			__ATOMIC_RELAXED);
		if (c > max_failure_bucket)
			max_failure_bucket = c;
	}
	/* Percent check rearranged to integer-safe form, matching the same
	 * shape cred_throttle_should_reject uses for HARD_FAIL_PCT. */
	if (max_failure_bucket * 100UL <
	    (unsigned long)FRONTIER_ERRNO_PLATEAU_DOM_PCT * calls)
		return false;

	return true;
}

void frontier_window_advance(void)
{
	uint32_t cur, next;
	unsigned int nr;
	unsigned long max_weight = 0;

	/* Clear-then-publish, the opposite of the previous order.  The old
	 * code bumped frontier_slot first and then aged out the slot it had
	 * just published, which opened a window in which a producer could
	 * (a) add into the new slot before we cleared it, (b) have that
	 * write exchanged back to zero, and (c) issue its cached-sum
	 * increment AFTER our subtract.  In the worst case the rotator's
	 * fetch_sub ran with an old_slot value that was larger than the
	 * cached running sum -- because part of the producer's contribution
	 * was already in the slot but not yet in the cached counter -- so
	 * the subtract wrapped negative and the cached count flipped to a
	 * near-UINT32_MAX weight.  That bogus weight is consumed by
	 * random-syscall.c's frontier roulette wheel; an arm-wide blow-up
	 * either collapses the wheel onto one syscall or pushes the
	 * rejection sampler into an effectively-uniform reject loop.
	 *
	 * We now compute the next slot index without publishing it, age out
	 * the slot's contents from every per-nr running sum while no
	 * producer is targeting that slot (frontier_slot still points to
	 * the previous slot), and only then bump frontier_slot.  A producer
	 * racing the rotation keeps adding into the previous slot for a
	 * handful of instructions -- a bounded window-boundary attribution
	 * error -- instead of having its addition silently dropped or
	 * inverting the cached sum.
	 *
	 * The saturating subtract is kept as a hard guard: even with the
	 * reorder, a CAS-clamped update means a producer that races our
	 * read-modify-write on cached can't drive it negative.  Hitting the
	 * clamp bumps frontier_underflow_prevented -- the metric is
	 * expected to read zero in steady state. */
	cur = __atomic_load_n(&shm->frontier_slot, __ATOMIC_RELAXED);
	next = (cur + 1U) & (FRONTIER_DECAY_WINDOWS - 1);

	for (nr = 0; nr < MAX_NR_SYSCALL; nr++) {
		uint32_t old_slot;
		uint32_t old_cached;
		uint32_t new_sum;

		old_slot = __atomic_exchange_n(&shm->frontier_history[nr][next],
					       0U, __ATOMIC_RELAXED);

		/* CAS loop so a concurrent producer's fetch_add against the
		 * cached counter cannot be lost and cannot underflow the
		 * sum.  Producers should not be racing this nr at this
		 * point (frontier_slot still names the previous slot) but
		 * the loop costs at most a handful of retries and removes
		 * the underflow case unconditionally. */
		old_cached = __atomic_load_n(
			&shm->frontier_recent_count_cached[nr],
			__ATOMIC_RELAXED);
		for (;;) {
			if (old_cached >= old_slot)
				new_sum = old_cached - old_slot;
			else
				new_sum = 0;
			if (__atomic_compare_exchange_n(
				    &shm->frontier_recent_count_cached[nr],
				    &old_cached, new_sum, false,
				    __ATOMIC_RELAXED, __ATOMIC_RELAXED))
				break;
		}
		if (old_cached < old_slot)
			__atomic_add_fetch(
				&shm->stats.frontier_underflow_prevented,
				1UL, __ATOMIC_RELAXED);
		if (new_sum > max_weight)
			max_weight = new_sum;
	}

	/* Publish the new slot only after every per-nr clear has landed.
	 * From this point producers see the freshly-zeroed slot. */
	__atomic_store_n(&shm->frontier_slot, cur + 1U, __ATOMIC_RELAXED);

	if (max_weight > UINT_MAX)
		max_weight = UINT_MAX;
	__atomic_store_n(&shm->frontier_max_weight_cached,
			 (unsigned int)max_weight, __ATOMIC_RELAXED);
}

/*
 * Compute the UCB1 score for one arm.  Discounted formulation
 * (D-UCB): the lifetime bandit_pulls[]/bandit_reward_calls[] series
 * is replaced by the rolling exponentially-decayed counters
 * recent_pulls_x1000[]/recent_reward_x1000[] so the picker tracks
 * recent yield instead of the lifetime average.  Kernel coverage
 * discovery is non-stationary -- early windows mine out easy edges
 * and late windows degrade -- so an arm's last few windows are the
 * relevant signal, not its 2024 mean.
 *
 *     score_i = mean_reward_i / norm + c * sqrt(ln(N) / n_i)
 *
 * where:
 *   mean_reward_i = recent_reward_x1000[i] / recent_pulls_x1000[i]
 *                   (the x1000 fixed-point cancels, leaving the
 *                   discounted mean reward in the original calls
 *                   units the lifetime series used)
 *   norm          = max over arms of mean_reward_j (or 1 if all zero)
 *   N             = sum across arms of n_j (effective discounted
 *                   sample size, the D-UCB analogue of total_pulls)
 *   n_i           = recent_pulls_x1000[i] / 1000.0
 *   c             = UCB1_EXPLORATION_C
 *
 * Normalising the exploit term by the largest observed mean keeps it
 * in roughly the same range as the exploration term (~1) regardless
 * of whether per-window rewards are in the hundreds or hundreds of
 * thousands.  Without normalisation, a UCB1 picker over edges-per-
 * window degenerates into "always pick the arm with the highest
 * cumulative average" because the exploit term dwarfs sqrt(ln/n).
 *
 * The reward signal consumed here is still the CALL-COUNT series
 * (recent_reward_x1000[] is the discounted form of
 * bandit_reward_calls[] -- calls-with-≥1-edge plus the weighted CMP
 * novelty term).  The parallel real bucket-count series
 * (bandit_reward_pc_edge_count[]) remains a lifetime diagnostic and
 * does not feed the score; switching the learner to a discounted
 * bucket-count signal is a separate decision once both signals have
 * been observed under discounting against real run data.
 *
 * pulls_x1000 == 0 clamps to 1: an arm whose discounted count
 * decayed all the way to zero (≈140 windows of no selection at
 * gamma=0.95) is by definition starved, the resulting huge explore
 * term is the correct outcome.  The clamp keeps the division
 * well-defined rather than relying on a separate guard.
 */
static double ucb1_score(int arm, double total_n, double norm)
{
	unsigned long pulls_x1000 = __atomic_load_n(&shm->recent_pulls_x1000[arm],
						    __ATOMIC_RELAXED);
	unsigned long reward_x1000 = __atomic_load_n(&shm->recent_reward_x1000[arm],
						     __ATOMIC_RELAXED);
	double n_i, mean, exploit, explore;

	if (pulls_x1000 == 0)
		pulls_x1000 = 1;

	n_i = (double)pulls_x1000 / (double)BANDIT_EMA_SCALE;
	mean = (double)reward_x1000 / (double)pulls_x1000;
	exploit = mean / norm;
	explore = UCB1_EXPLORATION_C * sqrt(log(total_n) / n_i);

	return exploit + explore;
}

/*
 * Sum across reasons of the by-reason pull matrix.  Used by the