diff --git a/CHANGELOG.md b/CHANGELOG.md index c17766f..492e7be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -112,6 +112,48 @@ least one table — the gate never starves retrieval of all candidates. Phase 1-C (smooth sigmoid read-gate threshold) is the remaining planned follow-up before the Phase 4 enforcement readiness review. +### Added — issue #116 Phase 1-C: smooth sigmoid read-gate (observability slice) + +The memo (§1.4 Stage 2) argues that a hard `--limit N` truncation of +results is both noise-sensitive (an item whose relevance score +fluctuates slightly around the cutoff flips in and out +unpredictably) and non-learnable (zero gradient at the boundary +gives a feedback loop nothing to update against). The memo's +recommendation is a smooth sigmoid threshold with learnable slope +and midpoint, calibrated from operational data. + +This release ships the **observability slice** of that +recommendation — the math module + a `_sigmoid_rank_weight` field +attached to every result item. Purely additive: rank order is +unchanged, no items are dropped, the bench harness sees identical +output structure modulo the new per-item field. Learning the slope +and midpoint from accumulating outcomes is Phase 4 territory and is +not implemented here. + +- **`agentmemory.sigmoid_gate`** — new module exposing + `SigmoidParams`, `sigmoid(x, *, slope, midpoint)`, + `normalize_rank(rank, total)`, `weight_for_rank(rank, total)`, + `weights_for_results(total)`, `annotate_with_weights(items)`. + Pure math, no DB access. Conservative defaults + (`slope=6.0, midpoint=0.5`) chosen per memo §1.4 Stage 2 ("Start + with a shallow slope and a conservative midpoint, then tighten + as data accumulates"). + +- **`cmd_search` hookpoint** — annotates each surfaced result item + with `_sigmoid_rank_weight` derived from its 1-based rank in the + bucket. Failures are swallowed so a bad gate cannot break search. + +- **Tests** — `tests/test_sigmoid_gate.py`. 
14 tests covering + endpoint behavior, midpoint crossing, monotonicity, extreme + overflow handling, SigmoidParams validation, rank normalization, + bucket annotation, idempotence (does not overwrite existing + weights), and graceful handling of non-dict items. + +Phase 4 enforcement readiness review remains the next planned step, +gated on accumulating 4+ weeks of pathway-log data before deciding +whether the BG/cerebellum learning loops are calibrated enough to +flip from shadow to enforcement. + ## [2.7.0] — 2026-05-13 — *Procedural memory layer (Velamj, PR #94)* The third memory type. brainctl now treats Tulving's 1972 tripartite diff --git a/src/agentmemory/_impl.py b/src/agentmemory/_impl.py index 568de0e..eac978a 100644 --- a/src/agentmemory/_impl.py +++ b/src/agentmemory/_impl.py @@ -7553,6 +7553,25 @@ def _normalize_bucket_scores(bucket_name): elif _debug_mode: _out["_debug"] = {"all_signals_informative": True} + # Issue #116 Phase 1-C: smooth sigmoid read-gate (observability slice). + # Annotate each surfaced result item with a sigmoid-weighted + # confidence-in-relevance score derived from its 1-based rank + # position. Purely additive — does NOT change rank order, does NOT + # drop any items. Downstream consumers (BG/cerebellum learning + # loops, agents, future sigmoid-aware rerankers) can decide what + # to do with the weight. Failures swallow so a bad gate cannot + # break search. + try: + from agentmemory.sigmoid_gate import ( + annotate_with_weights as _sg_annotate, + DEFAULT_PARAMS as _SG_DEFAULTS, + ) + for _bucket_key, _bucket_rows in results.items(): + if isinstance(_bucket_rows, list) and _bucket_rows: + _sg_annotate(_bucket_rows, params=_SG_DEFAULTS) + except Exception: # pragma: no cover — defensive + pass + # Issue #116 Phase 1-A: emit one row to retrieval_pathway_log capturing the # pathway fingerprint of this retrieval. Best-effort; never blocks the # return path. Gated behind BRAINCTL_PATHWAY_LOG env var. 
See diff --git a/src/agentmemory/sigmoid_gate.py b/src/agentmemory/sigmoid_gate.py new file mode 100644 index 0000000..dc546b0 --- /dev/null +++ b/src/agentmemory/sigmoid_gate.py @@ -0,0 +1,175 @@ +"""Smooth sigmoid read-gate — issue #116 Phase 1-C (observability slice). + +The memo (issue #116 §1.4 Stage 2) argues that a hard `--limit N` +truncation of search results is both noise-sensitive (an item whose +relevance score fluctuates slightly around the cutoff flips in and +out unpredictably) and non-learnable (zero gradient at the boundary +gives a feedback loop nothing to update against). + +The memo's recommendation is a smooth sigmoid threshold with +learnable slope and midpoint, calibrated from operational data. + +This module ships the **observability slice** of that recommendation: + + - Pure-math sigmoid + per-rank weight helpers. + - Conservative default parameters (shallow slope, mid midpoint). + - A `weight_for_rank(rank, total)` helper that surfaces a soft + confidence-in-relevance per result item without changing the + item's actual rank position. + +The intended use at this stage is **additive** — `cmd_search` +attaches a `_sigmoid_rank_weight` field to each returned item so +downstream consumers (BG/cerebellum learning loops, agents, future +sigmoid-aware rerankers) can experiment with using it without any +risk of regressing the existing rank order or the bench harness. + +Learning the slope / midpoint from accumulating outcomes is Phase 4 +territory and not implemented here. The conservative defaults are +chosen so that the surfaced weights are useful as a relative ordering +signal but do not over-commit to any particular calibration before +the data is in. 
+ +See also: + - research/issue-116-audit-vs-origin-main.md §6.2.4 — "Smooth + sigmoid read-gate threshold" + - research/brainctl-brain-architecture-issue-116.md §1.4 Stage 2 +""" +from __future__ import annotations + +import math +from dataclasses import dataclass +from typing import Iterable + +# Conservative defaults — chosen for "shallow slope, conservative +# midpoint" per memo §1.4 Stage 2 ("Start with a shallow slope and +# a conservative midpoint, then tighten as data accumulates"). +# +# With slope=6.0 and midpoint=0.5 over normalized rank ∈ [0, 1]: +# rank 1 of 5 (normalized 1.0 ) → weight ≈ 0.953 +# rank 3 of 5 (normalized 0.5 ) → weight = 0.5 +# rank 5 of 5 (normalized 0.0 ) → weight ≈ 0.047 +# rank 1 of 10 (normalized 1.0 ) → weight ≈ 0.953 +# rank 5 of 10 (normalized 0.555) → weight ≈ 0.583 +# rank 10 of 10 (normalized 0.0) → weight ≈ 0.047 +# +# A shallower slope produces a flatter weight curve that distributes +# uncertainty more evenly across positions; a steeper slope sharpens +# the in/out distinction. The Phase 4 calibration step will fit these +# from accumulated outcomes. +DEFAULT_SLOPE = 6.0 +DEFAULT_MIDPOINT = 0.5 + + +@dataclass(frozen=True) +class SigmoidParams: + """A (slope, midpoint) pair. Frozen so it can be passed around and + cached without surprise mutations.""" + slope: float = DEFAULT_SLOPE + midpoint: float = DEFAULT_MIDPOINT + + def __post_init__(self) -> None: + # Defensive — keeps callers honest about what shapes are + # meaningful. midpoint must be in (0, 1) so rank-position + # normalization stays well-defined; slope must be > 0 so the + # function is monotone increasing in x. 
+ if not (0.0 < self.midpoint < 1.0): + raise ValueError( + f"midpoint must be in (0, 1); got {self.midpoint!r}" + ) + if self.slope <= 0.0: + raise ValueError(f"slope must be > 0; got {self.slope!r}") + + +DEFAULT_PARAMS = SigmoidParams() + + +def sigmoid(x: float, *, slope: float = DEFAULT_SLOPE, + midpoint: float = DEFAULT_MIDPOINT) -> float: + """Smooth threshold function. + + Returns a value in [0, 1] that crosses 0.5 at x = midpoint and + approaches 1.0 (resp. 0.0) for x well above (resp. below) midpoint. + + The slope controls how sharp the transition is — large slope + approximates a hard cutoff, small slope produces a gentle ramp. + """ + # math.exp(z) overflows for large positive z; clamp both tails. + z = -slope * (x - midpoint) + if z > 500: + return 0.0 + if z < -500: + return 1.0 + return 1.0 / (1.0 + math.exp(z)) + + +def normalize_rank(rank: int, total: int) -> float: + """Map a 1-based rank position (1=best) inside a result set of + `total` items to a normalized score in [0, 1] where 1.0 is the top + of the list and 0.0 is the bottom. + + Edge cases: + - total <= 0 → 0.5 (degenerate input; refuse to commit) + - total == 1 → 1.0 for any rank (the only item is the top) + - rank outside [1, total] is clamped to that range. + """ + if total <= 0: + return 0.5 + if total == 1: + return 1.0 + if rank < 1: + rank = 1 + elif rank > total: + rank = total + # rank=1 → 1.0, rank=total → 0.0, linear in between. + return (total - rank) / (total - 1) + + +def weight_for_rank(rank: int, total: int, + *, params: SigmoidParams = DEFAULT_PARAMS) -> float: + """Sigmoid weight for an item at 1-based `rank` in a result set + of `total` items. Composition of `normalize_rank` + `sigmoid`.""" + return sigmoid( + normalize_rank(rank, total), + slope=params.slope, + midpoint=params.midpoint, + ) + + +def weights_for_results(total: int, + *, params: SigmoidParams = DEFAULT_PARAMS) -> list[float]: + """Compute the sigmoid weight at every rank position 1..total. 
+ + Returns a list of length `total` indexed by zero-based position + (i.e. result[0] holds the weight for rank 1). Convenient for + callers that hand out a ranked list and want to attach weights + in one pass. + """ + if total <= 0: + return [] + return [weight_for_rank(rank, total, params=params) + for rank in range(1, total + 1)] + + +def annotate_with_weights(items: Iterable[dict], + *, + weight_key: str = "_sigmoid_rank_weight", + params: SigmoidParams = DEFAULT_PARAMS, + ) -> list[dict]: + """Mutate each dict in `items` by attaching a sigmoid weight at + `weight_key`, keyed by 1-based rank in iteration order. + + Items that already carry a value at `weight_key` are left + untouched — the gate does not overwrite an explicit upstream + decision. Returns a new list of the same (mutated) item objects. + """ + items_list = list(items) + total = len(items_list) + if total == 0: + return items_list + for idx, item in enumerate(items_list, start=1): + if not isinstance(item, dict): + continue + if weight_key in item: + continue + item[weight_key] = weight_for_rank(idx, total, params=params) + return items_list diff --git a/tests/test_sigmoid_gate.py b/tests/test_sigmoid_gate.py new file mode 100644 index 0000000..f237022 --- /dev/null +++ b/tests/test_sigmoid_gate.py @@ -0,0 +1,140 @@ +"""Tests for sigmoid_gate — issue #116 Phase 1-C. + +Covers: + - SigmoidParams validation + - sigmoid() endpoint behavior + midpoint crossing + - normalize_rank() edge cases + - weight_for_rank() ordering + - weights_for_results() shape + - annotate_with_weights() mutation + idempotence +""" +from __future__ import annotations + +import pytest + +from agentmemory.sigmoid_gate import ( + DEFAULT_MIDPOINT, + DEFAULT_PARAMS, + DEFAULT_SLOPE, + SigmoidParams, + annotate_with_weights, + normalize_rank, + sigmoid, + weight_for_rank, + weights_for_results, +) + + +def test_sigmoid_crosses_05_at_midpoint(): + # At x == midpoint, sigmoid output is exactly 0.5 regardless of slope. 
+ assert sigmoid(0.5, slope=1.0, midpoint=0.5) == pytest.approx(0.5) + assert sigmoid(0.7, slope=12.0, midpoint=0.7) == pytest.approx(0.5) + + +def test_sigmoid_approaches_endpoints(): + # Far above midpoint → 1; far below → 0. + assert sigmoid(100.0, slope=1.0, midpoint=0.0) > 0.999 + assert sigmoid(-100.0, slope=1.0, midpoint=0.0) < 0.001 + + +def test_sigmoid_handles_extreme_overflow(): + # The internal exp() must not raise on extreme inputs. + assert sigmoid(1e9, slope=1e6, midpoint=0.0) == pytest.approx(1.0) + assert sigmoid(-1e9, slope=1e6, midpoint=0.0) == pytest.approx(0.0) + + +def test_sigmoid_monotone_increasing(): + # Strictly monotone in x for fixed positive slope. + samples = [sigmoid(x / 10.0) for x in range(0, 11)] + for a, b in zip(samples, samples[1:]): + assert a < b + + +def test_sigmoid_params_validates_midpoint(): + with pytest.raises(ValueError): + SigmoidParams(slope=1.0, midpoint=0.0) + with pytest.raises(ValueError): + SigmoidParams(slope=1.0, midpoint=1.0) + with pytest.raises(ValueError): + SigmoidParams(slope=1.0, midpoint=-0.1) + + +def test_sigmoid_params_validates_slope(): + with pytest.raises(ValueError): + SigmoidParams(slope=0.0, midpoint=0.5) + with pytest.raises(ValueError): + SigmoidParams(slope=-1.0, midpoint=0.5) + + +def test_normalize_rank_endpoints_and_edges(): + # rank=1 (best) maps to 1.0; rank=total maps to 0.0 + assert normalize_rank(1, 10) == 1.0 + assert normalize_rank(10, 10) == 0.0 + # Linear interior — rank 5 of 9 should be exactly 0.5 + assert normalize_rank(5, 9) == pytest.approx(0.5) + # Singletons + assert normalize_rank(1, 1) == 1.0 + # Degenerate / clamping + assert normalize_rank(5, 0) == 0.5 + assert normalize_rank(-3, 10) == 1.0 + assert normalize_rank(99, 10) == 0.0 + + +def test_weight_for_rank_orders_with_position(): + # In a 10-item set, rank 1 must outweigh rank 10 (strictly). 
+ w_top = weight_for_rank(1, 10) + w_bot = weight_for_rank(10, 10) + assert w_top > w_bot + # And the middle item lands near 0.5 with default midpoint=0.5. + w_mid = weight_for_rank(5, 9) + assert w_mid == pytest.approx(0.5, abs=0.01) + + +def test_weights_for_results_shape_and_monotone(): + weights = weights_for_results(10) + assert len(weights) == 10 + # Strictly decreasing along rank order (best-first). + for a, b in zip(weights, weights[1:]): + assert a > b + # All in (0, 1). + for w in weights: + assert 0.0 < w < 1.0 + + +def test_weights_for_results_empty(): + assert weights_for_results(0) == [] + assert weights_for_results(-3) == [] + + +def test_annotate_with_weights_mutates_in_place_and_preserves_order(): + items = [{"id": 1}, {"id": 2}, {"id": 3}] + out = annotate_with_weights(items) + assert out is not items # returns a list copy + assert [d["id"] for d in out] == [1, 2, 3] + # Weights present, monotone-decreasing + weights = [d["_sigmoid_rank_weight"] for d in out] + assert all(0.0 < w < 1.0 for w in weights) + for a, b in zip(weights, weights[1:]): + assert a > b + + +def test_annotate_does_not_overwrite_existing_weight(): + items = [{"id": 1, "_sigmoid_rank_weight": 0.42}, {"id": 2}] + out = annotate_with_weights(items) + assert out[0]["_sigmoid_rank_weight"] == 0.42 # untouched + assert "_sigmoid_rank_weight" in out[1] # new + + +def test_annotate_skips_non_dict_items(): + items = [{"id": 1}, "not-a-dict", {"id": 3}] + out = annotate_with_weights(items) + assert "_sigmoid_rank_weight" in out[0] + assert out[1] == "not-a-dict" + assert "_sigmoid_rank_weight" in out[2] + + +def test_default_params_have_sane_values(): + assert DEFAULT_PARAMS.slope == DEFAULT_SLOPE + assert DEFAULT_PARAMS.midpoint == DEFAULT_MIDPOINT + # And the defaults must satisfy the constructor's validation. + SigmoidParams(slope=DEFAULT_SLOPE, midpoint=DEFAULT_MIDPOINT)