Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -116,38 +116,6 @@ The script will:
- Deduplicate against existing entities
- Display confirmation with the total count

### Step 5: Assess Influence of Recalled Entities

Regardless of whether Step 4 saved new entities, judge whether the guidelines the recall hook served to *this* session were actually followed, contradicted, or simply irrelevant. This closes the provenance loop: the recall hook records *what* was served; this step records *what effect* it had.

1. Derive this session's `session_id` from the `saved_trajectory_path` extracted in Step 0: strip the directory prefix and the `claude-transcript_` / `.jsonl` affixes. For `.evolve/trajectories/claude-transcript_abc-123.jsonl` the `session_id` is `abc-123`.

2. Read `.evolve/audit.log` (JSONL, one object per line). Find every line where `event == "recall"` and `session_id` matches. Take the union of their `entities` arrays — that is the set of guideline identifiers served to this session. Each identifier is a relative path from `.evolve/entities/` without the `.md` suffix (e.g. `guideline/foo` for a local entity, or `subscribed/alice/guideline/foo` for a subscribed one), so it unambiguously names one file. If the set is empty, skip this step.

3. For each identifier, open `.evolve/entities/<id>.md` with the Read tool. Read its content and trigger — together they express the guideline's intent. If the file is not found, skip the identifier (log it as an assessment-less entry).

4. Compare each guideline against the transcript loaded in Step 0. For each identifier, pick one verdict:
- `followed` — the agent's actual actions are consistent with the guideline's recommendation.
- `contradicted` — the guideline's trigger matched the task but the agent did the opposite, or hit the dead end the guideline would have prevented.
- `not_applicable` — the guideline's trigger didn't match what this session was about.

Keep `evidence` to one short sentence citing a specific action or tool call from the transcript.

5. Emit one JSON payload and pipe it to the helper:

```bash
echo '{
"session_id": "<session-id>",
"assessments": [
{"entity": "guideline/<slug>", "verdict": "followed", "evidence": "Agent imported struct and parsed APP1 directly"}
]
}' | python3 ${CLAUDE_PLUGIN_ROOT}/skills/learn/scripts/log_influence.py
```

The `entity` value must match exactly what appeared in the recall event — include the `subscribed/<source>/` prefix if the entity came from a subscribed repo.

Emit zero assessments (empty `assessments` list) when no recall events exist for this session.

## Quality Gate

Before saving, review each entity against this checklist:
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@

# Add lib to path so we can import entity_io
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent / "lib"))
from entity_io import find_recall_entity_dirs, get_evolve_dir, markdown_to_entity, log as _log
import audit
from entity_io import find_recall_entity_dirs, markdown_to_entity, log as _log


def log(message):
Expand Down Expand Up @@ -83,13 +82,6 @@ def load_entities_with_source(entities_dir):
entity = markdown_to_entity(md)
if not entity.get("content"):
continue
# Record the on-disk path relative to entities_dir (without the
# .md suffix) as a qualified identifier. This distinguishes
# same-named entities in different trees — e.g.
# "guideline/foo" (local) vs "subscribed/alice/guideline/foo"
# (from a subscribed repo) — so downstream auditing doesn't
# collapse them into one.
entity["_id"] = str(md.relative_to(entities_dir).with_suffix(""))
# Detect subscribed entities by path: .../entities/subscribed/{name}/...
parts = md.parts
try:
Expand Down Expand Up @@ -137,24 +129,6 @@ def main():
print(output)
log(f"Output {len(output)} chars to stdout")

# Audit: record which entities were served to which session. Must not
# fail the hook if logging errors — recall is the user-visible path.
try:
transcript_path = input_data.get("transcript_path", "")
session_id = Path(transcript_path).stem if transcript_path else None
entity_ids = sorted({e["_id"] for e in entities if e.get("_id")})
if session_id and entity_ids:
project_root = get_evolve_dir().resolve().parent
audit.append(
project_root=str(project_root),
event="recall",
session_id=session_id,
entities=entity_ids,
)
log(f"Audit: recall session_id={session_id} entities={len(entity_ids)}")
except Exception as exc:
log(f"Audit append failed (non-fatal): {exc}")


if __name__ == "__main__":
main()
33 changes: 0 additions & 33 deletions tests/e2e/test_sandbox_learn_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,36 +164,3 @@ def test_learn_then_recall_flow(sandbox_ready, sandbox_workspace):
# pip-installed). Other libraries (PIL, piexif, exifread) may appear in a
# valid guideline as "install via pip and use", so we don't ban them.
assert not re.search(r"\bexiftool\b", joined), "session 2 invoked exiftool despite recall guideline:\n" + "\n".join(commands)

# --- Usage provenance: audit.log should record recall + influence ---
audit_log = sandbox_workspace / ".evolve" / "audit.log"
assert audit_log.is_file(), f"{audit_log} was not created — recall did not append audit events"

events = []
for line in audit_log.read_text().splitlines():
line = line.strip()
if not line:
continue
events.append(json.loads(line))

session2_id = session2_transcript.stem.removeprefix("claude-transcript_")
# Recall audit records qualified ids — path relative to .evolve/entities/
# without the .md suffix — so we match session 1's entities the same way.
session1_ids = {str(p.relative_to(entities_dir).with_suffix("")) for p in entity_files}

recall_events = [e for e in events if e.get("event") == "recall" and e.get("session_id") == session2_id]
assert recall_events, f"no recall audit event for session 2 ({session2_id}). all events: {events}"
recalled_ids = {eid for e in recall_events for eid in e.get("entities", [])}
assert recalled_ids & session1_ids, f"recall event entities {recalled_ids} did not include any id from session 1 ({session1_ids})"
log.info(f"session 2: audit recorded recall of {recalled_ids}")

influence_events = [e for e in events if e.get("event") == "influence" and e.get("session_id") == session2_id]
assert influence_events, (
f"no influence audit event for session 2 ({session2_id}). recall events exist but learn did not emit assessments."
)
for ie in influence_events:
assert ie.get("verdict") in {"followed", "contradicted", "not_applicable"}, f"influence event has invalid verdict: {ie}"
log.info(
f"session 2: audit recorded {len(influence_events)} influence assessment(s): "
f"{[(e['entity'], e['verdict']) for e in influence_events]}"
)
Loading
Loading