Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 56 additions & 5 deletions briefing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1761,6 +1761,15 @@ def _ke_has_recurrence(db: sqlite3.Connection) -> bool:
return False


def _ke_has_last_accessed(db: sqlite3.Connection) -> bool:
    """Report whether ``knowledge_entries`` has a ``last_accessed_at`` column.

    The column is added by the ``knowledge_entry_access_tracking`` migration.
    The schema is probed with ``PRAGMA table_info``; any exception raised while
    probing is swallowed and reported as ``False``.
    """
    try:
        table_info = db.execute("PRAGMA table_info(knowledge_entries)").fetchall()
        # Index 1 of every PRAGMA table_info row holds the column name.
        for column in table_info:
            if column[1] == "last_accessed_at":
                return True
        return False
    except Exception:
        return False


def _intensity_order_expr(alias: str = "ke", has_priority: bool = False) -> str:
"""SQL ORDER BY expression that ranks entries by priority then intensity.

Expand Down Expand Up @@ -1796,6 +1805,27 @@ def _recency_decay(last_seen_str: str | None, half_life_days: float = 30.0) -> f
return 1.0


def _decay_weight(last_accessed_at: str, half_life_days: int = 90) -> float:
    """Ebbinghaus exponential decay: exp(-ln(2) * days_since / half_life_days).

    Args:
        last_accessed_at: ISO-8601 timestamp of the last access. A ``Z``
            suffix is accepted and naive timestamps are interpreted as UTC.
        half_life_days: Number of days after which the weight halves.

    Returns:
        1.0 (no decay penalty) when the timestamp is empty/None, is not a
        parseable ISO string, lies in the future, or when ``half_life_days``
        is not a positive number. Otherwise a weight between 0 and 1 that
        shrinks as the last access gets older.
    """
    import math

    if not last_accessed_at:
        return 1.0
    try:
        # A zero half-life would divide by zero and a negative one would
        # produce weights above 1.0; both are treated as "decay disabled".
        if not half_life_days > 0:
            return 1.0
        last = datetime.datetime.fromisoformat(last_accessed_at.replace("Z", "+00:00"))
        if last.tzinfo is None:
            last = last.replace(tzinfo=datetime.timezone.utc)
        now = datetime.datetime.now(datetime.timezone.utc)
        # Clamp so a timestamp in the future never boosts the weight above 1.0.
        days_since = max(0.0, (now - last).total_seconds() / 86400)
        return math.exp(-math.log(2) * days_since / half_life_days)
    except (AttributeError, TypeError, ValueError):
        # AttributeError covers truthy non-string inputs (no ``.replace``).
        return 1.0


def _get_briefing_half_life(db: sqlite3.Connection) -> float:
"""Return the configured recency half-life in days.

Expand Down Expand Up @@ -1909,7 +1939,8 @@ def _recency_composite_score(entry: dict, half_life_days: float) -> float:
confidence_raw = entry.get("confidence")
intensity = float(confidence_raw) if confidence_raw is not None else 0.5
decay = _recency_decay(entry.get("last_seen"), half_life_days)
return priority_base + intensity * decay
access_decay = _decay_weight(entry.get("last_accessed_at", ""))
return priority_base + intensity * decay * access_decay


def search_knowledge_entries(
Expand Down Expand Up @@ -1938,13 +1969,15 @@ def search_knowledge_entries(
has_priority = _ke_has_priority(db)
has_recurrence = _ke_has_recurrence(db)
has_is_resolved = _ke_has_is_resolved(db)
has_last_accessed = _ke_has_last_accessed(db)
order_by = _intensity_order_expr("ke", has_priority) if has_intensity else "ke.confidence DESC, rank"
# Extra columns fetched so Python-level recency composite scoring has priority + intensity + age.
_rec_cols = ", COALESCE(ke.intensity, 0.5) as intensity, ke.last_seen" if has_intensity else ", ke.last_seen"
_priority_col = ", COALESCE(ke.priority, 'P2') as priority" if has_priority else ""
_recurrence_col = (
", COALESCE(ke.recurrence_after_briefing, 0) AS recurrence_after_briefing" if has_recurrence else ""
)
_last_accessed_col = ", ke.last_accessed_at" if has_last_accessed else ""

# Build optional date-filter clause and params
_date_clause = " AND ke.last_seen >= ?" if since_date else ""
Expand All @@ -1970,7 +2003,7 @@ def search_knowledge_entries(
d.doc_type as source_doc_type,
d.title as source_doc_title,
d.file_path as source_doc_file_path,
d.seq as source_doc_seq{_rec_cols}{_priority_col}{_recurrence_col}
d.seq as source_doc_seq{_rec_cols}{_priority_col}{_recurrence_col}{_last_accessed_col}
FROM ke_fts fts
JOIN knowledge_entries ke ON fts.rowid = ke.id
LEFT JOIN documents d ON ke.document_id = d.id
Expand All @@ -1988,7 +2021,7 @@ def search_knowledge_entries(
rows = db.execute(
f"""
SELECT ke.id, ke.title, ke.content, ke.tags,
ke.confidence, ke.session_id, ke.occurrence_count{_rec_cols}{_priority_col}{_recurrence_col}
ke.confidence, ke.session_id, ke.occurrence_count{_rec_cols}{_priority_col}{_recurrence_col}{_last_accessed_col}
FROM ke_fts fts
JOIN knowledge_entries ke ON fts.rowid = ke.id
WHERE ke_fts MATCH ?
Expand Down Expand Up @@ -2017,7 +2050,7 @@ def search_knowledge_entries(
d.doc_type as source_doc_type,
d.title as source_doc_title,
d.file_path as source_doc_file_path,
d.seq as source_doc_seq{_rec_cols}{_priority_col}{_recurrence_col}
d.seq as source_doc_seq{_rec_cols}{_priority_col}{_recurrence_col}{_last_accessed_col}
FROM ke_fts fts
JOIN knowledge_entries ke ON fts.rowid = ke.id
LEFT JOIN documents d ON ke.document_id = d.id
Expand All @@ -2035,7 +2068,7 @@ def search_knowledge_entries(
rows = db.execute(
f"""
SELECT ke.id, ke.title, ke.content, ke.tags,
ke.confidence, ke.session_id, ke.occurrence_count{_rec_cols}{_priority_col}{_recurrence_col}
ke.confidence, ke.session_id, ke.occurrence_count{_rec_cols}{_priority_col}{_recurrence_col}{_last_accessed_col}
FROM ke_fts fts
JOIN knowledge_entries ke ON fts.rowid = ke.id
WHERE ke_fts MATCH ?
Expand Down Expand Up @@ -2752,6 +2785,24 @@ def _entity_bonus(e: dict) -> float:
)
_upsert_entry_recall_stats(db, selected_entry_ids, rewritten_query)

# Update access tracking for surfaced entries (Ebbinghaus decay — issue #769)
try:
now_iso = datetime.datetime.now(datetime.timezone.utc).isoformat()
for entries in briefing_data.values():
for entry in entries:
eid = entry.get("id")
if eid:
try:
db.execute(
"UPDATE knowledge_entries SET last_accessed_at=?, access_count=access_count+1 WHERE id=?",
(now_iso, eid),
)
except sqlite3.OperationalError:
pass # Column not yet migrated — skip silently
db.commit()
except Exception:
pass # fail-open: access tracking is non-critical

db.close()

# Check if we have anything
Expand Down
9 changes: 9 additions & 0 deletions migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1813,6 +1813,15 @@ def _ensure_base_schema(db: sqlite3.Connection):
"CREATE INDEX IF NOT EXISTS idx_ke_ent_type_val ON knowledge_entities(entity_type, entity_value)",
],
),
(
38,
"knowledge_entry_access_tracking",
[
"ALTER TABLE knowledge_entries ADD COLUMN last_accessed_at TEXT DEFAULT ''",
"ALTER TABLE knowledge_entries ADD COLUMN access_count INTEGER DEFAULT 0",
"CREATE INDEX IF NOT EXISTS idx_ke_last_accessed ON knowledge_entries(last_accessed_at)",
],
),
]
applied = 0
for ver, name, stmts in MIGRATIONS:
Expand Down
78 changes: 77 additions & 1 deletion retro.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,70 @@ def _bar(value: float, width: int = 20) -> str:
return "█" * filled + "░" * (width - filled)


def _decay_weight(last_accessed_at: str, half_life_days: int = 90) -> float:
    """Ebbinghaus exponential decay: exp(-ln(2) * days_since / half_life_days).

    Duplicated from briefing.py because standalone scripts must not
    cross-import.

    Args:
        last_accessed_at: ISO-8601 timestamp of the last access. A ``Z``
            suffix is accepted and naive timestamps are interpreted as UTC.
        half_life_days: Number of days after which the weight halves.

    Returns:
        1.0 (no decay penalty) when the timestamp is empty/None, is not a
        parseable ISO string, lies in the future, or when ``half_life_days``
        is not a positive number. Otherwise a weight between 0 and 1 that
        shrinks as the last access gets older.
    """
    import datetime
    import math

    if not last_accessed_at:
        return 1.0
    try:
        # A zero half-life would divide by zero and a negative one would
        # produce weights above 1.0; both are treated as "decay disabled".
        if not half_life_days > 0:
            return 1.0
        last = datetime.datetime.fromisoformat(last_accessed_at.replace("Z", "+00:00"))
        if last.tzinfo is None:
            last = last.replace(tzinfo=datetime.timezone.utc)
        now = datetime.datetime.now(datetime.timezone.utc)
        # Clamp so a timestamp in the future never boosts the weight above 1.0.
        days_since = max(0.0, (now - last).total_seconds() / 86400)
        return math.exp(-math.log(2) * days_since / half_life_days)
    except (AttributeError, TypeError, ValueError):
        # AttributeError covers truthy non-string inputs (no ``.replace``).
        return 1.0


def _memory_health_section(db: sqlite3.Connection, days_threshold: int = 30) -> str:
    """Return a 'memory health' report section listing stale knowledge entries.

    Looks at up to 200 entries that have a non-empty ``last_accessed_at`` or a
    positive ``access_count``, oldest access first, and flags every entry
    whose ``_decay_weight`` is below 0.3.

    Args:
        db: Open connection to the knowledge database.
        days_threshold: Currently unused; kept so existing callers that pass
            it keep working.

    Returns:
        The formatted section (at most five entries listed, plus a count of
        the remainder), or ``""`` when the access-tracking columns are
        missing, no entry qualifies, or nothing is stale.
    """
    try:
        # Only the columns that are actually rendered are selected, so the
        # query does not fail on a schema lacking unrelated columns.
        rows = db.execute(
            """SELECT id, title, last_accessed_at
               FROM knowledge_entries
               WHERE last_accessed_at != '' OR access_count > 0
               ORDER BY last_accessed_at ASC
               LIMIT 200"""
        ).fetchall()
    except sqlite3.OperationalError:
        return ""  # Column not yet migrated

    stale = []
    for row in rows:
        # Positional indexing works for plain tuples and sqlite3.Row alike.
        weight = _decay_weight(row[2])
        if weight < 0.3:
            title = row[1]
            stale.append(
                {
                    "id": row[0],
                    # A NULL title must not break the slice in the report line.
                    "title": "" if title is None else str(title),
                    "decay": round(weight, 3),
                }
            )

    if not stale:
        return ""

    lines = [f"\n⚠️ Memory health — {len(stale)} stale entries (decay < 0.3):"]
    lines.extend(f" #{s['id']} {s['title'][:60]} (decay={s['decay']})" for s in stale[:5])
    if len(stale) > 5:
        lines.append(f" ... and {len(stale) - 5} more")
    lines.append(" → Run: sk learn --refresh <id> to reset decay clock")
    return "\n".join(lines)


def format_score_line(payload: dict) -> str:
score = payload.get("retro_score", 0)
grade = payload.get("grade", "")
Expand Down Expand Up @@ -1375,7 +1439,19 @@ def main() -> None:
section = args.get("subreport") or ""
print(format_subreport(payload, section))
else:
print(format_text_report(payload))
report = format_text_report(payload)
# Memory health section (Ebbinghaus decay — issue #769)
if mode != "repo" and KNOWLEDGE_DB.exists():
try:
_db = sqlite3.connect(str(KNOWLEDGE_DB))
_db.row_factory = sqlite3.Row
mem_health = _memory_health_section(_db)
_db.close()
if mem_health:
Comment thread
magicpro97 marked this conversation as resolved.
report = report + "\n" + mem_health
except Exception:
pass
print(report)


if __name__ == "__main__":
Expand Down
Loading