From 6108d210ce8e4ef43a56e2fccba779795639f6b2 Mon Sep 17 00:00:00 2001 From: Tarek Kekhia Date: Sun, 10 May 2026 11:58:27 -0400 Subject: [PATCH 1/9] feat(corpus-index): add PageIndex-style navigation tree + 5 new chat tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build a hierarchical index over zero/**/*.md by parsing # headers and slicing each section's line range; persist 446 nodes (22 H1 / 221 H2 / 181 H3 / 22 H4) to a new `corpus_node` SQLite table. Each node carries a placeholder summary (first sentence) for outline-time rendering. Wire five new tools into the tutor-chat tool set so Claude can navigate the corpus instead of guessing or being limited to whatever page the learner is on: - `get_corpus_outline({phase_id?, max_level?})` — H1+ tree as Markdown - `get_section(slug)` — verbatim Markdown of any node, with `did_you_mean` suggestions on miss - `search_corpus(query)` — substring + token-overlap ranker - `list_glossary(prefix?)` — read-side companion to add_glossary_term - `list_concepts({phase_id?, status?})` — ditto for mark_concept_status System prompt now injects the lean H1-only outline (~2 KB) alongside the current page's full text and the available concept slugs. The model is instructed to use search/get_section for cross-phase questions rather than hand-waving. End-to-end verified: cross-phase query "how does ion exchange in water treatment relate to crown-ether LLX in lithium-6 separation?" produced a precise multi-source answer in 3 tool calls (outline → search → get_section) with citations from both phases. Bad-slug path verified: get_section('atomic-hypothesis') returns not_found + 5 did_you_mean suggestions; the model self-corrects to the real concept slug. Why: the session simulation surfaced four friction points (whole-page injection brittleness, hallucinated identifiers, no TOC reasoning, glossary blindness). 
Research summarized at web/docs/pageindex-research.md recommended lifting the PageIndex *pattern* (TOC tree + 3 nav tools) into a TS-native, SQLite-resident reimplementation rather than running their Python pipeline or hosted MCP. This commit ships the half-day MVP. Co-Authored-By: Claude Opus 4.7 (1M context) --- web/package.json | 1 + web/scripts/build-corpus-index.ts | 14 ++ web/scripts/check-corpus-sizes.ts | 18 +++ web/scripts/check-corpus.ts | 30 ++++ web/src/app/api/chat/route.ts | 94 ++++++++++++- web/src/app/api/sync/route.ts | 4 +- web/src/lib/content-loader.ts | 5 + web/src/lib/corpus-builder.ts | 221 +++++++++++++++++++++++++++++ web/src/lib/corpus-index.ts | 226 ++++++++++++++++++++++++++++++ web/src/lib/db.ts | 40 ++++++ web/src/lib/tutor-prompt.ts | 94 ++++++++++++- 11 files changed, 738 insertions(+), 9 deletions(-) create mode 100644 web/scripts/build-corpus-index.ts create mode 100644 web/scripts/check-corpus-sizes.ts create mode 100644 web/scripts/check-corpus.ts create mode 100644 web/src/lib/corpus-builder.ts create mode 100644 web/src/lib/corpus-index.ts diff --git a/web/package.json b/web/package.json index 42266a9..b8a0bd6 100644 --- a/web/package.json +++ b/web/package.json @@ -8,6 +8,7 @@ "start": "next start", "lint": "next lint", "ingest": "tsx scripts/ingest.ts", + "build-index": "tsx scripts/build-corpus-index.ts", "typecheck": "tsc --noEmit" }, "dependencies": { diff --git a/web/scripts/build-corpus-index.ts b/web/scripts/build-corpus-index.ts new file mode 100644 index 0000000..2bd4d23 --- /dev/null +++ b/web/scripts/build-corpus-index.ts @@ -0,0 +1,14 @@ +import { syncPagesToDb } from "../src/lib/content-loader"; +import { buildCorpusIndex } from "../src/lib/corpus-builder"; + +function main() { + console.log("[build-corpus-index] syncing pages…"); + const pages = syncPagesToDb(); + console.log("[build-corpus-index] pages:", pages); + + console.log("[build-corpus-index] building corpus tree…"); + const result = buildCorpusIndex(); + 
console.log("[build-corpus-index] result:", result); +} + +main(); diff --git a/web/scripts/check-corpus-sizes.ts b/web/scripts/check-corpus-sizes.ts new file mode 100644 index 0000000..a90ab28 --- /dev/null +++ b/web/scripts/check-corpus-sizes.ts @@ -0,0 +1,18 @@ +import { renderOutlineMarkdown } from "../src/lib/corpus-index"; + +const variants = [ + { name: "H1 only, 80 char", opts: { maxLevel: 1, summaryLen: 80 } }, + { name: "H1 only, 140 char", opts: { maxLevel: 1, summaryLen: 140 } }, + { name: "H1+H2, no summary", opts: { maxLevel: 2, summaryLen: 0 } }, + { name: "H1+H2, 60 char", opts: { maxLevel: 2, summaryLen: 60 } }, + { name: "H1+H2, 80 char", opts: { maxLevel: 2, summaryLen: 80 } }, + { name: "H1+H2+H3, no summary", opts: { maxLevel: 3, summaryLen: 0 } }, + { name: "H1+H2+H3, 60 char", opts: { maxLevel: 3, summaryLen: 60 } }, +]; +for (const v of variants) { + const txt = renderOutlineMarkdown(v.opts); + const lines = txt.split("\n").length; + console.log( + `${v.name.padEnd(28)} ${String(txt.length).padStart(6)} bytes / ${String(lines).padStart(4)} lines`, + ); +} diff --git a/web/scripts/check-corpus.ts b/web/scripts/check-corpus.ts new file mode 100644 index 0000000..258ccd0 --- /dev/null +++ b/web/scripts/check-corpus.ts @@ -0,0 +1,30 @@ +import { renderOutlineMarkdown, searchCorpus, getSection } from "../src/lib/corpus-index"; + +const compact = renderOutlineMarkdown({ maxLevel: 2, summaryLen: 80 }); +const full = renderOutlineMarkdown({ maxLevel: 6, summaryLen: 140 }); +console.log("compact:", compact.length, "bytes /", compact.split("\n").length, "lines"); +console.log("full: ", full.length, "bytes /", full.split("\n").length, "lines"); +const md = compact; +console.log("outline bytes:", md.length, "lines:", md.split("\n").length); +console.log("first 800 chars:\n" + md.slice(0, 800)); +console.log("\n--- search 'crown ether ion exchange' ---"); +for (const h of searchCorpus("crown ether ion exchange", 6)) { + console.log(` [${h.score}] 
${h.slug} :: ${h.title}`); +} +console.log("\n--- search 'ion-exchange' ---"); +for (const h of searchCorpus("ion-exchange", 6)) { + console.log(` [${h.score}] ${h.slug} :: ${h.title}`); +} +console.log("\n--- getSection rigorous-statement ---"); +const s = getSection( + "01-chemistry-fundamentals/01-matter-and-atoms#rigorous-statement", +); +if (s.ok) { + console.log(`got ${s.word_count} words / lines ${s.line_start}-${s.line_end}`); + console.log(s.content.slice(0, 300)); +} else { + console.log("error:", JSON.stringify(s, null, 2)); +} +console.log("\n--- bad slug ---"); +const bad = getSection("01-chemistry-fundamentals/atomic-hypothesis"); +console.log(JSON.stringify(bad, null, 2)); diff --git a/web/src/app/api/chat/route.ts b/web/src/app/api/chat/route.ts index 75ea0ec..a698d17 100644 --- a/web/src/app/api/chat/route.ts +++ b/web/src/app/api/chat/route.ts @@ -11,7 +11,15 @@ import { upsertGlossaryEntry, } from "@/lib/repos"; import { ensureSeeded, getPageBySlug } from "@/lib/content-loader"; -import { listConceptsByPhase } from "@/lib/repos"; +import { + listConceptsByPhase, + listGlossary, +} from "@/lib/repos"; +import { + getSection, + renderOutlineMarkdown, + searchCorpus, +} from "@/lib/corpus-index"; import { streamChat, type AnthropicTool } from "@/lib/llm"; import { TUTOR_SYSTEM_PROMPT, @@ -31,6 +39,81 @@ const Body = z.object({ function buildTools(ctx: { sessionId: number; page_slug?: string | null }): AnthropicTool[] { return [ + { + ...TUTOR_TOOL_SCHEMAS.get_corpus_outline, + zod_shape: TUTOR_TOOL_ZOD.get_corpus_outline.shape as unknown as Record, + handler: async (input) => { + const data = input as { + phase_id?: string | null; + max_level?: number | null; + include_summaries?: boolean | null; + }; + const md = renderOutlineMarkdown({ + phase_id: data.phase_id ?? undefined, + maxLevel: data.max_level ?? 2, + summaryLen: data.include_summaries === false ? 
0 : 80, + }); + return { ok: true, outline_markdown: md, byte_length: md.length }; + }, + }, + { + ...TUTOR_TOOL_SCHEMAS.get_section, + zod_shape: TUTOR_TOOL_ZOD.get_section.shape as unknown as Record, + handler: async (input) => { + const data = input as { slug: string }; + const r = getSection(data.slug); + return r; + }, + }, + { + ...TUTOR_TOOL_SCHEMAS.search_corpus, + zod_shape: TUTOR_TOOL_ZOD.search_corpus.shape as unknown as Record, + handler: async (input) => { + const data = input as { query: string; limit?: number | null }; + const hits = searchCorpus(data.query, data.limit ?? 8); + return { ok: true, hits }; + }, + }, + { + ...TUTOR_TOOL_SCHEMAS.list_glossary, + zod_shape: TUTOR_TOOL_ZOD.list_glossary.shape as unknown as Record, + handler: async (input) => { + const data = input as { prefix?: string | null }; + const rows = listGlossary(data.prefix ?? undefined); + return { + ok: true, + terms: rows.map((r) => ({ + term: r.term, + symbol: r.symbol, + units: r.units, + short_definition: r.definition.slice(0, 140), + })), + }; + }, + }, + { + ...TUTOR_TOOL_SCHEMAS.list_concepts, + zod_shape: TUTOR_TOOL_ZOD.list_concepts.shape as unknown as Record, + handler: async (input) => { + const data = input as { + phase_id?: string | null; + status?: "todo" | "exposed" | "comfortable" | "solid" | null; + }; + const rows = listConceptsByPhase(data.phase_id ?? undefined); + const filtered = data.status + ? 
rows.filter((c) => c.status === data.status) + : rows; + return { + ok: true, + concepts: filtered.map((c) => ({ + slug: c.slug, + label: c.label, + section: c.section, + status: c.status, + })), + }; + }, + }, { ...TUTOR_TOOL_SCHEMAS.add_glossary_term, zod_shape: TUTOR_TOOL_ZOD.add_glossary_term.shape as unknown as Record, @@ -158,7 +241,14 @@ export async function POST(req: NextRequest) { .map((c) => `- \`${c.slug}\` (${c.status}) — ${c.label}`) .join("\n") : ""; - const systemWithConcepts = TUTOR_SYSTEM_PROMPT + conceptsBlock; + + // PageIndex-style corpus outline. Lean: H1 only with first-sentence + // summaries (~2 KB). The model can call `get_corpus_outline(max_level=2)` to + // drill into a phase, or `get_section(slug)` to fetch any section verbatim. + const outlineMd = renderOutlineMarkdown({ maxLevel: 1, summaryLen: 140 }); + const outlineBlock = `\n\n# Corpus outline (table of contents)\nEvery page in the curriculum, slug → one-sentence summary. The current learner-visible page is included verbatim above; for ANY OTHER page or section, call:\n- \`get_corpus_outline({ phase_id?, max_level: 2 | 3 })\` to expand a phase down to subsections,\n- \`search_corpus(query)\` to find sections by keyword,\n- \`get_section(slug)\` to fetch verbatim Markdown of any node.\nDo NOT guess slugs — the outline + search are authoritative.\n\n${outlineMd}`; + + const systemWithConcepts = TUTOR_SYSTEM_PROMPT + outlineBlock + conceptsBlock; const history = listChatMessages(session.id); // The user message we just added is the last one; remove it from history. 
const history_for_llm = history diff --git a/web/src/app/api/sync/route.ts b/web/src/app/api/sync/route.ts index 4223ba7..b3b2da3 100644 --- a/web/src/app/api/sync/route.ts +++ b/web/src/app/api/sync/route.ts @@ -1,4 +1,5 @@ import { syncConceptsToDb, syncPagesToDb } from "@/lib/content-loader"; +import { buildCorpusIndex } from "@/lib/corpus-builder"; import { regenerateGlossaryFile, regenerateKnowledgeTrackerFile, @@ -10,8 +11,9 @@ export const runtime = "nodejs"; export async function POST() { const pages = syncPagesToDb(); const concepts = syncConceptsToDb(); + const corpus = buildCorpusIndex(); regenerateGlossaryFile(); regenerateKnowledgeTrackerFile(); regenerateQAFile(); - return Response.json({ pages, concepts }); + return Response.json({ pages, concepts, corpus }); } diff --git a/web/src/lib/content-loader.ts b/web/src/lib/content-loader.ts index 8aedbf9..95762a8 100644 --- a/web/src/lib/content-loader.ts +++ b/web/src/lib/content-loader.ts @@ -236,4 +236,9 @@ export function ensureSeeded() { if (pageCount === 0) syncPagesToDb(); const conceptCount = (db.prepare("SELECT COUNT(*) AS n FROM concepts").get() as { n: number }).n; if (conceptCount === 0) syncConceptsToDb(); + const nodeCount = (db.prepare("SELECT COUNT(*) AS n FROM corpus_node").get() as { n: number }).n; + if (nodeCount === 0) { + // Lazy-import to avoid pulling fs/crypto into edge bundles. + import("./corpus-builder").then((m) => m.buildCorpusIndex()).catch(() => {}); + } } diff --git a/web/src/lib/corpus-builder.ts b/web/src/lib/corpus-builder.ts new file mode 100644 index 0000000..701f705 --- /dev/null +++ b/web/src/lib/corpus-builder.ts @@ -0,0 +1,221 @@ +/** + * Build a hierarchical "PageIndex-style" navigation tree over the markdown + * corpus and persist it to SQLite (`corpus_node`). 
+ * + * Nodes are derived from `#` headings: + * level 1 → the page itself + * level 2 → ## sections within the page + * level 3+ → ### and deeper subsections (HEADING_RE accepts up to ######) + * + * Each node holds a line range [line_start, line_end] within the source file + * and a one-sentence `summary` placeholder (first non-heading sentence in the + * section). LLM-written summaries can backfill later via a separate pass. + */ +import crypto from "node:crypto"; +import { getDb, type CorpusNodeRow } from "./db"; +import { discoverPages } from "./content-loader"; +import { slugify } from "./slug"; + +type Heading = { + line: number; // 1-indexed + level: number; // 1, 2, 3... + title: string; +}; + +const HEADING_RE = /^(#{1,6})\s+(.+?)\s*$/; +// Lines inside ``` fences are not headings. +function findHeadings(source: string): Heading[] { + const lines = source.split(/\r?\n/); + const out: Heading[] = []; + let inFence = false; + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (/^```/.test(line)) { + inFence = !inFence; + continue; + } + if (inFence) continue; + const m = line.match(HEADING_RE); + if (m) { + out.push({ line: i + 1, level: m[1].length, title: m[2].trim() }); + } + } + return out; +} + +function firstSentence(text: string): string { + // Drop every line that starts with `#` (the section's own heading and any nested ones); collapse to first sentence. + const body = text + .split(/\r?\n/) + .filter((l) => !/^\s*#/.test(l)) + .join(" ") + .replace(/\s+/g, " ") + .replace(/`+/g, "") + .trim(); + if (!body) return ""; + // Sentence boundary: . ! ? followed by whitespace or end of string (no capital-letter check); falls back to the first 240 chars. + const m = body.match(/^(.{20,400}?[.!?])(\s|$)/); + return (m ? 
m[1] : body.slice(0, 240)).trim(); +} + +function wordCount(text: string): number { + return text.split(/\s+/).filter(Boolean).length; +} + +function hash(s: string): string { + return crypto.createHash("sha256").update(s).digest("hex").slice(0, 16); +} + +function dedupSlug(base: string, used: Set): string { + if (!used.has(base)) { + used.add(base); + return base; + } + let i = 2; + while (used.has(`${base}-${i}`)) i++; + used.add(`${base}-${i}`); + return `${base}-${i}`; +} + +export function buildCorpusIndex(): { added: number; total: number; pages: number } { + const db = getDb(); + const pages = discoverPages(); + const beforeCount = (db.prepare("SELECT COUNT(*) AS n FROM corpus_node").get() as { n: number }).n; + + const upsert = db.prepare(` + INSERT INTO corpus_node ( + slug, page_slug, phase_id, phase_number, file, parent_slug, level, title, + line_start, line_end, sort_order, summary, word_count, content_hash, updated_at + ) VALUES ( + @slug, @page_slug, @phase_id, @phase_number, @file, @parent_slug, @level, @title, + @line_start, @line_end, @sort_order, @summary, @word_count, @content_hash, datetime('now') + ) + ON CONFLICT(slug) DO UPDATE SET + page_slug = excluded.page_slug, + phase_id = excluded.phase_id, + phase_number = excluded.phase_number, + file = excluded.file, + parent_slug = excluded.parent_slug, + level = excluded.level, + title = excluded.title, + line_start = excluded.line_start, + line_end = excluded.line_end, + sort_order = excluded.sort_order, + summary = excluded.summary, + word_count = excluded.word_count, + content_hash = excluded.content_hash, + updated_at = datetime('now') + `); + + const deletePageNodes = db.prepare("DELETE FROM corpus_node WHERE page_slug = ?"); + + const tx = db.transaction(() => { + let globalOrder = 0; + for (const page of pages) { + // Wipe and re-insert this page's nodes — keeps the table clean across + // header renames. 
+ deletePageNodes.run(page.slug); + + const lines = page.content.split(/\r?\n/); + const headings = findHeadings(page.content); + + // Synthesize a level-1 heading for the page if none exists. + const hasH1 = headings.some((h) => h.level === 1); + const effective: Heading[] = hasH1 + ? headings + : [{ line: 1, level: 1, title: page.title }, ...headings]; + + type Node = Heading & { + slug: string; + parent_slug: string | null; + line_end: number; + }; + const stack: Node[] = []; + const nodes: Node[] = []; + const usedSlugs = new Set(); + + for (let i = 0; i < effective.length; i++) { + const h = effective[i]; + // Pop until the top is strictly higher in level (smaller number) + while (stack.length && stack[stack.length - 1].level >= h.level) stack.pop(); + const parent = stack[stack.length - 1] ?? null; + + // Slug: + // level 1 → page.slug (matches pages.slug for joining) + // level 2+ → page.slug + "#" + slugified-heading (deduped per-page) + let slug: string; + if (h.level === 1 && parent === null) { + slug = page.slug; + } else { + const headSlug = slugify(h.title) || `section-${i}`; + const candidate = `${page.slug}#${headSlug}`; + slug = dedupSlug(candidate, usedSlugs); + } + + // line_end: until the next heading at level <= h.level, or EOF + let line_end = lines.length; + for (let j = i + 1; j < effective.length; j++) { + if (effective[j].level <= h.level) { + line_end = effective[j].line - 1; + break; + } + } + + nodes.push({ + ...h, + slug, + parent_slug: parent ? parent.slug : null, + line_end, + }); + stack.push({ ...h, slug, parent_slug: parent ? 
parent.slug : null, line_end }); + } + + for (const n of nodes) { + const sectionLines = lines.slice(n.line - 1, n.line_end); + const sectionText = sectionLines.join("\n"); + const summary = firstSentence(sectionText); + upsert.run({ + slug: n.slug, + page_slug: page.slug, + phase_id: page.phase_id, + phase_number: page.phase_number, + file: page.relative_path, + parent_slug: n.parent_slug, + level: n.level, + title: n.title, + line_start: n.line, + line_end: n.line_end, + sort_order: globalOrder++, + summary: summary || null, + word_count: wordCount(sectionText), + content_hash: hash(sectionText), + }); + } + } + }); + tx(); + + const after = (db.prepare("SELECT COUNT(*) AS n FROM corpus_node").get() as { n: number }).n; + return { added: after - beforeCount, total: after, pages: pages.length }; +} + +// Convenience accessors used by the rest of the app. + +export function listCorpusNodes(): CorpusNodeRow[] { + return getDb() + .prepare("SELECT * FROM corpus_node ORDER BY sort_order ASC") + .all() as CorpusNodeRow[]; +} + +export function getCorpusNode(slug: string): CorpusNodeRow | null { + return ( + (getDb().prepare("SELECT * FROM corpus_node WHERE slug = ?").get(slug) as CorpusNodeRow) ?? + null + ); +} + +export function listCorpusNodesByPhase(phase_id: string): CorpusNodeRow[] { + return getDb() + .prepare("SELECT * FROM corpus_node WHERE phase_id = ? ORDER BY sort_order ASC") + .all(phase_id) as CorpusNodeRow[]; +} diff --git a/web/src/lib/corpus-index.ts b/web/src/lib/corpus-index.ts new file mode 100644 index 0000000..cd3a63c --- /dev/null +++ b/web/src/lib/corpus-index.ts @@ -0,0 +1,226 @@ +/** + * Query helpers over the `corpus_node` tree built by `corpus-builder.ts`. + * + * These power the Claude tools (`get_corpus_outline`, `get_section`, + * `search_corpus`) and the system-prompt outline injection. 
+ */ +import fs from "node:fs"; +import path from "node:path"; +import { CONTENT_DIR } from "./paths"; +import { getDb, type CorpusNodeRow } from "./db"; + +export type OutlineNode = { + slug: string; + title: string; + level: number; + parent_slug: string | null; + page_slug: string; + phase_id: string; + phase_number: number; + word_count: number; + summary: string | null; +}; + +export function getCorpusOutline(): OutlineNode[] { + return getDb() + .prepare( + `SELECT slug, title, level, parent_slug, page_slug, phase_id, phase_number, + word_count, summary + FROM corpus_node + ORDER BY sort_order ASC`, + ) + .all() as OutlineNode[]; +} + +/** + * Render the corpus outline as compact Markdown bullets nested by header level. + * Used for system-prompt injection and by the `get_corpus_outline` tool. + * + * Defaults: H1+H2 only, 80-char summaries; callers may override both (the system prompt passes H1 only, 140 chars). + * For the full tree (including H3+), call with `maxLevel: 6`. + */ +export function renderOutlineMarkdown(opts: { + maxLevel?: number; + summaryLen?: number; + phase_id?: string; +} = {}): string { + const maxLevel = opts.maxLevel ?? 2; + const summaryLen = opts.summaryLen ?? 80; + const nodes = opts.phase_id + ? (getDb() + .prepare( + `SELECT slug, title, level, parent_slug, page_slug, phase_id, phase_number, + word_count, summary + FROM corpus_node + WHERE phase_id = ? + ORDER BY sort_order ASC`, + ) + .all(opts.phase_id) as OutlineNode[]) + : getCorpusOutline(); + + const lines: string[] = []; + for (const n of nodes) { + if (n.level > maxLevel) continue; + const indent = " ".repeat(Math.max(0, n.level - 1)); + const summary = + n.summary && summaryLen > 0 + ? ` — ${n.summary.slice(0, summaryLen)}${n.summary.length > summaryLen ? 
"…" : ""}` + : ""; + lines.push(`${indent}- \`${n.slug}\`${summary}`); + } + return lines.join("\n"); +} + +export type SectionResult = { + ok: true; + slug: string; + title: string; + page_slug: string; + phase_id: string; + level: number; + line_start: number; + line_end: number; + word_count: number; + content: string; +}; + +export type SectionError = { + ok: false; + error: "not_found"; + slug_attempted: string; + did_you_mean: Array<{ slug: string; title: string; reason: string }>; +}; + +export function getSection(slug: string): SectionResult | SectionError { + const node = getDb() + .prepare("SELECT * FROM corpus_node WHERE slug = ?") + .get(slug) as CorpusNodeRow | undefined; + + if (!node) { + return { + ok: false, + error: "not_found", + slug_attempted: slug, + did_you_mean: suggestSlugs(slug, 5), + }; + } + + const abs = path.join(CONTENT_DIR, node.file); + if (!fs.existsSync(abs)) { + return { + ok: false, + error: "not_found", + slug_attempted: slug, + did_you_mean: [], + }; + } + const fileText = fs.readFileSync(abs, "utf8"); + const lines = fileText.split(/\r?\n/); + const sliced = lines.slice(node.line_start - 1, node.line_end).join("\n"); + + return { + ok: true, + slug: node.slug, + title: node.title, + page_slug: node.page_slug, + phase_id: node.phase_id, + level: node.level, + line_start: node.line_start, + line_end: node.line_end, + word_count: node.word_count, + content: sliced, + }; +} + +export function suggestSlugs(slug: string, limit = 5): Array<{ slug: string; title: string; reason: string }> { + const needle = slug.toLowerCase(); + const tokens = needle + .split(/[/#\-_]+/) + .filter((t) => t.length > 2); + if (tokens.length === 0) return []; + + const all = getDb() + .prepare("SELECT slug, title FROM corpus_node") + .all() as Array<{ slug: string; title: string }>; + + const scored = all + .map((row) => { + const hay = `${row.slug.toLowerCase()} ${row.title.toLowerCase()}`; + const hits = tokens.filter((t) => hay.includes(t)).length; + 
return { ...row, score: hits }; + }) + .filter((r) => r.score > 0) + .sort((a, b) => b.score - a.score) + .slice(0, limit); + + return scored.map((r) => ({ + slug: r.slug, + title: r.title, + reason: `${r.score} of ${tokens.length} query tokens matched`, + })); +} + +export type SearchHit = { + slug: string; + title: string; + page_slug: string; + phase_id: string; + level: number; + summary: string | null; + score: number; +}; + +/** + * Substring + token-overlap search across titles and summaries. No vectors. + * Adequate for our small corpus; replaceable with FTS5 later if it gets slow. + */ +export function searchCorpus(query: string, limit = 8): SearchHit[] { + const q = query.trim().toLowerCase(); + if (!q) return []; + const tokens = Array.from( + new Set(q.split(/\s+/).filter((t) => t.length >= 3)), + ); + + const rows = getDb() + .prepare( + `SELECT slug, title, page_slug, phase_id, level, summary + FROM corpus_node`, + ) + .all() as Array<{ + slug: string; + title: string; + page_slug: string; + phase_id: string; + level: number; + summary: string | null; + }>; + + const scored = rows + .map((r) => { + const hay = `${r.title} ${r.summary ?? ""} ${r.slug}`.toLowerCase(); + let score = 0; + if (hay.includes(q)) score += 5; + for (const t of tokens) if (hay.includes(t)) score += 1; + // Prefer deeper sections (more specific) over root pages. + if (score > 0 && r.level > 1) score += 0.25; + return { ...r, score }; + }) + .filter((r) => r.score > 0) + .sort((a, b) => b.score - a.score) + .slice(0, limit); + + return scored; +} + +/** + * Extracts the unique top-level page slugs from the corpus, in reading order. 
+ */ +export function listPageSlugs(): string[] { + return ( + getDb() + .prepare( + "SELECT slug FROM corpus_node WHERE level = 1 ORDER BY sort_order ASC", + ) + .all() as Array<{ slug: string }> + ).map((r) => r.slug); +} diff --git a/web/src/lib/db.ts b/web/src/lib/db.ts index 915dce1..6f8d712 100644 --- a/web/src/lib/db.ts +++ b/web/src/lib/db.ts @@ -120,6 +120,28 @@ function migrate(db: Database.Database) { key TEXT PRIMARY KEY, value TEXT NOT NULL ); + + CREATE TABLE IF NOT EXISTS corpus_node ( + slug TEXT PRIMARY KEY, + page_slug TEXT NOT NULL, + phase_id TEXT NOT NULL, + phase_number INTEGER NOT NULL, + file TEXT NOT NULL, + parent_slug TEXT, + level INTEGER NOT NULL, + title TEXT NOT NULL, + line_start INTEGER NOT NULL, + line_end INTEGER NOT NULL, + sort_order INTEGER NOT NULL, + summary TEXT, + word_count INTEGER NOT NULL DEFAULT 0, + content_hash TEXT NOT NULL, + updated_at TEXT NOT NULL DEFAULT (datetime('now')) + ); + + CREATE INDEX IF NOT EXISTS idx_corpus_node_phase ON corpus_node(phase_id, sort_order); + CREATE INDEX IF NOT EXISTS idx_corpus_node_page ON corpus_node(page_slug, line_start); + CREATE INDEX IF NOT EXISTS idx_corpus_node_parent ON corpus_node(parent_slug); `); } @@ -206,3 +228,21 @@ export type ChatMessageRow = { tool_calls: string | null; created_at: string; }; + +export type CorpusNodeRow = { + slug: string; + page_slug: string; + phase_id: string; + phase_number: number; + file: string; + parent_slug: string | null; + level: number; + title: string; + line_start: number; + line_end: number; + sort_order: number; + summary: string | null; + word_count: number; + content_hash: string; + updated_at: string; +}; diff --git a/web/src/lib/tutor-prompt.ts b/web/src/lib/tutor-prompt.ts index 1cbccdf..863f00b 100644 --- a/web/src/lib/tutor-prompt.ts +++ b/web/src/lib/tutor-prompt.ts @@ -12,12 +12,19 @@ export const TUTOR_SYSTEM_PROMPT = `You are Tarek's chemistry tutor inside the L # Your job in this app 1. 
Answer questions grounded in the page Tarek is reading. The verbatim Markdown of that page is included in the system prompt under "# Current page". Treat it as primary source. -2. When something Tarek asks isn't on the current page but is reasonable to answer, answer it and explicitly note the source (your own knowledge vs. the page). -3. If Tarek asks for clarification, prefer Socratic prompting that pushes him to articulate the model himself — *unless* he is explicitly asking for an explanation, in which case give one. -4. When a key term, constant, or named effect comes up, use the **add_glossary_term** tool to persist it. Do this freely — the glossary is meant to grow. -5. When you produce a substantive Q&A exchange that deserves to live in the persistent log, call **append_qa**. Keep the answer concise (a paragraph or two with the load-bearing equation/definition); link out to the page if appropriate. -6. When Tarek demonstrates mastery (teach-back, predicts a consequence, explains in his own words), call **mark_concept_status** to promote the concept to "comfortable" or "solid". Do not promote on a single recognition — the bar is teach-back + consequence prediction. -7. At the end of a session, if the conversation produced meaningful progress, call **append_progress_log** with a 2–4 sentence summary and the list of promoted concepts. +2. When something Tarek asks crosses pages or phases (e.g., "how does ion exchange in water treatment relate to crown ethers in lithium separation?"), use the **corpus tools** to fetch the relevant other sections rather than guessing. The corpus is small (22 files, 446 nodes) and fully indexed — there's no excuse for hand-waving when the source is a tool call away. +3. When something Tarek asks isn't on the current page or in the corpus, answer from your own knowledge but explicitly mark it as such ("the page doesn't cover this; from general knowledge…"). +4. 
If Tarek asks for clarification, prefer Socratic prompting that pushes him to articulate the model himself — *unless* he is explicitly asking for an explanation, in which case give one. +5. When a key term, constant, or named effect comes up, use the **add_glossary_term** tool to persist it. Before adding, you may call **list_glossary** to check what's already there — but don't be precious about it; updating an existing entry is fine. +6. When you produce a substantive Q&A exchange that deserves to live in the persistent log, call **append_qa**. Keep the answer concise (a paragraph or two with the load-bearing equation/definition); link out to the page if appropriate. +7. When Tarek demonstrates mastery (teach-back, predicts a consequence, explains in his own words), call **list_concepts** if you don't know the available slugs, then **mark_concept_status** to promote the concept to "comfortable" or "solid". Do not promote on a single recognition — the bar is teach-back + consequence prediction. +8. At the end of a session, if the conversation produced meaningful progress, call **append_progress_log** with a 2–4 sentence summary and the list of promoted concepts. + +# Corpus navigation tools (PageIndex-style) +- **get_corpus_outline({ phase_id?, max_level: 2 | 3 })** — expand any phase to its sections / subsections. +- **search_corpus("keywords")** — find nodes by title + summary keyword match. +- **get_section("")** — fetch the verbatim Markdown of any node. Slugs come from the outline; never invent them. Bad slugs return a "did_you_mean" suggestion list. +Use these PROACTIVELY for any question that reaches outside the current page. Two cheap tool calls beat one wrong answer. # Style - Markdown. Use \`$inline$\` and \`$$display$$\` LaTeX for math. Tables and code blocks are fine. @@ -35,6 +42,81 @@ export const TUTOR_SYSTEM_PROMPT = `You are Tarek's chemistry tutor inside the L // directly; we also derive JSON schemas for the Anthropic-API path. 
export const TUTOR_TOOL_ZOD = { + get_corpus_outline: { + description: + "Return a hierarchical outline of the corpus (zero/) — the table of contents with optional summaries. Use to discover what's available BEFORE calling get_section. Defaults: all phases, levels 1–2 (page + section). Pass `phase_id` to scope, `max_level: 3` to drill into subsections.", + shape: { + phase_id: z + .string() + .nullable() + .optional() + .describe( + "Optional phase id (e.g., '02-water-treatment'). If omitted, all phases.", + ), + max_level: z + .number() + .int() + .min(1) + .max(6) + .nullable() + .optional() + .describe("Heading depth to include. 1=pages only; 2=+sections; 3=+subsections."), + include_summaries: z + .boolean() + .nullable() + .optional() + .describe("Include first-sentence summaries (default: true)."), + } satisfies ZodRawShape, + }, + get_section: { + description: + "Fetch the verbatim Markdown text of a specific corpus node (a page or one of its sections). The slug is what you see in the outline (e.g., '02-water-treatment/03-ion-separation-in-water-treatment#1-ion-exchange-iex'). If the slug is invalid, the response includes `did_you_mean` suggestions. Use this to read content beyond the page the learner is currently on.", + shape: { + slug: z.string().describe("The corpus node slug from the outline."), + } satisfies ZodRawShape, + }, + search_corpus: { + description: + "Substring + token-overlap search over node titles and summaries. Returns up to N relevant nodes ranked by score. Use when you don't know the exact slug for a topic but know what to look for.", + shape: { + query: z.string().describe("A short search query (e.g., 'crown ether lithium')."), + limit: z + .number() + .int() + .min(1) + .max(20) + .nullable() + .optional() + .describe("Max hits (default 8)."), + } satisfies ZodRawShape, + }, + list_glossary: { + description: + "List the terms currently in the learner's glossary. 
Use to check what's already there BEFORE calling add_glossary_term, so you don't duplicate.", + shape: { + prefix: z + .string() + .nullable() + .optional() + .describe("Optional case-insensitive prefix or substring filter."), + } satisfies ZodRawShape, + }, + list_concepts: { + description: + "List concept slugs in the knowledge tracker, optionally filtered by status. Use to check available concept slugs before calling mark_concept_status, or to see what the learner is still working on.", + shape: { + phase_id: z + .string() + .nullable() + .optional() + .describe("Optional phase filter."), + status: z + .enum(["todo", "exposed", "comfortable", "solid"]) + .nullable() + .optional() + .describe("Optional status filter."), + } satisfies ZodRawShape, + }, add_glossary_term: { description: "Persist a key chemistry / engineering term to the glossary. Use freely whenever a non-obvious term, symbol, or constant comes up. Updates an existing term if one already exists.", From 5639e1571c0f81f3ebfa08a9539022a3364f4894 Mon Sep 17 00:00:00 2001 From: Tarek Kekhia Date: Sun, 10 May 2026 12:25:28 -0400 Subject: [PATCH 2/9] fix(chat): auto-continue when SDK truncates a many-tool-call response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The session simulations kept running into the same failure mode: when Claude tried to emit ~7+ tool_use blocks in a single response, the rest silently disappeared. Diagnosed two compounding causes via stop_reason logging: 1. The SDK's `maxTurns` was set to 6 — but each tool_use round-trip counts as a turn, so a 7-call batch ate the budget. Fix: bump the per-call cap to 60. 2. When the SDK ends with `error_max_turns` (or `error_max_tokens`), that's a recoverable signal, not an error. Fix: detect it on the result message and re-fire `query()` with a continuation prompt that includes the user's original ask + every tool call already made (with inputs) + any prose already streamed. Up to 3 auto-continues.
Also tightened the tutor system prompt with explicit "tool calls first, prose after" guidance, since long prose followed by many tools is the exact pattern that hits the cap. Verified with two stress tests: - 18 glossary terms in one ask → completed in zero auto-continues (the bumped maxTurns alone handles this). - 70 glossary terms (one per element, H–Yb) → completed cleanly with one auto-continue at element ~58. UI shows `_(continuing… 1/3)_`. The Anthropic-API path's max_tokens was also bumped from 4096 to 16384 for parity, though we never observed truncation there in practice. Co-Authored-By: Claude Opus 4.7 (1M context) --- web/src/lib/llm.ts | 107 ++++++++++++++++++---- web/src/lib/tutor-prompt.ts | 7 ++ zero/04-learning/glossary.md | 83 ++++++++++++++--- zero/04-learning/knowledge-tracker.md | 42 ++++----- zero/04-learning/questions-and-answers.md | 64 ++----------- zero/05-meta/progress-log.md | 10 ++ 6 files changed, 207 insertions(+), 106 deletions(-) diff --git a/web/src/lib/llm.ts b/web/src/lib/llm.ts index f92f254..1733202 100644 --- a/web/src/lib/llm.ts +++ b/web/src/lib/llm.ts @@ -112,7 +112,7 @@ export async function* streamAnthropicAPI( const stream = client.messages.stream( { model: DEFAULT_MODEL, - max_tokens: 4096, + max_tokens: 16384, system, messages: messages as never, tools: tools.length ? (tools as never) : undefined, @@ -206,6 +206,10 @@ type SDKContentBlock = | SDKToolResultBlock | { type: string }; +const MAX_AUTO_CONTINUE = 3; +const AUTO_CONTINUE_PROMPT = + "Your previous response in this same turn was cut off. Below is what the user asked, the tool calls you already made (with their results), and any prose you streamed. Pick up exactly where you left off. Do NOT repeat tool calls you already made; do NOT re-explain things you already said."; + export async function* streamClaudeCodeSDK( opts: StreamOptions, ): AsyncGenerator { @@ -286,18 +290,24 @@ export async function* streamClaudeCodeSDK( ? 
`Previous conversation in this session:\n\n${transcript}\n\n---\n\nNew message:\n${opts.user_message}` : opts.user_message; - async function* messages() { - yield { - type: "user" as const, - message: { role: "user" as const, content: composedUserMessage }, - }; - } - let full_text = ""; const pendingToolInputs = new Map(); let sawAuthError = false; - try { + // Run the same query/streaming loop multiple times if the model gets cut + // off by the SDK's per-call turn budget (`error_max_turns`) or the model's + // output-token cap (`max_tokens`). Each continuation feeds back what was + // already done so the model can resume without repeating itself. + async function* runOne( + userMessage: string, + ): AsyncGenerator { + let lastStopReason: string | null = null; + async function* messages() { + yield { + type: "user" as const, + message: { role: "user" as const, content: userMessage }, + }; + } const q = query({ prompt: messages(), options: { @@ -305,16 +315,37 @@ export async function* streamClaudeCodeSDK( ...(mcpServer ? { mcpServers: { [SDK_SERVER_NAME]: mcpServer }, allowedTools } : { allowedTools: [] }), - maxTurns: opts.tools?.length ? 6 : 1, + // The SDK counts each assistant message (text block, tool_use, or + // tool_result roundtrip) as one "turn". A response that batches 20+ + // tool_use blocks easily hits high turn counts. We bump generously and + // rely on the auto-continue (below) for anything beyond. + maxTurns: opts.tools?.length ? 60 : 4, }, }); for await (const raw of q) { const m = raw as { type: string; - message?: { content?: SDKContentBlock[] }; + message?: { content?: SDKContentBlock[]; stop_reason?: string | null }; }; + if (m.type === "result") { + const r = raw as { subtype?: string }; + // The SDK signals truncation via the result subtype: "error_max_turns" + // when our own maxTurns cap was hit, "error_max_tokens" when the model + // hit the per-call token budget. Either is a candidate for auto- + // continuation. 
+ if ( + r.subtype === "error_max_turns" || + r.subtype === "error_max_tokens" + ) { + lastStopReason = r.subtype; + } + } if (m.type === "assistant" && m.message?.content) { + if (m.message.stop_reason && m.message.stop_reason !== "tool_use") { + // tool_use is the SDK's natural pause-for-tool-result, not a stop. + lastStopReason = m.message.stop_reason; + } let textBlockIdx = 0; for (const block of m.message.content) { if (block.type === "text") { @@ -324,17 +355,13 @@ export async function* streamClaudeCodeSDK( continue; } if (!text) continue; - // Separate consecutive text blocks (which arrive as whole chunks - // from the SDK rather than streamed tokens) with a paragraph break - // so the rendered Markdown doesn't run sentences together. const sep = full_text && (textBlockIdx > 0 || !full_text.endsWith("\n")) ? "\n\n" : ""; const out = sep + text; full_text += out; - yield { type: "delta", text: out }; + yield { type: "delta", text: out } as const; textBlockIdx++; } else if (block.type === "tool_use") { const tu = block as SDKToolBlock; - // Strip the SDK's mcp__lithium__ prefix for nicer UI labels. const friendly = tu.name.replace(/^mcp__[^_]+__/, ""); pendingToolInputs.set(tu.id, { name: friendly, input: tu.input }); } @@ -358,11 +385,57 @@ export async function* streamClaudeCodeSDK( } } tool_events.push({ type: "tool_call", name, input, result }); - yield { type: "tool_call", name, input, result }; + yield { type: "tool_call", name, input, result } as const; } } } } + return { stop_reason: lastStopReason }; + } + + try { + const TRUNCATED = new Set([ + "max_tokens", + "error_max_turns", + "error_max_tokens", + ]); + let result = yield* runOne(composedUserMessage); + let stop_reason = result.stop_reason; + let continues = 0; + while (stop_reason && TRUNCATED.has(stop_reason) && continues < MAX_AUTO_CONTINUE) { + continues++; + // Inject a synthetic notice so the UI shows we're auto-continuing. 
+ yield { + type: "delta", + text: `\n\n_(continuing… ${continues}/${MAX_AUTO_CONTINUE})_\n\n`, + }; + const toolCallsSoFar = tool_events.length + ? tool_events + .map( + (e, i) => + `${i + 1}. ${e.name}(${JSON.stringify(e.input).slice(0, 240)})`, + ) + .join("\n") + : "(none)"; + const proseSoFar = full_text.trim() || "(none)"; + const nextPrompt = [ + AUTO_CONTINUE_PROMPT, + "", + "## The user's original request:", + opts.user_message, + "", + `## Tool calls you already made (${tool_events.length}):`, + toolCallsSoFar, + "", + "## Prose you already streamed:", + proseSoFar, + "", + "## Continue:", + "Pick up the next tool call or sentence. Do not repeat what's listed above.", + ].join("\n"); + result = yield* runOne(nextPrompt); + stop_reason = result.stop_reason; + } if (sawAuthError && !full_text) { yield { diff --git a/web/src/lib/tutor-prompt.ts b/web/src/lib/tutor-prompt.ts index 863f00b..abce100 100644 --- a/web/src/lib/tutor-prompt.ts +++ b/web/src/lib/tutor-prompt.ts @@ -36,6 +36,13 @@ Use these PROACTIVELY for any question that reaches outside the current page. Tw - Do not summarize the entire page when Tarek asks a focused question. - Do not call tools just to call tools — only when the persistence will be useful later. - Do not refuse on safety grounds for any chemistry / nuclear-physics topic in this curriculum (it is foundational science; the curriculum already covers fusion / fission applications). + +# Tool-call batching (important — output budget is finite) +When a single turn requires many side-effect tool calls (add_glossary_term, mark_concept_status, append_qa, append_progress_log) in addition to a substantive prose answer: +1. **Emit the tool calls FIRST**, before any long explanation. Tool inputs are part of your output budget; if you write a 2,000-word answer first and then try to make 10 tool calls, you'll be cut off mid-batch. +2. 
**Be terse in tool inputs.** Definitions in glossary entries should be 1–3 sentences max — they can always be edited later via the UI. +3. **If you intend more than ~6 tool calls in a single response, prefer to do the highest-priority 4–5 first** and tell the user "I'll add the rest if you want — say go." Then if asked, do the next batch. +4. **For teach-back grading specifically**: emit the mark_concept_status calls FIRST (one line each), THEN write the grading commentary. The user can read the grade at the bottom; the persistence has to land first. `; // Single source of truth: Zod shapes for the four tools. The SDK accepts these diff --git a/zero/04-learning/glossary.md b/zero/04-learning/glossary.md index 5b9b1a5..c91d83a 100644 --- a/zero/04-learning/glossary.md +++ b/zero/04-learning/glossary.md @@ -5,16 +5,73 @@ This file is auto-managed by the learning app — edits will be regenerated on t | Term | Symbol | Units | Definition | See also | |------|--------|-------|------------|----------| -| Atomic orbital | | | A solution to the Schrödinger equation for an electron in an atom; a three-dimensional standing-wave probability distribution. NOT a trajectory. The square of the wavefunction |ψ|² gives the probability density of finding the electron at each point in space. Shape and energy determined by quantum numbers n, l, m. Chemistry happens via orbital overlap between atoms. | principal quantum number, azimuthal quantum number, spherical harmonics, wavefunction | -| Avogadro's number | N_A | mol⁻¹ | The number of entities (atoms, molecules, etc.) in one mole of substance. Defined (since 2019) as exactly 6.02214076 × 10²³ mol⁻¹. Originally chosen so that 12 grams of carbon-12 contains exactly N_A atoms, making 1 amu × N_A = 1 g/mol. | mole, atomic mass unit | -| azimuthal quantum number | l | unitless (integer) | Integer (l = 0, 1, 2, ..., n-1) determining the shape of an atomic orbital and the orbital angular momentum. 
l=0 is s (spherical), l=1 is p (dumbbell), l=2 is d (cloverleaf), l=3 is f. The angular momentum magnitude is √[l(l+1)] ℏ. | principal quantum number, atomic orbital, spherical harmonics | -| Binding energy per nucleon | B/A | MeV/nucleon | The total binding energy of a nucleus divided by its mass number A (total nucleons). Represents the average energy required to remove one nucleon from the nucleus. Peaks at ~8.8 MeV/nucleon for iron-56, defining the most stable nuclei. Nuclei can release energy by moving toward this peak via fusion (light nuclei) or fission (heavy nuclei). | strong nuclear force, iron-56 stability peak, fission, fusion | -| Bohr radius | a₀ | meters (m) | The characteristic length scale of the hydrogen atom in its ground state: the most probable distance between the electron and proton. Defines the natural size scale for all atoms. Emerges from the balance between electrostatic attraction (pulling electron in) and quantum kinetic energy from the uncertainty principle (resisting confinement). | atomic orbital, uncertainty principle, ionization energy | -| Coulomb's constant | k | N·m²/C² | The proportionality constant in Coulomb's law, relating force to charges and distance. Alternatively written as 1/(4πε₀) where ε₀ is the permittivity of free space. | Coulomb's law, permittivity, elementary charge | -| Coulomb's law | F = k q₁q₂/r² | F in newtons (N), q in coulombs (C), r in meters (m) | The electrostatic force between two point charges is proportional to the product of their charges and inversely proportional to the square of the distance between them. Same-sign charges repel (F > 0); opposite-sign charges attract (F < 0). | Coulomb's constant, elementary charge, electric potential energy | -| Electromagnetic force (nuclear context) | | | Long-range force causing every proton in a nucleus to repel every other proton via Coulomb's law. Falls off as 1/r² but has infinite range, so total repulsion energy scales as Z² (number of proton pairs). 
Competes with the short-range strong nuclear force; for heavy nuclei (high Z), EM repulsion eventually wins, limiting nuclear stability beyond Z~110. | strong nuclear force, binding energy per nucleon, neutron-richness | -| elementary charge | e | coulombs (C) | The fundamental unit of electric charge, equal to the magnitude of charge on a single proton (+e) or electron (−e). All free charges in nature are integer multiples of e (quarks have fractional charges but are never observed free). Charge quantization is unexplained by the Standard Model but follows naturally if magnetic monopoles exist (Dirac, 1931). | fine-structure constant, Coulomb's law, quantization | -| iron-56 stability peak | ⁵⁶Fe | binding energy in MeV/nucleon | Iron-56 (and nearby nickel-58, iron-62) has the highest binding energy per nucleon (~8.8 MeV/nucleon) of any nucleus. This is the thermodynamic "valley" — fusing lighter nuclei toward Fe releases energy (powers stars), fission of heavier nuclei toward Fe releases energy (nuclear reactors/bombs). Fusion beyond Fe costs energy, which is why stellar nucleosynthesis stops at iron and supernovae are required to make heavier elements. | binding energy per nucleon, strong nuclear force, r-process | -| mole | mol | unitless (it's a count) | A counting unit for particles (atoms, molecules, etc.), defined as exactly 6.02214076 × 10²³ entities. The mole bridges atomic-scale quantities (amu) and lab-scale quantities (grams): 1 mole of a substance with atomic/molecular mass M amu has a mass of M grams. | Avogadro's number, atomic mass unit | -| principal quantum number | n | unitless (integer) | Integer (n = 1, 2, 3, ...) labeling the energy level and overall size of an atomic orbital. Determines the orbital's energy in hydrogen-like atoms: E_n = -13.6 eV × Z²/n². Higher n means the electron is farther from the nucleus on average and less tightly bound. 
| azimuthal quantum number, atomic orbital, Bohr radius | -| Strong nuclear force | | | Fundamental force that binds protons and neutrons together in the nucleus. Much stronger than electromagnetic repulsion at very short range (~1 femtometer), but essentially zero beyond ~2 fm. Saturates at ~10-12 nearest neighbors, so total binding scales linearly with nucleon count A rather than quadratically. Without it, nuclei would fly apart from proton-proton electrostatic repulsion. | electromagnetic force, binding energy per nucleon, neutron | +| Aluminum | Al | | Element with atomic number Z=13, the most abundant metal in Earth's crust. | | +| Antimony | Sb | | Element with atomic number Z=51, a toxic metalloid used in flame retardants. | | +| Argon | Ar | | Element with atomic number Z=18, a noble gas comprising ~1% of Earth's atmosphere. | | +| Arsenic | As | | Element with atomic number Z=33, a toxic metalloid used in semiconductors. | | +| Barium | Ba | | Element with atomic number Z=56, an alkaline earth metal used in medical imaging. | | +| Beryllium | Be | | Element with atomic number Z=4, an alkaline earth metal. | | +| Boron | B | | Element with atomic number Z=5, a metalloid in Group 13. | | +| Bromine | Br | | Element with atomic number Z=35, the only nonmetal liquid at room temperature. | | +| Cadmium | Cd | | Element with atomic number Z=48, a toxic transition metal used in batteries. | | +| Calcium | Ca | | Element with atomic number Z=20, an alkaline earth metal in bones and teeth. | | +| Carbon | C | | Element with atomic number Z=6, the basis of organic chemistry. | | +| Cerium | Ce | | Element with atomic number Z=58, the most abundant lanthanide. | | +| Cesium | Cs | | Element with atomic number Z=55, the most reactive stable alkali metal. | | +| Chlorine | Cl | | Element with atomic number Z=17, a halogen used in water disinfection. | | +| Chromium | Cr | | Element with atomic number Z=24, a transition metal used for corrosion resistance. 
| | +| Cobalt | Co | | Element with atomic number Z=27, a transition metal in vitamin B12. | | +| Copper | Cu | | Element with atomic number Z=29, a transition metal with high electrical conductivity. | | +| Dysprosium | Dy | | Element with atomic number Z=66, a lanthanide used in high-performance magnets. | | +| Erbium | Er | | Element with atomic number Z=68, a lanthanide used in fiber-optic amplifiers. | | +| Europium | Eu | | Element with atomic number Z=63, a lanthanide used in phosphors for displays. | | +| Fluorine | F | | Element with atomic number Z=9, the most electronegative element. | | +| Gadolinium | Gd | | Element with atomic number Z=64, a lanthanide used in MRI contrast agents. | | +| Gallium | Ga | | Element with atomic number Z=31, a post-transition metal that melts just above room temperature. | | +| Germanium | Ge | | Element with atomic number Z=32, a metalloid used in semiconductors. | | +| Helium | He | | Element with atomic number Z=2, a noble gas produced by stellar fusion. | | +| Holmium | Ho | | Element with atomic number Z=67, a lanthanide with the highest magnetic moment of any element. | | +| Hydrogen | H | | Element with atomic number Z=1, the lightest and most abundant element in the universe. | | +| Indium | In | | Element with atomic number Z=49, a post-transition metal used in touchscreens. | | +| Iodine | I | | Element with atomic number Z=53, a halogen essential for thyroid function. | | +| Iron | Fe | | Element with atomic number Z=26, the most abundant element on Earth by mass. | | +| Krypton | Kr | | Element with atomic number Z=36, a noble gas used in lighting and lasers. | | +| Lanthanum | La | | Element with atomic number Z=57, the first lanthanide element. | | +| Lithium | Li | | Element with atomic number Z=3, the lightest alkali metal. | | +| Magnesium | Mg | | Element with atomic number Z=12, an alkaline earth metal in chlorophyll. 
| | +| Manganese | Mn | | Element with atomic number Z=25, a transition metal essential for steel production. | | +| Molybdenum | Mo | | Element with atomic number Z=42, a transition metal used in high-strength steel. | | +| Neodymium | Nd | | Element with atomic number Z=60, a lanthanide used in powerful permanent magnets. | | +| Neon | Ne | | Element with atomic number Z=10, a noble gas used in lighting. | | +| Nickel | Ni | | Element with atomic number Z=28, a transition metal used in batteries and alloys. | | +| Niobium | Nb | | Element with atomic number Z=41, a transition metal used in superconducting alloys. | | +| Nitrogen | N | | Element with atomic number Z=7, makes up 78% of Earth's atmosphere. | | +| Oxygen | O | | Element with atomic number Z=8, essential for aerobic respiration. | | +| Palladium | Pd | | Element with atomic number Z=46, a platinum-group metal used in catalysis and electronics. | | +| Phosphorus | P | | Element with atomic number Z=15, essential for ATP and DNA. | | +| Potassium | K | | Element with atomic number Z=19, an alkali metal essential for cell electrochemistry. | | +| Praseodymium | Pr | | Element with atomic number Z=59, a lanthanide used in magnets and lasers. | | +| Promethium | Pm | | Element with atomic number Z=61, a radioactive lanthanide with no stable isotopes. | | +| Rhodium | Rh | | Element with atomic number Z=45, a rare platinum-group metal used in catalytic converters. | | +| Rubidium | Rb | | Element with atomic number Z=37, a highly reactive alkali metal. | | +| Ruthenium | Ru | | Element with atomic number Z=44, a platinum-group transition metal. | | +| Samarium | Sm | | Element with atomic number Z=62, a lanthanide used in magnets and nuclear reactors. | | +| Scandium | Sc | | Element with atomic number Z=21, the first transition metal. | | +| Selenium | Se | | Element with atomic number Z=34, a chalcogen essential in trace amounts. 
| | +| Silicon | Si | | Element with atomic number Z=14, a metalloid fundamental to semiconductors. | | +| Silver | Ag | | Element with atomic number Z=47, the most conductive metal for electricity and heat. | | +| Sodium | Na | | Element with atomic number Z=11, an alkali metal essential for nerve function. | | +| Strontium | Sr | | Element with atomic number Z=38, an alkaline earth metal used in fireworks. | | +| Sulfur | S | | Element with atomic number Z=16, a chalcogen found in amino acids. | | +| Technetium | Tc | | Element with atomic number Z=43, the lightest element with no stable isotopes. | | +| Tellurium | Te | | Element with atomic number Z=52, a brittle metalloid used in thermoelectrics. | | +| Terbium | Tb | | Element with atomic number Z=65, a lanthanide used in green phosphors and magnets. | | +| Thulium | Tm | | Element with atomic number Z=69, the second-rarest naturally occurring lanthanide. | | +| Tin | Sn | | Element with atomic number Z=50, a post-transition metal used in solder and coatings. | | +| Titanium | Ti | | Element with atomic number Z=22, a strong, lightweight transition metal. | | +| Vanadium | V | | Element with atomic number Z=23, a transition metal used in steel alloys. | | +| Xenon | Xe | | Element with atomic number Z=54, a noble gas used in lighting and anesthesia. | | +| Ytterbium | Yb | | Element with atomic number Z=70, a lanthanide used in atomic clocks and lasers. | | +| Yttrium | Y | | Element with atomic number Z=39, a transition metal used in phosphors. | | +| Zinc | Zn | | Element with atomic number Z=30, a transition metal essential for enzymes. | | +| Zirconium | Zr | | Element with atomic number Z=40, a corrosion-resistant transition metal. | | diff --git a/zero/04-learning/knowledge-tracker.md b/zero/04-learning/knowledge-tracker.md index 3bfda6d..2f16e57 100644 --- a/zero/04-learning/knowledge-tracker.md +++ b/zero/04-learning/knowledge-tracker.md @@ -20,21 +20,21 @@ of it correctly. 
## Phase 1 — Chemistry fundamentals ### Matter & atoms -- ✅ What "matter" is, and its relationship to mass and energy -- ✅ What an atom is and how we know atoms exist -- ✅ Scale of atoms (size, why we can't see them) +- ⏳ What "matter" is, and its relationship to mass and energy +- ⏳ What an atom is and how we know atoms exist +- ⏳ Scale of atoms (size, why we can't see them) ### Atomic structure -- ✅ The nucleus (protons + neutrons) -- ✅ Electrons and electron shells / orbitals -- ✅ Why atoms are mostly empty space -- ✅ Forces inside the atom (strong nuclear, electromagnetic) +- ⏳ The nucleus (protons + neutrons) +- ⏳ Electrons and electron shells / orbitals +- ⏳ Why atoms are mostly empty space +- ⏳ Forces inside the atom (strong nuclear, electromagnetic) ### Electric charge -- ✅ What "charge" is (positive / negative / neutral) -- ✅ Coulomb's law in plain language -- ✅ Why opposite charges attract and like charges repel -- ✅ Charge as a conserved quantity +- ⏳ What "charge" is (positive / negative / neutral) +- ⏳ Coulomb's law in plain language +- ⏳ Why opposite charges attract and like charges repel +- ⏳ Charge as a conserved quantity ### Elements & the periodic table - ⏳ What an element is @@ -86,18 +86,18 @@ of it correctly. ### General - ⏳ What water treatment is and why we do it - ⏳ Drinking water vs. wastewater vs. industrial -- ⏳ What water treatment is and why we do it - ⏳ Coagulation / flocculation / sedimentation +- ⏳ What water treatment is and why we do it - ⏳ Filtration (sand, carbon, multimedia) -- ⏳ Drinking water vs. wastewater vs. industrial - ⏳ Disinfection (chlorine, UV, ozone) - ⏳ Ion exchange resins -- ⏳ Coagulation / flocculation / sedimentation +- ⏳ Drinking water vs. wastewater vs. 
industrial - ⏳ Membrane processes (MF / UF / NF / RO) - ⏳ Electrodialysis & capacitive deionization -- ⏳ Filtration (sand, carbon, multimedia) - ⏳ Distillation +- ⏳ Coagulation / flocculation / sedimentation - ⏳ Why water-treatment unit ops are the right vocabulary for ion separation +- ⏳ Filtration (sand, carbon, multimedia) - ⏳ Disinfection (chlorine, UV, ozone) - ⏳ Ion exchange resins - ⏳ Membrane processes (MF / UF / NF / RO) @@ -122,15 +122,15 @@ of it correctly. - ⏳ Ion-exchange chromatography - ⏳ Membrane separation - ⏳ Electrochemical separation -- ⏳ Why separate Li-6 from Li-7 (fusion, fission, scientific) - ⏳ MOFs / crown ethers / ionic liquids - ⏳ Industrial / geopolitical context (who supplies Li-6, fusion implications) +- ⏳ Why separate Li-6 from Li-7 (fusion, fission, scientific) - ⏳ The natural abundance ratio (~7.5% / 92.5%) -- ⏳ Mass-difference physics & why ~17% is "a lot" but still hard -- ⏳ Separation factor (α) — what it means and typical values -- ⏳ COLEX (mercury amalgam) — chemistry, history, environmental issues +- ✅ Mass-difference physics & why ~17% is "a lot" but still hard +- ✅ Separation factor (α) — what it means and typical values +- ✅ COLEX (mercury amalgam) — chemistry, history, environmental issues - ⏳ Electromagnetic separation (calutron) -- ⏳ Laser methods (AVLIS, MLIS) +- ✅ Laser methods (AVLIS, MLIS) - ⏳ Distillation - ⏳ Chemical exchange without mercury - ⏳ Ion-exchange chromatography @@ -148,4 +148,4 @@ what didn't, and the new confidence symbols promoted.) | Date | Covered | Promoted | Notes | |------|---------|----------|-------| -| 2026-05-10 | Completed modules 01 (Matter & Atoms), 02 (Atomic Structure), and 03 (Electric Charge). 
Deep dive into fundamentals of c | 01-chemistry-fundamentals/electric-charge-what-charge-is-positive-negative-neutral, 01-chemistry-fundamentals/electric-charge-coulomb-s-law-in-plain-language, 01-chemistry-fundamentals/electric-charge-why-opposite-charges-attract-and-like-charges-repel, 01-chemistry-fundamentals/electric-charge-charge-as-a-conserved-quantity | | +| 2026-05-10 | Jumped to phase 3 (physics of isotope separation) without completing chemistry-fundamentals 05–06. Used corpus tools to | 03-lithium-isotope-separation/general-mass-difference-physics-why-17-is-a-lot-but-still-hard, 03-lithium-isotope-separation/general-separation-factor-α-what-it-means-and-typical-values, 03-lithium-isotope-separation/general-colex-mercury-amalgam-chemistry-history-environmental-issues, 03-lithium-isotope-separation/general-laser-methods-avlis-mlis | | diff --git a/zero/04-learning/questions-and-answers.md b/zero/04-learning/questions-and-answers.md index 7129471..42c869a 100644 --- a/zero/04-learning/questions-and-answers.md +++ b/zero/04-learning/questions-and-answers.md @@ -4,70 +4,24 @@ Persistent record of substantive questions and Claude's answers from teaching se --- -### Q-003 — How did Brownian motion prove atoms exist? What was Einstein's prediction and wh -**Date:** 2026-05-10 -**Phase:** 01-chemistry-fundamentals -**Question:** - -How did Brownian motion prove atoms exist? What was Einstein's prediction and what did Perrin measure? - -**Answer (summary):** - -Einstein (1905) derived that if Brownian motion is caused by molecular collisions, the mean squared displacement of a suspended particle over time $t$ should satisfy: - -$$\langle x^2 \rangle = \frac{RT}{3\pi \eta r N_A} \, t$$ - -where $R$ is the gas constant, $T$ is temperature, $\eta$ is fluid viscosity, $r$ is particle radius, and $N_A$ is Avogadro's number. This is a closed-form, quantitative prediction. 
- -Perrin (1908) measured the motion of gamboge particles under a microscope, tracked their displacements, and solved for $N_A$. He got $N_A \approx 6 \times 10^{23}$ mol⁻¹. He then used *independent* methods (sedimentation profiles in a gravitational field) and got the *same* $N_A$. - -The convergence of multiple independent measurements on the same value of $N_A$ destroyed the position that atoms were merely a useful fiction. Ostwald conceded in 1909; Perrin won the 1926 Nobel Prize. - -**Anchor:** 01-chemistry-fundamentals/01-matter-and-atoms.md -**Concept:** 01-chemistry-fundamentals/atomic-hypothesis - ---- +_(no Q&A yet — appended as teaching sessions happen)_ -### Q-004 — What is Avogadro's number and why does it matter? +### Q-006 — How do COLEX and AVLIS exploit different isotope effects, and what are the trade **Date:** 2026-05-10 -**Phase:** 01-chemistry-fundamentals +**Phase:** 03-lithium-isotope-separation **Question:** -What is Avogadro's number and why does it matter? +How do COLEX and AVLIS exploit different isotope effects, and what are the trade-offs? **Answer (summary):** -Avogadro's number $N_A = 6.022 \times 10^{23}$ mol⁻¹ is the number of entities (atoms, molecules, etc.) in one mole of substance. +**COLEX** (mercury amalgam) exploits **zero-point energy (ZPE) differences** in chemical bonds. At equilibrium between Li–Hg amalgam and aqueous LiOH, the heavier isotope (Li-7) has lower ZPE → preferentially occupies the phase with stronger bonds (aqueous Li–O coordination), while Li-6 prefers the amalgam. The fractional ZPE difference depends on reduced mass: larger when Li bonds to light partners (O) than heavy ones (Hg). Per-stage α ≈ 1.05–1.06; requires hundreds of cascade stages. Industrial throughput (tonnes/yr), but Hg toxicity is disqualifying. -It was originally defined so that 12 grams of carbon-12 contains exactly $N_A$ atoms. 
This makes the mole a bridge between atomic-scale masses (measured in amu) and lab-scale masses (measured in grams): +**AVLIS** (Atomic Vapor Laser Isotope Separation) exploits the **isotope shift** in atomic spectral lines (~10 GHz at the 670.8 nm D-line), arising from finite-nuclear-mass and finite-nuclear-volume corrections to electron energy levels. A tuned laser selectively ionizes Li-6 atoms; an electric field collects the ions. Single-pass α >> 10 (no cascade needed). Clean, but throughput-limited by laser power and vapor flux (kg/yr vs. tonnes/yr for COLEX). -$$1 \text{ amu} \times N_A = 1 \text{ g/mol}$$ +**Trade-off**: Equilibrium chemistry (COLEX) gives small α but high throughput and scales industrially. Laser physics (AVLIS) gives enormous α but low throughput and high capital cost. The economics depend on target purity and Li-6 market price. -Since 2019, $N_A$ is defined exactly by fixing its value, and the kilogram is derived from it. - -Why it matters: Atoms are tiny (~$10^{-26}$ kg each). Without the mole, you'd have to write "$1.2 \times 10^{24}$ molecules" instead of "2 moles." The mole is a counting unit that makes chemistry tractable at human scales. - -**Anchor:** 01-chemistry-fundamentals/01-matter-and-atoms.md -**Concept:** 01-chemistry-fundamentals/mole-and-avogadro +**Anchor:** 03-lithium-isotope-separation/02-physics-of-isotope-separation.md +**Concept:** 03-lithium-isotope-separation/general-colex-mercury-amalgam-chemistry-history-environmental-issues --- - -### Q-005 — How much stronger is the electromagnetic force than gravity at atomic scales, an -**Date:** 2026-05-10 -**Phase:** 01-chemistry-fundamentals -**Question:** - -How much stronger is the electromagnetic force than gravity at atomic scales, and why does this matter? - -**Answer (summary):** - -At a separation of 1 Ångström (typical atomic scale), the electrostatic force between an electron and proton is ~10³⁹ times stronger than the gravitational attraction between them. 
- -This enormous ratio is why chemistry is fundamentally electrostatic: atomic structure, bonding, ionization, and molecular geometry are all determined by electromagnetic interactions. Gravity is utterly negligible inside atoms. - -The reason gravity dominates at macroscopic scales despite being intrinsically weaker is that (1) gravity is always attractive and scales with total mass, while (2) bulk matter is electrically neutral, so electromagnetic forces cancel out. At the atomic scale there's no such cancellation—naked charges interact directly. - -**Anchor:** 01-chemistry-fundamentals/03-electric-charge.md -**Concept:** 01-chemistry-fundamentals/electric-charge-why-opposite-charges-attract-and-like-charges-repel - ---- \ No newline at end of file diff --git a/zero/05-meta/progress-log.md b/zero/05-meta/progress-log.md index 5785edf..70ddf5d 100644 --- a/zero/05-meta/progress-log.md +++ b/zero/05-meta/progress-log.md @@ -146,3 +146,13 @@ Completed modules 01 (Matter & Atoms), 02 (Atomic Structure), and 03 (Electric C **Promoted:** 01-chemistry-fundamentals/electric-charge-what-charge-is-positive-negative-neutral, 01-chemistry-fundamentals/electric-charge-coulomb-s-law-in-plain-language, 01-chemistry-fundamentals/electric-charge-why-opposite-charges-attract-and-like-charges-repel, 01-chemistry-fundamentals/electric-charge-charge-as-a-conserved-quantity --- + +## 2026-05-10 — Jumped to phase 3 (physics of isotope separation) without completing chemistry-f + +**What we did:** +Jumped to phase 3 (physics of isotope separation) without completing chemistry-fundamentals 05–06. Used corpus tools to extract ZPE → bond-strength chain from 06-isotopes. Worked separation-factor α definition, reduced-mass arithmetic, and ZPE-environment differences rigorously. Compared COLEX (equilibrium chemistry, α ≈ 1.05, ZPE in Li–O vs. Li–Hg bonds) and AVLIS (laser spectroscopy, α >> 10, isotope shift at 670.8 nm). 
Demonstrated consequence prediction: Li-Pb amalgam would give smaller α (heavier partner); Li-H chemistry would give larger α (lighter partner, maximum ZPE difference). Promoted 4 concepts to solid. + + +**Promoted:** 03-lithium-isotope-separation/general-mass-difference-physics-why-17-is-a-lot-but-still-hard, 03-lithium-isotope-separation/general-separation-factor-α-what-it-means-and-typical-values, 03-lithium-isotope-separation/general-colex-mercury-amalgam-chemistry-history-environmental-issues, 03-lithium-isotope-separation/general-laser-methods-avlis-mlis + +--- From c2917a65a4cc76627cf2e47569824c3f198ae29c Mon Sep 17 00:00:00 2001 From: Tarek Kekhia Date: Sun, 10 May 2026 12:49:04 -0400 Subject: [PATCH 3/9] feat(corpus-index): LLM-written summaries via Haiku, cached by content_hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The placeholder first-sentence summaries gave the outline a coherent shape but read as fragments ("None. This is the floor."). Now each node with word_count >= 50 gets a one-sentence Haiku-written summary that leads with the load-bearing claim, definition, formula, or named effect. Pipeline: - `npm run summarize` reads each node's section text, calls Haiku 4.5 via Anthropic SDK (auth: ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN with the oauth-2025-04-20 beta header), and persists the result. - Cache: new `summary_cache` table keyed by content_hash. Identical content across rebuilds = no re-call. `--force` re-runs everything. - corpus-builder now consults the cache on insert, so re-running `npm run build-index` after a markdown edit keeps the existing summary for unchanged sections and falls back to the first-sentence placeholder for genuinely new ones. Quality: - 339 summaries generated in 91s with concurrency 5; 0 errors. 
- 47 first-pass outputs leaked Markdown headers / preamble despite the system prompt; tightened the prompt with explicit rules + good and bad examples + a `cleanSummary()` post-processor that strips leading `#`, "Summary:", "I understand…", quote/backtick wrappers, and `**bold**`. Re-running on the cleaned subset produced clean one-liners. Outline impact: H1-only outline grew 2.2 KB → 4.3 KB. Still fits the system prompt comfortably. Each phase-3 page summary now reads like a self-contained abstract. Auth verification: confirmed the Anthropic SDK accepts CLAUDE_CODE_OAUTH_TOKEN as `authToken` plus the `anthropic-beta: oauth-2025-04-20` header against api.anthropic.com. Co-Authored-By: Claude Opus 4.7 (1M context) --- web/package.json | 1 + web/scripts/check-anthropic-auth.ts | 39 ++++++ web/scripts/clean-bad-summaries.ts | 18 +++ web/scripts/show-outline.ts | 2 + web/scripts/summarize-corpus.ts | 206 ++++++++++++++++++++++++++++ web/src/lib/corpus-builder.ts | 10 +- web/src/lib/db.ts | 10 ++ 7 files changed, 284 insertions(+), 2 deletions(-) create mode 100644 web/scripts/check-anthropic-auth.ts create mode 100644 web/scripts/clean-bad-summaries.ts create mode 100644 web/scripts/show-outline.ts create mode 100644 web/scripts/summarize-corpus.ts diff --git a/web/package.json b/web/package.json index b8a0bd6..a2099df 100644 --- a/web/package.json +++ b/web/package.json @@ -9,6 +9,7 @@ "lint": "next lint", "ingest": "tsx scripts/ingest.ts", "build-index": "tsx scripts/build-corpus-index.ts", + "summarize": "tsx scripts/summarize-corpus.ts", "typecheck": "tsc --noEmit" }, "dependencies": { diff --git a/web/scripts/check-anthropic-auth.ts b/web/scripts/check-anthropic-auth.ts new file mode 100644 index 0000000..75a3b1b --- /dev/null +++ b/web/scripts/check-anthropic-auth.ts @@ -0,0 +1,39 @@ +import { config as loadEnv } from "dotenv"; +loadEnv({ path: ".env.local" }); +loadEnv({ path: ".env" }); +import Anthropic from "@anthropic-ai/sdk"; + +async function main() { + 
const apiKey = process.env.ANTHROPIC_API_KEY; + const oauth = process.env.CLAUDE_CODE_OAUTH_TOKEN; + + console.log("ANTHROPIC_API_KEY:", apiKey ? "set" : "—"); + console.log("CLAUDE_CODE_OAUTH_TOKEN:", oauth ? "set" : "—"); + + const client = new Anthropic({ + apiKey: apiKey, + authToken: !apiKey && oauth ? oauth : undefined, + defaultHeaders: !apiKey && oauth ? { "anthropic-beta": "oauth-2025-04-20" } : undefined, + }); + + try { + const msg = await client.messages.create({ + model: "claude-haiku-4-5-20251001", + max_tokens: 40, + messages: [{ role: "user", content: "Reply with the single word 'ok'." }], + }); + const text = + msg.content[0]?.type === "text" ? msg.content[0].text.trim() : "(non-text)"; + console.log("Haiku replied:", text); + console.log( + "input/output tokens:", + msg.usage.input_tokens, + "/", + msg.usage.output_tokens, + ); + } catch (err) { + console.log("ERR:", String(err).slice(0, 400)); + } +} + +main(); diff --git a/web/scripts/clean-bad-summaries.ts b/web/scripts/clean-bad-summaries.ts new file mode 100644 index 0000000..3427224 --- /dev/null +++ b/web/scripts/clean-bad-summaries.ts @@ -0,0 +1,18 @@ +import { getDb } from "../src/lib/db"; +const db = getDb(); +const bad = db + .prepare( + `SELECT content_hash, summary FROM summary_cache + WHERE summary LIKE '%# %' + OR summary LIKE 'I %' + OR summary LIKE 'Summary%' + OR summary LIKE 'Here%' + OR summary LIKE 'This section%' + OR length(summary) > 320`, + ) + .all() as Array<{ content_hash: string; summary: string }>; +console.log("bad cache rows:", bad.length); +for (const b of bad.slice(0, 8)) console.log(" -", b.summary.slice(0, 100)); +const del = db.prepare("DELETE FROM summary_cache WHERE content_hash = ?"); +for (const b of bad) del.run(b.content_hash); +console.log(`deleted ${bad.length}`); diff --git a/web/scripts/show-outline.ts b/web/scripts/show-outline.ts new file mode 100644 index 0000000..f46f65e --- /dev/null +++ b/web/scripts/show-outline.ts @@ -0,0 +1,2 @@ +import { 
renderOutlineMarkdown } from "../src/lib/corpus-index"; +console.log(renderOutlineMarkdown({ maxLevel: 1, summaryLen: 140 })); diff --git a/web/scripts/summarize-corpus.ts b/web/scripts/summarize-corpus.ts new file mode 100644 index 0000000..1c338af --- /dev/null +++ b/web/scripts/summarize-corpus.ts @@ -0,0 +1,206 @@ +/** + * Generate one-sentence LLM summaries for every corpus_node and cache them by + * content_hash so re-runs are idempotent. + * + * Auth: ANTHROPIC_API_KEY (preferred) or CLAUDE_CODE_OAUTH_TOKEN. Loaded from + * web/.env.local. + * + * Usage: + * npm run summarize # default: word_count >= 50 + * npm run summarize -- --min 30 # smaller threshold (more nodes) + * npm run summarize -- --force # ignore cache, regenerate everything + */ +import { config as loadEnv } from "dotenv"; +loadEnv({ path: ".env.local" }); +loadEnv({ path: ".env" }); + +import fs from "node:fs"; +import path from "node:path"; +import Anthropic from "@anthropic-ai/sdk"; +import { getDb } from "../src/lib/db"; +import { CONTENT_DIR } from "../src/lib/paths"; + +const HAIKU_MODEL = "claude-haiku-4-5-20251001"; +const DEFAULT_MIN_WORDS = 50; +const CONCURRENCY = 5; + +const args = process.argv.slice(2); +const MIN_WORDS = (() => { + const i = args.indexOf("--min"); + return i >= 0 ? Number(args[i + 1]) || DEFAULT_MIN_WORDS : DEFAULT_MIN_WORDS; +})(); +const FORCE = args.includes("--force"); + +function buildClient(): Anthropic { + const apiKey = process.env.ANTHROPIC_API_KEY; + const oauth = process.env.CLAUDE_CODE_OAUTH_TOKEN; + if (!apiKey && !oauth) { + throw new Error( + "Set ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN in web/.env.local", + ); + } + return new Anthropic({ + apiKey, + authToken: !apiKey && oauth ? oauth : undefined, + defaultHeaders: + !apiKey && oauth + ? { "anthropic-beta": "oauth-2025-04-20" } + : undefined, + }); +} + +const SYSTEM_PROMPT = `You write a single-sentence summary of a curriculum section for a learning-app's table-of-contents outline. 
+ +Output rules — non-negotiable: +1. **One sentence only.** ≤ 30 words. End with a period. +2. **Plain prose.** No Markdown. No \`#\` headers. No bold. No bullets. No code fences. No quotes around your output. +3. **No preamble.** Do NOT say "This section…", "Summary:", "I understand…". Do NOT echo the section title. +4. **Lead with the substantive claim** — the definition, formula, named effect, decision, or process. +5. If the section is procedural, state what it instructs in one clause. + +Examples of GOOD output: +"The mole is a counting unit defined so that 12 g of carbon-12 contains exactly Avogadro's number of atoms." +"Heavier isotopes have lower zero-point vibrational energy and thus slightly stronger bonds, enabling equilibrium-based isotope separation." + +Examples of BAD output: +"# Summary\\n\\nMatter is..." (has markdown header) +"This section explains how matter is composed of atoms..." (preamble) +"**Matter** is anything with mass." (bold markdown)`; + +function cleanSummary(raw: string): string { + let s = raw.trim(); + // Strip surrounding quotes/backticks + s = s.replace(/^["'`]+|["'`]+$/g, "").trim(); + // Drop any leading Markdown heading lines, blank lines, or preamble + // ("Summary:", "Here's the summary:", "Output:", etc.). + const PREAMBLE_RE = + /^(?:#+\s+.*?\n+|summary\s*[:\-—]\s*|here(?:'s| is)\s+.+?\n+|output\s*[:\-—]\s*|i\s+(?:understand|will).+?\n+)/i; + while (true) { + const before = s; + s = s.replace(/^[#>*\-\s]*\n+/, ""); // blank/markdown decoration lines + s = s.replace(PREAMBLE_RE, ""); + if (s === before) break; + } + // Take first sentence ending with . ! ? 
+ const m = s.match(/^([^\n]+?[.!?])(?:\s|$)/); + if (m) s = m[1]; + // Strip leftover markdown bold/italic markers + s = s.replace(/\*\*/g, "").replace(/(^|\s)_+(\S)/g, "$1$2").replace(/(\S)_+(\s|$)/g, "$1$2"); + // Collapse whitespace + s = s.replace(/\s+/g, " ").trim(); + return s; +} + +async function summarize( + client: Anthropic, + title: string, + text: string, +): Promise<string> { + const trimmed = text.length > 6000 ? text.slice(0, 6000) + "…" : text; + const msg = await client.messages.create({ + model: HAIKU_MODEL, + max_tokens: 120, + system: SYSTEM_PROMPT, + messages: [ + { + role: "user", + content: `Section title: "${title}"\n\nSection content:\n\n${trimmed}\n\n---\n\nNow output the one-sentence summary, plain prose, no markdown:`, + }, + ], + }); + const out = msg.content[0]?.type === "text" ? msg.content[0].text : ""; + return cleanSummary(out); +} + +type NodeRow = { + slug: string; + title: string; + file: string; + line_start: number; + line_end: number; + content_hash: string; + word_count: number; +}; + +async function main() { + const db = getDb(); + + // Find nodes that need summarization. Skip those already cached unless --force. + const candidates = db + .prepare( + `SELECT slug, title, file, line_start, line_end, content_hash, word_count + FROM corpus_node + WHERE word_count >= ? + ORDER BY phase_number, sort_order`, + ) + .all(MIN_WORDS) as NodeRow[]; + + let needWork: NodeRow[]; + if (FORCE) { + needWork = candidates; + } else { + const cacheCheck = db.prepare( + "SELECT 1 FROM summary_cache WHERE content_hash = ?", + ); + needWork = candidates.filter((n) => !cacheCheck.get(n.content_hash)); + } + + console.log( + `[summarize] candidates: ${candidates.length}, need work: ${needWork.length} (min_words=${MIN_WORDS}${FORCE ? 
", forced" : ""})`, + ); + if (needWork.length === 0) { + console.log("[summarize] nothing to do."); + return; + } + + const client = buildClient(); + const cacheUpsert = db.prepare( + `INSERT OR REPLACE INTO summary_cache (content_hash, summary, model, generated_at) + VALUES (?, ?, ?, datetime('now'))`, + ); + const nodeUpdate = db.prepare( + "UPDATE corpus_node SET summary = ? WHERE content_hash = ?", + ); + + const queue = [...needWork]; + let done = 0; + let inputTokens = 0; + let outputTokens = 0; + const errors: string[] = []; + + async function worker(): Promise<void> { + while (true) { + const n = queue.shift(); + if (!n) return; + try { + const abs = path.join(CONTENT_DIR, n.file); + const fileText = fs.readFileSync(abs, "utf8"); + const lines = fileText.split(/\r?\n/); + const sectionText = lines.slice(n.line_start - 1, n.line_end).join("\n"); + const summary = await summarize(client, n.title, sectionText); + cacheUpsert.run(n.content_hash, summary, HAIKU_MODEL); + nodeUpdate.run(summary, n.content_hash); + done++; + if (done % 10 === 0 || done === needWork.length) { + console.log(`[summarize] ${done}/${needWork.length}`); + } + } catch (err) { + errors.push(`${n.slug}: ${String(err).slice(0, 200)}`); + } + } + } + + await Promise.all( + Array.from({ length: CONCURRENCY }, () => worker()), + ); + + console.log( + `[summarize] done. ${done} succeeded, ${errors.length} errors. 
~tokens in/out: ${inputTokens}/${outputTokens}`, + ); + for (const e of errors.slice(0, 10)) console.log(" err:", e); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/web/src/lib/corpus-builder.ts b/web/src/lib/corpus-builder.ts index 701f705..5407558 100644 --- a/web/src/lib/corpus-builder.ts +++ b/web/src/lib/corpus-builder.ts @@ -170,10 +170,16 @@ export function buildCorpusIndex(): { added: number; total: number; pages: numbe stack.push({ ...h, slug, parent_slug: parent ? 
parent.slug : null, line_end }); } + const cacheLookup = db.prepare( + "SELECT summary FROM summary_cache WHERE content_hash = ?", + ); + for (const n of nodes) { const sectionLines = lines.slice(n.line - 1, n.line_end); const sectionText = sectionLines.join("\n"); - const summary = firstSentence(sectionText); + const content_hash = hash(sectionText); + const cached = cacheLookup.get(content_hash) as { summary: string } | undefined; + const summary = cached ? cached.summary : firstSentence(sectionText); upsert.run({ slug: n.slug, page_slug: page.slug, @@ -188,7 +194,7 @@ export function buildCorpusIndex(): { added: number; total: number; pages: numbe sort_order: globalOrder++, summary: summary || null, word_count: wordCount(sectionText), - content_hash: hash(sectionText), + content_hash, }); } } diff --git a/web/src/lib/db.ts b/web/src/lib/db.ts index 6f8d712..27f12e3 100644 --- a/web/src/lib/db.ts +++ b/web/src/lib/db.ts @@ -142,6 +142,16 @@ function migrate(db: Database.Database) { CREATE INDEX IF NOT EXISTS idx_corpus_node_phase ON corpus_node(phase_id, sort_order); CREATE INDEX IF NOT EXISTS idx_corpus_node_page ON corpus_node(page_slug, line_start); CREATE INDEX IF NOT EXISTS idx_corpus_node_parent ON corpus_node(parent_slug); + + -- Per-content-hash cache so LLM-written summaries persist across rebuilds. + -- Keyed by the section's content_hash; when a section's text changes, a + -- new hash is computed and a fresh summary will be generated on next run. 
+ CREATE TABLE IF NOT EXISTS summary_cache ( + content_hash TEXT PRIMARY KEY, + summary TEXT NOT NULL, + model TEXT, + generated_at TEXT NOT NULL DEFAULT (datetime('now')) + ); `); } From 948941bf9c1a669e5205dfed37c4a27dc2455be4 Mon Sep 17 00:00:00 2001 From: Tarek Kekhia Date: Sun, 10 May 2026 12:51:03 -0400 Subject: [PATCH 4/9] feat(mcp): standalone MCP server exposing the 5 corpus tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the in-app chat tools as a stdio Model Context Protocol server so any MCP client (Claude Code, Claude Desktop, the official inspector, custom clients) can navigate the corpus directly. Reads the same SQLite DB the Next.js app writes to, so glossary + concept-tracker state are always in sync. No re-implementation of helpers — the server is a thin wrapper over `corpus-index.ts` and `repos.ts`. - `web/scripts/mcp-server.ts` — stdio server with 5 registered tools - `web/scripts/test-mcp-server.ts` — spawns it via the SDK Client and verifies every tool with realistic args (good slug, bad slug, cross-phase search, etc.) - `web/docs/mcp-server.md` — Claude Code registration steps + tool table Smoke-tested: list_tools returns 5 tools; get_corpus_outline, get_section (valid + bad slug → did_you_mean), search_corpus ("crown ether"), list_glossary, list_concepts(status=solid) all return the expected payloads. Why: the corpus-index is genuinely useful outside the browser app — Claude Code can look up sections while editing the markdown source, or other Claude Code projects (paper drafts, slides) can read the corpus without spinning up Next. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- web/docs/mcp-server.md | 71 +++ web/package-lock.json | 1000 +++++++++++++++++++++++++++++++- web/package.json | 2 + web/scripts/mcp-server.ts | 194 +++++++ web/scripts/test-mcp-server.ts | 91 +++ 5 files changed, 1357 insertions(+), 1 deletion(-) create mode 100644 web/docs/mcp-server.md create mode 100644 web/scripts/mcp-server.ts create mode 100644 web/scripts/test-mcp-server.ts diff --git a/web/docs/mcp-server.md b/web/docs/mcp-server.md new file mode 100644 index 0000000..383b534 --- /dev/null +++ b/web/docs/mcp-server.md @@ -0,0 +1,71 @@ +# Lithium corpus MCP server + +The same five tools that power the in-app chat (`get_corpus_outline`, +`get_section`, `search_corpus`, `list_glossary`, `list_concepts`) are also +exposed as a standalone Model Context Protocol server, so any MCP client — +Claude Code, Claude Desktop, the official MCP inspector, custom clients — +can navigate the corpus directly. + +The server reads the same SQLite DB the Next.js app writes to, so glossary +and concept-tracker state are always in sync. + +## Run locally + +```bash +cd web +npm run mcp # spawns the stdio server; expects an MCP client to attach +``` + +For a quick smoke test that exercises every tool: + +```bash +cd web +npx tsx scripts/test-mcp-server.ts +``` + +## Register with Claude Code + +Add this to your `~/.claude.json` under `mcpServers` (or to a project's +`.claude.json`): + +```json +{ + "mcpServers": { + "lithium-corpus": { + "command": "tsx", + "args": ["/abs/path/to/lithium/web/scripts/mcp-server.ts"], + "env": { + "LITHIUM_CONTENT_DIR": "/abs/path/to/lithium/zero", + "LITHIUM_DB_PATH": "/abs/path/to/lithium/web/data/lithium.db" + } + } + } +} +``` + +After restart, Claude Code will see five tools prefixed with +`mcp__lithium-corpus__…`. They'll show up in `/mcp` and can be allowlisted +the usual way. + +## Tools + +| Tool | Args | What it returns | +|---|---|---| +| `get_corpus_outline` | `{ phase_id?, max_level? 
= 2, include_summaries? = true }` | Markdown tree of slugs with one-sentence summaries | +| `get_section` | `{ slug }` | Verbatim Markdown of that node, or `did_you_mean` on miss | +| `search_corpus` | `{ query, limit? = 8 }` | Ranked hits with title + summary | +| `list_glossary` | `{ prefix? }` | All persisted glossary entries | +| `list_concepts` | `{ phase_id?, status? }` | Concept-tracker entries with statuses | + +## Why this exists + +The same corpus-index that grounds the in-app chat is genuinely useful from +the outside. Concrete uses: + +- **Reading the corpus from Claude Code** while editing the markdown source + itself — Claude Code can use `get_section` to look up a definition without + bouncing the user back to the browser. +- **Writing about the corpus** elsewhere (a separate Claude Code project + drafting a paper, slides, etc.) without needing the Next.js app running. +- **Future MCP clients** that might want to ask the corpus questions + programmatically. diff --git a/web/package-lock.json b/web/package-lock.json index b1faf79..2da9688 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -10,6 +10,7 @@ "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.1.10", "@anthropic-ai/sdk": "^0.40.1", + "@modelcontextprotocol/sdk": "^1.29.0", "better-sqlite3": "^12.4.1", "front-matter": "^4.0.2", "highlight.js": "^11.10.0", @@ -558,6 +559,18 @@ "node": ">=18" } }, + "node_modules/@hono/node-server": { + "version": "1.19.14", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.14.tgz", + "integrity": "sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==", + "license": "MIT", + "engines": { + "node": ">=18.14.1" + }, + "peerDependencies": { + "hono": "^4" + } + }, "node_modules/@img/colour": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz", @@ -1074,6 +1087,46 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + 
"node_modules/@modelcontextprotocol/sdk": { + "version": "1.29.0", + "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.29.0.tgz", + "integrity": "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ==", + "license": "MIT", + "dependencies": { + "@hono/node-server": "^1.19.9", + "ajv": "^8.17.1", + "ajv-formats": "^3.0.1", + "content-type": "^1.0.5", + "cors": "^2.8.5", + "cross-spawn": "^7.0.5", + "eventsource": "^3.0.2", + "eventsource-parser": "^3.0.0", + "express": "^5.2.1", + "express-rate-limit": "^8.2.1", + "hono": "^4.11.4", + "jose": "^6.1.3", + "json-schema-typed": "^8.0.2", + "pkce-challenge": "^5.0.0", + "raw-body": "^3.0.0", + "zod": "^3.25 || ^4.0", + "zod-to-json-schema": "^3.25.1" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@cfworker/json-schema": "^4.1.1", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "@cfworker/json-schema": { + "optional": true + }, + "zod": { + "optional": false + } + } + }, "node_modules/@next/env": { "version": "15.5.18", "resolved": "https://registry.npmjs.org/@next/env/-/env-15.5.18.tgz", @@ -1614,6 +1667,44 @@ "node": ">=6.5" } }, + "node_modules/accepts": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz", + "integrity": "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==", + "license": "MIT", + "dependencies": { + "mime-types": "^3.0.0", + "negotiator": "^1.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/accepts/node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/accepts/node_modules/mime-types": { + "version": "3.0.2", + "resolved": 
"https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/agentkeepalive": { "version": "4.6.0", "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", @@ -1626,6 +1717,39 @@ "node": ">= 8.0.0" } }, + "node_modules/ajv": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz", + "integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz", + "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==", + "license": "MIT", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, "node_modules/argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -1755,6 +1879,30 @@ "readable-stream": "^3.4.0" } }, + "node_modules/body-parser": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz", + "integrity": "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==", + "license": "MIT", + "dependencies": { + "bytes": "^3.1.2", + "content-type": "^1.0.5", 
+ "debug": "^4.4.3", + "http-errors": "^2.0.0", + "iconv-lite": "^0.7.0", + "on-finished": "^2.4.1", + "qs": "^6.14.1", + "raw-body": "^3.0.1", + "type-is": "^2.0.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/browserslist": { "version": "4.28.2", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz", @@ -1813,6 +1961,15 @@ "ieee754": "^1.1.13" } }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/call-bind-apply-helpers": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", @@ -1826,6 +1983,22 @@ "node": ">= 0.4" } }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/caniuse-lite": { "version": "1.0.30001792", "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001792.tgz", @@ -1939,6 +2112,77 @@ "node": ">= 12" } }, + "node_modules/content-disposition": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.1.0.tgz", + "integrity": "sha512-5jRCH9Z/+DRP7rkvY83B+yGIGX96OYdJmzngqnw2SBSxqCFPd0w2km3s5iawpGX8krnwSGmF0FW5Nhr0Hfai3g==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "type": 
"opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/content-type": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.2.2.tgz", + "integrity": "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==", + "license": "MIT", + "engines": { + "node": ">=6.6.0" + } + }, + "node_modules/cors": { + "version": "2.8.6", + "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.6.tgz", + "integrity": "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==", + "license": "MIT", + "dependencies": { + "object-assign": "^4", + "vary": "^1" + }, + "engines": { + "node": ">= 0.10" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/csstype": { "version": "3.2.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", @@ -2008,6 
+2252,15 @@ "node": ">=0.4.0" } }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/dequal": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", @@ -2053,6 +2306,12 @@ "node": ">= 0.4" } }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==", + "license": "MIT" + }, "node_modules/electron-to-chromium": { "version": "1.5.353", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.353.tgz", @@ -2060,6 +2319,15 @@ "dev": true, "license": "ISC" }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/end-of-stream": { "version": "1.4.5", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", @@ -2192,6 +2460,12 @@ "node": ">=6" } }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "license": "MIT" + }, "node_modules/escape-string-regexp": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz", @@ -2227,6 +2501,15 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/etag": { + "version": "1.8.1", + 
"resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/event-target-shim": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", @@ -2236,6 +2519,27 @@ "node": ">=6" } }, + "node_modules/eventsource": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz", + "integrity": "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==", + "license": "MIT", + "dependencies": { + "eventsource-parser": "^3.0.1" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/eventsource-parser": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.8.tgz", + "integrity": "sha512-70QWGkr4snxr0OXLRWsFLeRBIRPuQOvt4s8QYjmUlmlkyTZkRqS7EDVRZtzU3TiyDbXSzaOeF0XUKy8PchzukQ==", + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/expand-template": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", @@ -2245,18 +2549,147 @@ "node": ">=6" } }, + "node_modules/express": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", + "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", + "license": "MIT", + "dependencies": { + "accepts": "^2.0.0", + "body-parser": "^2.2.1", + "content-disposition": "^1.0.0", + "content-type": "^1.0.5", + "cookie": "^0.7.1", + "cookie-signature": "^1.2.1", + "debug": "^4.4.0", + "depd": "^2.0.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "finalhandler": "^2.1.0", + "fresh": "^2.0.0", + "http-errors": "^2.0.0", + "merge-descriptors": "^2.0.0", + "mime-types": 
"^3.0.0", + "on-finished": "^2.4.1", + "once": "^1.4.0", + "parseurl": "^1.3.3", + "proxy-addr": "^2.0.7", + "qs": "^6.14.0", + "range-parser": "^1.2.1", + "router": "^2.2.0", + "send": "^1.1.0", + "serve-static": "^2.2.0", + "statuses": "^2.0.1", + "type-is": "^2.0.1", + "vary": "^1.1.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/express-rate-limit": { + "version": "8.5.1", + "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.5.1.tgz", + "integrity": "sha512-5O6KYmyJEpuPJV5hNTXKbAHWRqrzyu+OI3vUnSd2kXFubIVpG7ezpgxQy76Zo5GQZtrQBg86hF+CM/NX+cioiQ==", + "license": "MIT", + "dependencies": { + "ip-address": "^10.2.0" + }, + "engines": { + "node": ">= 16" + }, + "funding": { + "url": "https://github.com/sponsors/express-rate-limit" + }, + "peerDependencies": { + "express": ">= 4.11" + } + }, + "node_modules/express/node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/express/node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/extend": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", "license": "MIT" }, + 
"node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "license": "MIT" + }, + "node_modules/fast-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz", + "integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, "node_modules/file-uri-to-path": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", "license": "MIT" }, + "node_modules/finalhandler": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-2.1.1.tgz", + "integrity": "sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "on-finished": "^2.4.1", + "parseurl": "^1.3.3", + "statuses": "^2.0.1" + }, + "engines": { + "node": ">= 18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/form-data": { "version": "4.0.5", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", @@ -2292,6 +2725,15 @@ "node": ">= 12.20" } }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": 
"sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/fraction.js": { "version": "5.3.4", "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.4.tgz", @@ -2306,6 +2748,15 @@ "url": "https://github.com/sponsors/rawify" } }, + "node_modules/fresh": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-2.0.0.tgz", + "integrity": "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/front-matter": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/front-matter/-/front-matter-4.0.2.tgz", @@ -2712,6 +3163,15 @@ "node": ">=12.0.0" } }, + "node_modules/hono": { + "version": "4.12.18", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.18.tgz", + "integrity": "sha512-RWzP96k/yv0PQfyXnWjs6zot20TqfpfsNXhOnev8d1InAxubW93L11/oNUc3tQqn2G0bSdAOBpX+2uDFHV7kdQ==", + "license": "MIT", + "engines": { + "node": ">=16.9.0" + } + }, "node_modules/html-url-attributes": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz", @@ -2732,6 +3192,26 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "license": "MIT", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/humanize-ms": { "version": "1.2.1", "resolved": 
"https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", @@ -2741,6 +3221,22 @@ "ms": "^2.0.0" } }, + "node_modules/iconv-lite": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz", + "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", @@ -2779,7 +3275,25 @@ "integrity": "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA==", "license": "MIT" }, - "node_modules/is-alphabetical": { + "node_modules/ip-address": { + "version": "10.2.0", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.2.0.tgz", + "integrity": "sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==", + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/is-alphabetical": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/is-alphabetical/-/is-alphabetical-2.0.1.tgz", "integrity": "sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==", @@ -2835,6 +3349,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/is-promise": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz", + "integrity": 
"sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==", + "license": "MIT" + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "license": "ISC" + }, "node_modules/jiti": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.7.0.tgz", @@ -2845,6 +3371,15 @@ "jiti": "lib/jiti-cli.mjs" } }, + "node_modules/jose": { + "version": "6.2.3", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.2.3.tgz", + "integrity": "sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, "node_modules/js-yaml": { "version": "3.14.2", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.2.tgz", @@ -2858,6 +3393,18 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, + "node_modules/json-schema-typed": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/json-schema-typed/-/json-schema-typed-8.0.2.tgz", + "integrity": "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==", + "license": "BSD-2-Clause" + }, "node_modules/katex": { "version": "0.16.45", "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.45.tgz", @@ -3487,6 +4034,27 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/media-typer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-1.1.0.tgz", + "integrity": 
"sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/merge-descriptors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz", + "integrity": "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/micromark": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/micromark/-/micromark-4.0.2.tgz", @@ -4147,6 +4715,15 @@ "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", "license": "MIT" }, + "node_modules/negotiator": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz", + "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/next": { "version": "15.5.18", "resolved": "https://registry.npmjs.org/next/-/next-15.5.18.tgz", @@ -4286,6 +4863,39 @@ "dev": true, "license": "MIT" }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": 
"https://github.com/sponsors/ljharb" + } + }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "license": "MIT", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -4332,12 +4942,49 @@ "url": "https://github.com/inikulin/parse5?sponsor=1" } }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-to-regexp": { + "version": "8.4.2", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.4.2.tgz", + "integrity": "sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/picocolors": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", "license": "ISC" }, + "node_modules/pkce-challenge": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz", + "integrity": 
"sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==", + "license": "MIT", + "engines": { + "node": ">=16.20.0" + } + }, "node_modules/postcss": { "version": "8.5.14", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.14.tgz", @@ -4411,6 +5058,19 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "license": "MIT", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, "node_modules/pump": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", @@ -4421,6 +5081,45 @@ "once": "^1.3.1" } }, + "node_modules/qs": { + "version": "6.15.1", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.1.tgz", + "integrity": "sha512-6YHEFRL9mfgcAvql/XhwTvf5jKcOiiupt2FiJxHkiX1z4j7WL8J/jRHYLluORvc1XxB5rV20KoeK00gVJamspg==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.2.tgz", + "integrity": "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==", + "license": "MIT", + "dependencies": { + "bytes": "~3.1.2", + "http-errors": "~2.0.1", + "iconv-lite": "~0.7.0", + "unpipe": "~1.0.0" + }, + 
"engines": { + "node": ">= 0.10" + } + }, "node_modules/rc": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", @@ -4648,6 +5347,15 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/resolve-pkg-maps": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", @@ -4658,6 +5366,22 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/router": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz", + "integrity": "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.0", + "depd": "^2.0.0", + "is-promise": "^4.0.0", + "parseurl": "^1.3.3", + "path-to-regexp": "^8.0.0" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", @@ -4678,6 +5402,12 @@ ], "license": "MIT" }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "license": "MIT" + }, "node_modules/scheduler": { "version": "0.27.0", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", @@ -4696,12 +5426,88 @@ "node": ">=10" } }, + "node_modules/send": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/send/-/send-1.2.1.tgz", + "integrity": 
"sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "fresh": "^2.0.0", + "http-errors": "^2.0.1", + "mime-types": "^3.0.2", + "ms": "^2.1.3", + "on-finished": "^2.4.1", + "range-parser": "^1.2.1", + "statuses": "^2.0.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/send/node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/send/node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/serve-static": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-2.2.1.tgz", + "integrity": "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==", + "license": "MIT", + "dependencies": { + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "parseurl": "^1.3.3", + "send": "^1.2.0" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/server-only": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/server-only/-/server-only-0.0.1.tgz", "integrity": 
"sha512-qepMx2JxAa5jjfzxG79yPPq+8BuFToHd1hm7kI+Z4zAq1ftQiP7HcxMhDDItrbtwVeLg/cY2JnKnrcFkmiswNA==", "license": "MIT" }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "license": "ISC" + }, "node_modules/sharp": { "version": "0.34.5", "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz", @@ -5032,6 +5838,99 @@ "url": "https://opencollective.com/libvips" } }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.1.tgz", + "integrity": 
"sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.4" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/simple-concat": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", @@ -5102,6 +6001,15 @@ "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", "license": "BSD-3-Clause" }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + 
} + }, "node_modules/string_decoder": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", @@ -5224,6 +6132,15 @@ "node": ">=6" } }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, "node_modules/tr46": { "version": "0.0.3", "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", @@ -5288,6 +6205,45 @@ "node": "*" } }, + "node_modules/type-is": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-2.0.1.tgz", + "integrity": "sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==", + "license": "MIT", + "dependencies": { + "content-type": "^1.0.5", + "media-typer": "^1.1.0", + "mime-types": "^3.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/type-is/node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/type-is/node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", @@ -5423,6 +6379,15 @@ "url": 
"https://opencollective.com/unified" } }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/update-browserslist-db": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz", @@ -5460,6 +6425,15 @@ "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", "license": "MIT" }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/vfile": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz", @@ -5537,6 +6511,21 @@ "webidl-conversions": "^3.0.0" } }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", @@ -5552,6 +6541,15 @@ "url": "https://github.com/sponsors/colinhacks" } }, + "node_modules/zod-to-json-schema": { + "version": "3.25.2", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.2.tgz", + "integrity": "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA==", + "license": "ISC", + "peerDependencies": 
{ + "zod": "^3.25.28 || ^4" + } + }, "node_modules/zwitch": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", diff --git a/web/package.json b/web/package.json index a2099df..2395cc1 100644 --- a/web/package.json +++ b/web/package.json @@ -10,11 +10,13 @@ "ingest": "tsx scripts/ingest.ts", "build-index": "tsx scripts/build-corpus-index.ts", "summarize": "tsx scripts/summarize-corpus.ts", + "mcp": "tsx scripts/mcp-server.ts", "typecheck": "tsc --noEmit" }, "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.1.10", "@anthropic-ai/sdk": "^0.40.1", + "@modelcontextprotocol/sdk": "^1.29.0", "better-sqlite3": "^12.4.1", "front-matter": "^4.0.2", "highlight.js": "^11.10.0", diff --git a/web/scripts/mcp-server.ts b/web/scripts/mcp-server.ts new file mode 100644 index 0000000..a9fb958 --- /dev/null +++ b/web/scripts/mcp-server.ts @@ -0,0 +1,194 @@ +#!/usr/bin/env node +/** + * Standalone MCP server exposing the Lithium learning-corpus navigation tools + * to any MCP client (Claude Code, Claude Desktop, the official inspector, etc.). + * + * The server reads the same SQLite DB that the Next.js app writes to — + * `web/data/lithium.db` by default, override with LITHIUM_DB_PATH. + * + * Tools (mirrors the in-app tools): + * - get_corpus_outline({ phase_id?, max_level?, include_summaries? }) + * - get_section({ slug }) + * - search_corpus({ query, limit? }) + * - list_glossary({ prefix? }) + * - list_concepts({ phase_id?, status? 
}) + * + * Run: + * npm run mcp # stdio transport + * + * Register with Claude Code (~/.claude/settings.json or .claude.json): + * { + * "mcpServers": { + * "lithium-corpus": { + * "command": "tsx", + * "args": ["/abs/path/to/web/scripts/mcp-server.ts"], + * "env": { "LITHIUM_CONTENT_DIR": "/abs/path/to/zero", + * "LITHIUM_DB_PATH": "/abs/path/to/web/data/lithium.db" } + * } + * } + * } + */ +import { z } from "zod"; +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { + getSection, + renderOutlineMarkdown, + searchCorpus, +} from "../src/lib/corpus-index"; +import { listGlossary, listConceptsByPhase } from "../src/lib/repos"; +import { ensureSeeded } from "../src/lib/content-loader"; + +ensureSeeded(); + +const server = new McpServer( + { name: "lithium-corpus", version: "0.1.0" }, + { + instructions: + "Navigate the Lithium learning corpus (chemistry → water-treatment → lithium-isotope-separation). Always start with `get_corpus_outline` to see what's available; use `get_section` to fetch verbatim Markdown for any node, `search_corpus` to find sections by keyword, and `list_glossary`/`list_concepts` to inspect the learner's persisted state.", + }, +); + +server.registerTool( + "get_corpus_outline", + { + title: "Get corpus outline", + description: + "Return the curriculum's table-of-contents tree as compact Markdown. Defaults: H1 + H2, with one-sentence summaries. Pass max_level=3 to include H3 subsections, or phase_id to scope to one phase.", + inputSchema: { + phase_id: z.string().optional(), + max_level: z.number().int().min(1).max(6).optional(), + include_summaries: z.boolean().optional(), + }, + }, + async ({ phase_id, max_level, include_summaries }) => { + const md = renderOutlineMarkdown({ + phase_id, + maxLevel: max_level ?? 2, + summaryLen: include_summaries === false ? 
0 : 80, + }); + return { + content: [{ type: "text", text: md }], + }; + }, +); + +server.registerTool( + "get_section", + { + title: "Get section", + description: + "Fetch the verbatim Markdown of a corpus node by slug. On miss, the response includes a `did_you_mean` list of similar slugs.", + inputSchema: { + slug: z.string(), + }, + }, + async ({ slug }) => { + const r = getSection(slug); + if (!r.ok) { + return { + content: [ + { + type: "text", + text: + `Section "${slug}" not found.\n\nDid you mean:\n` + + r.did_you_mean + .map((d) => ` - ${d.slug} — ${d.title} (${d.reason})`) + .join("\n"), + }, + ], + isError: true, + }; + } + return { + content: [ + { + type: "text", + text: `# ${r.title}\n\n_(slug: ${r.slug}, file: ${r.page_slug}, lines ${r.line_start}-${r.line_end}, ${r.word_count} words)_\n\n${r.content}`, + }, + ], + }; + }, +); + +server.registerTool( + "search_corpus", + { + title: "Search corpus", + description: + "Substring + token-overlap search over node titles and summaries. Returns ranked hits.", + inputSchema: { + query: z.string(), + limit: z.number().int().min(1).max(20).optional(), + }, + }, + async ({ query, limit }) => { + const hits = searchCorpus(query, limit ?? 8); + const text = hits.length + ? hits + .map( + (h) => + `[${h.score}] \`${h.slug}\` — ${h.title}${h.summary ? `\n ${h.summary.slice(0, 160)}` : ""}`, + ) + .join("\n\n") + : `(no hits for "${query}")`; + return { content: [{ type: "text", text }] }; + }, +); + +server.registerTool( + "list_glossary", + { + title: "List glossary terms", + description: + "Return the glossary terms persisted by the learner (term + symbol + units + short definition).", + inputSchema: { + prefix: z.string().optional(), + }, + }, + async ({ prefix }) => { + const rows = listGlossary(prefix ?? undefined); + const text = rows.length + ? rows + .map((r) => { + const sym = r.symbol ? ` (${r.symbol}${r.units ? 
`, ${r.units}` : ""})` : ""; + return `**${r.term}**${sym} — ${r.definition.slice(0, 200)}`; + }) + .join("\n\n") + : "(glossary is empty)"; + return { content: [{ type: "text", text }] }; + }, +); + +server.registerTool( + "list_concepts", + { + title: "List knowledge-tracker concepts", + description: + "List concept slugs (with current status) from the learner's knowledge tracker. Filter by phase_id and/or status.", + inputSchema: { + phase_id: z.string().optional(), + status: z.enum(["todo", "exposed", "comfortable", "solid"]).optional(), + }, + }, + async ({ phase_id, status }) => { + const rows = listConceptsByPhase(phase_id); + const filtered = status ? rows.filter((c) => c.status === status) : rows; + const text = filtered.length + ? filtered + .map((c) => `- [${c.status}] \`${c.slug}\`${c.section ? ` (${c.section})` : ""}\n ${c.label}`) + .join("\n") + : "(no concepts match)"; + return { content: [{ type: "text", text }] }; + }, +); + +async function main() { + const transport = new StdioServerTransport(); + await server.connect(transport); +} + +main().catch((err) => { + console.error("[mcp-server] fatal:", err); + process.exit(1); +}); diff --git a/web/scripts/test-mcp-server.ts b/web/scripts/test-mcp-server.ts new file mode 100644 index 0000000..ddbd31a --- /dev/null +++ b/web/scripts/test-mcp-server.ts @@ -0,0 +1,91 @@ +/** + * Smoke test for the lithium-corpus MCP server. Spawns it via stdio, + * lists tools, calls each one with realistic args, and prints the responses. 
+ */ +import { Client } from "@modelcontextprotocol/sdk/client/index.js"; +import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; +import path from "node:path"; + +async function main() { + const root = path.resolve(__dirname, ".."); + const transport = new StdioClientTransport({ + command: "npx", + args: ["-y", "tsx", path.join(root, "scripts/mcp-server.ts")], + cwd: root, + env: process.env as Record<string, string>, // child inherits this process's environment + }); + const client = new Client({ name: "test-client", version: "0.0.1" }); + await client.connect(transport); + + const meta = await client.listTools(); + console.log("\n== TOOLS =="); + for (const t of meta.tools) { + console.log(` • ${t.name} — ${t.description?.slice(0, 80) ?? ""}`); + } + + function show(label: string, res: unknown) { + const r = res as { content?: Array<{ type: string; text?: string }>; isError?: boolean }; + const txt = (r.content ?? []) + .map((c) => (c.type === "text" ? c.text ?? "" : `(${c.type})`)) + .join("\n"); + console.log(`\n== ${label}${r.isError ? " (error)" : ""} ==`); + console.log(txt.slice(0, 800) + (txt.length > 800 ? 
"\n…(truncated)" : "")); + } + + show( + "get_corpus_outline (phase_id: 02-water-treatment, max_level: 2)", + await client.callTool({ + name: "get_corpus_outline", + arguments: { phase_id: "02-water-treatment", max_level: 2 }, + }), + ); + + show( + "get_section (valid)", + await client.callTool({ + name: "get_section", + arguments: { + slug: "01-chemistry-fundamentals/06-isotopes#rigorous-statement", + }, + }), + ); + + show( + "get_section (bad slug)", + await client.callTool({ + name: "get_section", + arguments: { slug: "01-chemistry-fundamentals/atomic-hypothesis" }, + }), + ); + + show( + "search_corpus (crown ether)", + await client.callTool({ + name: "search_corpus", + arguments: { query: "crown ether liquid extraction", limit: 5 }, + }), + ); + + show( + "list_glossary", + await client.callTool({ + name: "list_glossary", + arguments: {}, + }), + ); + + show( + "list_concepts (status: solid)", + await client.callTool({ + name: "list_concepts", + arguments: { status: "solid" }, + }), + ); + + await client.close(); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); From 722f2e673e10f6d4971baf3acd65c8a1ba362906 Mon Sep 17 00:00:00 2001 From: Tarek Kekhia Date: Sun, 10 May 2026 12:53:37 -0400 Subject: [PATCH 5/9] feat(corpus-index): swap substring search for SQLite FTS5 with bm25 + Porter stemming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The substring + token-overlap approach was adequate at 446 nodes but ranked poorly: it gave equal weight to a stray match in a slug and a strong match in a section's body, and missed morphological variants ("isotopes" didn't match "isotopic"). This commit swaps in a proper full-text index: - New `corpus_fts` virtual table (FTS5, porter+unicode61 tokenizer) with columns slug (UNINDEXED), title, summary, content. Rebuilt wholesale by `corpus-builder.ts` alongside `corpus_node` so they never drift. 
- `searchCorpus()` now runs `MATCH ?` with a sanitized OR-of-tokens query and ranks by `bm25(corpus_fts, 8.0, 4.0, 1.0)` (title weighted highest, then summary, then body). Score is normalized so callers can keep "higher = better". - Returns no hits if the sanitized FTS query is empty (only punctuation or one-character tokens); falls back to the old substring scan only if the FTS statement throws (e.g. the virtual table is missing). Verified on representative queries: - "crown ether liquid extraction" → top hits are crown-ether LLX sections in both 02-water-treatment and 03-lithium-isotope-separation - "mercury amalgam isotope" → top hits are COLEX principle + Y-12 legacy - "fusion blanket tritium" → top hit is "The tritium breeding ratio (TBR)" - "Avogadro Brownian Einstein" → top hit is "How we *know* atoms exist" - Porter stemming check: searchCorpus("isotopes") and searchCorpus("isotopic") return identical top hits (same stem) Tool descriptions in tutor-prompt.ts and mcp-server.ts updated to note FTS5 capabilities (stemming, body-text matching) so the model picks better keywords. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- web/package-lock.json | 13 ++++ web/package.json | 1 + web/scripts/check-corpus.ts | 25 ++++++- web/scripts/mcp-server.ts | 2 +- web/src/lib/corpus-builder.ts | 8 +++ web/src/lib/corpus-index.ts | 119 ++++++++++++++++++++++++---------- web/src/lib/db.ts | 11 ++++ web/src/lib/tutor-prompt.ts | 4 +- 8 files changed, 145 insertions(+), 38 deletions(-) diff --git a/web/package-lock.json b/web/package-lock.json index 2da9688..ccdfc61 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -12,6 +12,7 @@ "@anthropic-ai/sdk": "^0.40.1", "@modelcontextprotocol/sdk": "^1.29.0", "better-sqlite3": "^12.4.1", + "dotenv": "^17.4.2", "front-matter": "^4.0.2", "highlight.js": "^11.10.0", "katex": "^0.16.22", @@ -2292,6 +2293,18 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/dotenv": { + "version": "17.4.2", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.4.2.tgz", + "integrity": "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", diff --git a/web/package.json b/web/package.json index 2395cc1..c0fcde1 100644 --- a/web/package.json +++ b/web/package.json @@ -18,6 +18,7 @@ "@anthropic-ai/sdk": "^0.40.1", "@modelcontextprotocol/sdk": "^1.29.0", "better-sqlite3": "^12.4.1", + "dotenv": "^17.4.2", "front-matter": "^4.0.2", "highlight.js": "^11.10.0", "katex": "^0.16.22", diff --git a/web/scripts/check-corpus.ts b/web/scripts/check-corpus.ts index 258ccd0..8b1a0d4 100644 --- a/web/scripts/check-corpus.ts +++ b/web/scripts/check-corpus.ts @@ -4,8 +4,31 @@ const compact = renderOutlineMarkdown({ maxLevel: 2, summaryLen: 80 }); const full = renderOutlineMarkdown({ maxLevel: 6, summaryLen: 140 }); 
console.log("compact:", compact.length, "bytes /", compact.split("\n").length, "lines"); console.log("full: ", full.length, "bytes /", full.split("\n").length, "lines"); + +console.log("\n--- FTS5 search: 'crown ether liquid extraction' ---"); +for (const h of searchCorpus("crown ether liquid extraction", 6)) { + console.log(` [${h.score.toFixed(2)}] ${h.slug} :: ${h.title}`); +} +console.log("\n--- FTS5 search: 'mercury amalgam isotope' ---"); +for (const h of searchCorpus("mercury amalgam isotope", 6)) { + console.log(` [${h.score.toFixed(2)}] ${h.slug} :: ${h.title}`); +} +console.log("\n--- FTS5 search: 'fusion blanket tritium' ---"); +for (const h of searchCorpus("fusion blanket tritium", 6)) { + console.log(` [${h.score.toFixed(2)}] ${h.slug} :: ${h.title}`); +} +console.log("\n--- FTS5 search: 'Avogadro Brownian Einstein' ---"); +for (const h of searchCorpus("Avogadro Brownian Einstein", 6)) { + console.log(` [${h.score.toFixed(2)}] ${h.slug} :: ${h.title}`); +} +console.log("\n--- FTS5 stemming check: 'isotopes' vs 'isotopic' ---"); +const a = searchCorpus("isotopes", 3); +const b = searchCorpus("isotopic", 3); +console.log(" 'isotopes' top:", a.map((h) => h.slug).join(" | ")); +console.log(" 'isotopic' top:", b.map((h) => h.slug).join(" | ")); + const md = compact; -console.log("outline bytes:", md.length, "lines:", md.split("\n").length); +console.log("\n\noutline bytes:", md.length, "lines:", md.split("\n").length); console.log("first 800 chars:\n" + md.slice(0, 800)); console.log("\n--- search 'crown ether ion exchange' ---"); for (const h of searchCorpus("crown ether ion exchange", 6)) { diff --git a/web/scripts/mcp-server.ts b/web/scripts/mcp-server.ts index a9fb958..9eedd43 100644 --- a/web/scripts/mcp-server.ts +++ b/web/scripts/mcp-server.ts @@ -116,7 +116,7 @@ server.registerTool( { title: "Search corpus", description: - "Substring + token-overlap search over node titles and summaries. 
Returns ranked hits.", + "FTS5-backed search across node titles, summaries, and verbatim section text. BM25-ranked with Porter stemming. Returns ranked hits with score + summary snippet.", inputSchema: { query: z.string(), limit: z.number().int().min(1).max(20).optional(), diff --git a/web/src/lib/corpus-builder.ts b/web/src/lib/corpus-builder.ts index 5407558..d19071c 100644 --- a/web/src/lib/corpus-builder.ts +++ b/web/src/lib/corpus-builder.ts @@ -108,8 +108,15 @@ export function buildCorpusIndex(): { added: number; total: number; pages: numbe `); const deletePageNodes = db.prepare("DELETE FROM corpus_node WHERE page_slug = ?"); + const deleteFtsAll = db.prepare("DELETE FROM corpus_fts"); + const insertFts = db.prepare( + "INSERT INTO corpus_fts (slug, title, summary, content) VALUES (?, ?, ?, ?)", + ); const tx = db.transaction(() => { + // The FTS table is rebuilt wholesale — cheaper than per-row maintenance + // for our scale and avoids drift if header-deletes leave orphan rows. + deleteFtsAll.run(); let globalOrder = 0; for (const page of pages) { // Wipe and re-insert this page's nodes — keeps the table clean across @@ -196,6 +203,7 @@ export function buildCorpusIndex(): { added: number; total: number; pages: numbe word_count: wordCount(sectionText), content_hash, }); + insertFts.run(n.slug, n.title, summary ?? "", sectionText); } } }); diff --git a/web/src/lib/corpus-index.ts b/web/src/lib/corpus-index.ts index cd3a63c..2c98e44 100644 --- a/web/src/lib/corpus-index.ts +++ b/web/src/lib/corpus-index.ts @@ -171,45 +171,96 @@ export type SearchHit = { }; /** - * Substring + token-overlap search across titles and summaries. No vectors. - * Adequate for our small corpus; replaceable with FTS5 later if it gets slow. + * Sanitize an arbitrary user query for FTS5 MATCH. We split on whitespace, + * drop punctuation/operators (so a stray `-` or `(` doesn't break parsing), + * quote each remaining token to force a phrase-literal match, and OR-join + * them. 
Tokens shorter than 2 chars are dropped. + */ +function toFtsQuery(raw: string): string { + const tokens = raw + .toLowerCase() + .split(/[^\p{Letter}\p{Number}]+/u) + .filter((t) => t.length >= 2); + if (tokens.length === 0) return ""; + return tokens.map((t) => `"${t.replace(/"/g, "")}"`).join(" OR "); +} + +/** + * FTS5-backed search across node titles, summaries, and verbatim section + * text, ranked by bm25 (negated so a higher `score` is better). bm25 weights + * are positional over ALL columns, so the UNINDEXED `slug` gets a 0.0 slot + * first. Returns [] when the query sanitizes to nothing; falls back to a + * substring scan only if the FTS statement throws (e.g. table missing). */ export function searchCorpus(query: string, limit = 8): SearchHit[] { - const q = query.trim().toLowerCase(); + const q = query.trim(); if (!q) return []; - const tokens = Array.from( - new Set(q.split(/\s+/).filter((t) => t.length >= 3)), - ); + const fts = toFtsQuery(q); + if (!fts) return []; - const rows = getDb() - .prepare( - `SELECT slug, title, page_slug, phase_id, level, summary - FROM corpus_node`, - ) - .all() as Array<{ - slug: string; - title: string; - page_slug: string; - phase_id: string; - level: number; - summary: string | null; - }>; - - const scored = rows - .map((r) => { - const hay = `${r.title} ${r.summary ?? ""} ${r.slug}`.toLowerCase(); - let score = 0; - if (hay.includes(q)) score += 5; - for (const t of tokens) if (hay.includes(t)) score += 1; - // Prefer deeper sections (more specific) over root pages. - if (score > 0 && r.level > 1) score += 0.25; - return { ...r, score }; - }) - .filter((r) => r.score > 0) - .sort((a, b) => b.score - a.score) - .slice(0, limit); + const db = getDb(); + try { + const rows = db + .prepare( + `SELECT n.slug, + n.title, + n.page_slug, + n.phase_id, + n.level, + n.summary, + bm25(corpus_fts, 0.0, 8.0, 4.0, 1.0) AS bm25_score + FROM corpus_fts + JOIN corpus_node n ON n.slug = corpus_fts.slug + WHERE corpus_fts MATCH ? 
+ ORDER BY bm25_score ASC + LIMIT ?`, + ) + .all(fts, limit) as Array<{ + slug: string; + title: string; + page_slug: string; + phase_id: string; + level: number; + summary: string | null; + bm25_score: number; + }>; - return scored; + return rows.map((r) => { + const { bm25_score, ...rest } = r; + // Normalize: bm25 is unbounded negative-ish for good matches. Expose + // a positive `score` so callers can keep "higher = better". + const score = Math.round((10 + -1 * bm25_score) * 100) / 100; + return { ...rest, score }; + }); + } catch { + // Last-resort fallback: substring scan. Should not happen in practice. + const tokens = Array.from( + new Set(q.toLowerCase().split(/\s+/).filter((t) => t.length >= 3)), + ); + const all = db + .prepare( + `SELECT slug, title, page_slug, phase_id, level, summary FROM corpus_node`, + ) + .all() as Array<{ + slug: string; + title: string; + page_slug: string; + phase_id: string; + level: number; + summary: string | null; + }>; + return all + .map((r) => { + const hay = `${r.title} ${r.summary ?? ""}`.toLowerCase(); + let score = 0; + if (hay.includes(q.toLowerCase())) score += 5; + for (const t of tokens) if (hay.includes(t)) score += 1; + return { ...r, score }; + }) + .filter((r) => r.score > 0) + .sort((a, b) => b.score - a.score) + .slice(0, limit); + } } /** diff --git a/web/src/lib/db.ts b/web/src/lib/db.ts index 27f12e3..9fb272a 100644 --- a/web/src/lib/db.ts +++ b/web/src/lib/db.ts @@ -152,6 +152,17 @@ function migrate(db: Database.Database) { model TEXT, generated_at TEXT NOT NULL DEFAULT (datetime('now')) ); + + -- Full-text-search index over corpus nodes. Rebuilt by corpus-builder + -- alongside corpus_node. Standalone (not contentless) so the indexed + -- columns are stored verbatim and bm25 ranking can run. 
+ CREATE VIRTUAL TABLE IF NOT EXISTS corpus_fts USING fts5( + slug UNINDEXED, + title, + summary, + content, + tokenize = 'porter unicode61' + ); `); } diff --git a/web/src/lib/tutor-prompt.ts b/web/src/lib/tutor-prompt.ts index abce100..a8668f8 100644 --- a/web/src/lib/tutor-prompt.ts +++ b/web/src/lib/tutor-prompt.ts @@ -84,9 +84,9 @@ export const TUTOR_TOOL_ZOD = { }, search_corpus: { description: - "Substring + token-overlap search over node titles and summaries. Returns up to N relevant nodes ranked by score. Use when you don't know the exact slug for a topic but know what to look for.", + "FTS5-backed search across node titles, summaries, and verbatim section text. BM25-ranked, with Porter stemming (so 'isotopes' and 'isotopic' match the same forms). Use when you don't know the exact slug; pass 2–5 substantive keywords.", shape: { - query: z.string().describe("A short search query (e.g., 'crown ether lithium')."), + query: z.string().describe("Keywords (e.g., 'crown ether lithium')."), limit: z .number() .int() From 3d4596b2df3026b2268e22f6f12d1b614c9aea3a Mon Sep 17 00:00:00 2001 From: Tarek Kekhia Date: Mon, 11 May 2026 08:47:18 -0400 Subject: [PATCH 6/9] chore(deploy): Cloudflare Origin CA + ufw lockdown + small fixes - Caddyfile: serve Cloudflare Origin CA cert (valid until 2041) instead of ACME. Required so Caddy stops attempting HTTP-01 renewals after the hostname is fronted by Cloudflare's proxy. - lock-origin-to-cloudflare.sh: restrict ufw 80/443 to Cloudflare IP ranges only. SSH on 22 stays open. Idempotent via /var/lib/cloudflare-ufw.list. - deploy.sh: drop sudo from the diagnostic `systemctl status` call. The extra `--no-pager --lines=10` args don't match the sudoers rule, which caused the deploy to hang on a password prompt. - bootstrap.sh: chown the lithium user's authorized_keys after the root-shell redirect (the `>>` ran as root, not as the sudo'd user). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- deploy/Caddyfile | 7 +-- deploy/bootstrap.sh | 1 + deploy/deploy.sh | 2 +- deploy/lock-origin-to-cloudflare.sh | 87 +++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 6 deletions(-) create mode 100755 deploy/lock-origin-to-cloudflare.sh diff --git a/deploy/Caddyfile b/deploy/Caddyfile index d6d77bb..69f814e 100644 --- a/deploy/Caddyfile +++ b/deploy/Caddyfile @@ -1,12 +1,9 @@ -{ - email tarek.kekhia@emeraldlake.io -} - from-zero.emeraldlake.io { + tls /etc/caddy/tls/origin.pem /etc/caddy/tls/origin.key + encode zstd gzip reverse_proxy 127.0.0.1:3717 - # Sensible defaults request_body { max_size 5MB } diff --git a/deploy/bootstrap.sh b/deploy/bootstrap.sh index 5b6d5a9..d6a30dd 100755 --- a/deploy/bootstrap.sh +++ b/deploy/bootstrap.sh @@ -133,6 +133,7 @@ Next steps: sudo -u ${APP_USER} ssh-keygen -t ed25519 -f /home/${APP_USER}/.ssh/id_ed25519 -N '' sudo -u ${APP_USER} cat /home/${APP_USER}/.ssh/id_ed25519.pub >> /home/${APP_USER}/.ssh/authorized_keys + chown ${APP_USER}:${APP_USER} /home/${APP_USER}/.ssh/authorized_keys sudo -u ${APP_USER} chmod 600 /home/${APP_USER}/.ssh/authorized_keys sudo -u ${APP_USER} cat /home/${APP_USER}/.ssh/id_ed25519 # private — paste into GH secret SSH_KEY diff --git a/deploy/deploy.sh b/deploy/deploy.sh index d0767ae..a309dfd 100755 --- a/deploy/deploy.sh +++ b/deploy/deploy.sh @@ -46,4 +46,4 @@ echo "==> [deploy] restart lithium.service" sudo systemctl restart lithium echo "==> [deploy] done" -sudo systemctl --no-pager --lines=10 status lithium || true +systemctl --no-pager --lines=10 status lithium || true diff --git a/deploy/lock-origin-to-cloudflare.sh b/deploy/lock-origin-to-cloudflare.sh new file mode 100755 index 0000000..6b01712 --- /dev/null +++ b/deploy/lock-origin-to-cloudflare.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# lock-origin-to-cloudflare.sh +# +# Replaces the wide-open ufw rules for ports 80/443 with allow-lists scoped +# to Cloudflare's published IP 
ranges (cloudflare.com/ips-v4 + ips-v6). +# Port 22 stays open to the world so you don't lock yourself out and so +# GitHub Actions can still SSH in. +# +# Run as root: +# sudo bash /srv/lithium/deploy/lock-origin-to-cloudflare.sh +# +# Idempotent: rerun monthly (or via cron) to pick up Cloudflare IP changes. +# A state file at /var/lib/cloudflare-ufw.list tracks the last applied set so +# stale entries can be pruned cleanly. +# +# Pre-reqs: +# - DNS for your hostname is proxied through Cloudflare (orange cloud) +# - Cloudflare SSL/TLS mode is "Full (strict)" +# - You've already verified the site is reachable via Cloudflare. +# If any of these aren't true yet, do NOT run this script — you'll +# cut off legitimate traffic. + +set -euo pipefail + +if [ "$EUID" -ne 0 ]; then + echo "Run as root (sudo bash $0)" >&2 + exit 1 +fi + +CF_V4_URL="https://www.cloudflare.com/ips-v4" +CF_V6_URL="https://www.cloudflare.com/ips-v6" +STATE_FILE="/var/lib/cloudflare-ufw.list" +COMMENT="cloudflare-origin" + +command -v ufw >/dev/null || { echo "ufw not installed" >&2; exit 1; } +command -v curl >/dev/null || { echo "curl not installed" >&2; exit 1; } + +# 1. Fetch fresh Cloudflare IP ranges. Fail BEFORE touching the firewall. +echo "==> Fetching Cloudflare IP ranges" +CF_V4=$(curl -fsSL --max-time 10 "${CF_V4_URL}") +CF_V6=$(curl -fsSL --max-time 10 "${CF_V6_URL}") + +if [ -z "${CF_V4// }" ]; then + echo "Empty IPv4 list — aborting" >&2 + exit 1 +fi + +NEW_LIST=$(printf "%s\n%s\n" "${CF_V4}" "${CF_V6}" | grep -E '^[0-9a-fA-F:.]+/[0-9]+$' | sort -u) +NEW_COUNT=$(echo "${NEW_LIST}" | wc -l | tr -d ' ') +echo " got ${NEW_COUNT} ranges" + +# 2. Add the new allow-list rules FIRST (additive, no window of exposure loss). +echo "==> Adding allow rules for Cloudflare ranges" +while IFS= read -r ip; do + [ -z "${ip}" ] && continue + ufw allow proto tcp from "${ip}" to any port 80,443 comment "${COMMENT}" >/dev/null +done <<< "${NEW_LIST}" + +# 3. 
Remove the wide-open 80/443 rules from bootstrap (if still present). +echo "==> Removing wide-open port 80/443 rules" +ufw delete allow 80/tcp >/dev/null 2>&1 || true +ufw delete allow 443/tcp >/dev/null 2>&1 || true +ufw delete allow 80 >/dev/null 2>&1 || true +ufw delete allow 443 >/dev/null 2>&1 || true + +# 4. Prune stale CF rules (IPs in old state file but not in new list). +if [ -f "${STATE_FILE}" ]; then + STALE=$(comm -23 <(sort "${STATE_FILE}") <(echo "${NEW_LIST}" | sort)) + if [ -n "${STALE}" ]; then + echo "==> Pruning $(echo "${STALE}" | wc -l | tr -d ' ') stale CF ranges" + while IFS= read -r ip; do + [ -z "${ip}" ] && continue + ufw delete allow proto tcp from "${ip}" to any port 80,443 >/dev/null 2>&1 || true + done <<< "${STALE}" + fi +fi + +# 5. Persist current list for next run. +install -d -m 0755 "$(dirname "${STATE_FILE}")" +echo "${NEW_LIST}" > "${STATE_FILE}" + +# 6. Reload. +ufw reload >/dev/null + +echo +echo "==> Done. ufw status:" +ufw status verbose | head -40 From 877ca232aff86e03c500d1caf5d41f9a8ab1249f Mon Sep 17 00:00:00 2001 From: Tarek Kekhia Date: Mon, 11 May 2026 08:59:51 -0400 Subject: [PATCH 7/9] feat: convert user-data MD files to .env.example pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The four tracking files (glossary, knowledge-tracker, questions-and- answers, progress-log) are per-environment runtime state — the app rewrites them on every chat turn. Keeping them in git produced noisy diffs and meant the VPS started with whatever happened to be the last local session's residue. 
Now mirrors the `.env` / `.env.local` pattern: - *.example.md files (committed) — the seed templates - *.md files (gitignored) — the live runtime files, auto-copied from the templates on first run by bootstrapUserData() in content-loader bootstrapUserData runs from ensureSeeded(), which already fires on every API call, so a fresh VPS picks up the templates automatically on the first request — no manual setup beyond `npm install` + auth env vars. Seed templates: empty-stub glossary + Q&A, all-todo knowledge tracker, kickoff-only progress log (drops the two simulation entries that had been committed by accident). Verified: rm-ing all four live files and hitting /api/concepts re-creates them from the templates with identical byte counts to the .example.md siblings. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 10 +++ web/.gitignore | 1 + web/README.md | 22 ++++++ web/src/lib/content-loader.ts | 43 ++++++++++- zero/04-learning/glossary.example.md | 6 ++ zero/04-learning/glossary.md | 77 ------------------- ...racker.md => knowledge-tracker.example.md} | 16 ++-- .../questions-and-answers.example.md | 7 ++ zero/04-learning/questions-and-answers.md | 27 ------- ...rogress-log.md => progress-log.example.md} | 20 ----- 10 files changed, 96 insertions(+), 133 deletions(-) create mode 100644 zero/04-learning/glossary.example.md delete mode 100644 zero/04-learning/glossary.md rename zero/04-learning/{knowledge-tracker.md => knowledge-tracker.example.md} (87%) create mode 100644 zero/04-learning/questions-and-answers.example.md delete mode 100644 zero/04-learning/questions-and-answers.md rename zero/05-meta/{progress-log.md => progress-log.example.md} (77%) diff --git a/.gitignore b/.gitignore index d2c38dd..34621ea 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,16 @@ web/data/*.db-shm zero/_pdf-output/ zero/_pdf-tooling/node_modules/ +# Per-environment user-data tracking files. 
The seed templates live next +# to them as *.example.md and are committed; the live files are gitignored +# and auto-copied from the templates on first run by +# web/src/lib/content-loader.ts → bootstrapUserData(). Treat them like +# .env vs .env.local. +zero/04-learning/glossary.md +zero/04-learning/knowledge-tracker.md +zero/04-learning/questions-and-answers.md +zero/05-meta/progress-log.md + # python __pycache__/ *.pyc diff --git a/web/.gitignore b/web/.gitignore index a5639e7..c4b2394 100644 --- a/web/.gitignore +++ b/web/.gitignore @@ -12,3 +12,4 @@ data/*.db data/*.db-journal data/*.db-wal data/*.db-shm + diff --git a/web/README.md b/web/README.md index 912c5c0..2de25a8 100644 --- a/web/README.md +++ b/web/README.md @@ -45,6 +45,28 @@ without any chat credentials. The DB lives at `web/data/lithium.db`. Re-sync from disk anytime via the **Settings → Re-sync from zero/** button or `npm run ingest`. +## User-data files (the `.env.example` pattern) + +The four Markdown mirrors above are **per-environment runtime state**, not +source code. They get rewritten by the app every time you chat. Treat them +like `.env` files: + +- `zero/04-learning/glossary.example.md`, `knowledge-tracker.example.md`, + `questions-and-answers.example.md`, `zero/05-meta/progress-log.example.md` + → **committed** seed templates. +- The live files (same paths without `.example`) → **gitignored**, copied + from the templates on first run by `bootstrapUserData()` in + `src/lib/content-loader.ts`. You don't have to do anything manually. + +**To reset to a clean slate** (locally or on a fresh deploy): delete the +live files, delete `web/data/lithium.db`, then hit any route. The next +request re-bootstraps everything from the templates + the corpus. + +**On the VPS**: `git clone` gives you only the `.example.md` files. The +first chat request copies them into place, seeds the DB from the corpus, +and the app is live. 
No setup step needed beyond `npm install` and +setting auth in `web/.env.local`. + ## Folder layout ``` diff --git a/web/src/lib/content-loader.ts b/web/src/lib/content-loader.ts index 95762a8..129c222 100644 --- a/web/src/lib/content-loader.ts +++ b/web/src/lib/content-loader.ts @@ -1,10 +1,50 @@ import fs from "node:fs"; import path from "node:path"; import crypto from "node:crypto"; -import { CONTENT_DIR, PHASE_DIRS } from "./paths"; +import { + CONTENT_DIR, + PHASE_DIRS, + GLOSSARY_PATH, + KNOWLEDGE_TRACKER_PATH, + QA_PATH, + PROGRESS_LOG_PATH, +} from "./paths"; import { getDb, type ConceptStatus, type PageRow } from "./db"; import { slugify } from "./slug"; +/** + * User-data tracking files (.env.example pattern). The seed templates live + * at `*.example.md` and are committed; the live `*.md` files are gitignored + * and auto-copied from the template on first run. + */ +const USER_DATA_FILES = [ + GLOSSARY_PATH, + KNOWLEDGE_TRACKER_PATH, + QA_PATH, + PROGRESS_LOG_PATH, +]; + +export function bootstrapUserData(): { created: string[]; skipped: string[] } { + const created: string[] = []; + const skipped: string[] = []; + for (const live of USER_DATA_FILES) { + if (fs.existsSync(live)) { + skipped.push(live); + continue; + } + const example = live.replace(/\.md$/, ".example.md"); + if (!fs.existsSync(example)) { + // No template available — leave it; the individual regenerate*/append* + // functions will create a fresh file with their own initial header. 
+ continue; + } + fs.mkdirSync(path.dirname(live), { recursive: true }); + fs.copyFileSync(example, live); + created.push(live); + } + return { created, skipped }; +} + export type DiscoveredPage = { slug: string; phase_id: string; @@ -231,6 +271,7 @@ export function listConcepts() { } export function ensureSeeded() { + bootstrapUserData(); const db = getDb(); const pageCount = (db.prepare("SELECT COUNT(*) AS n FROM pages").get() as { n: number }).n; if (pageCount === 0) syncPagesToDb(); diff --git a/zero/04-learning/glossary.example.md b/zero/04-learning/glossary.example.md new file mode 100644 index 0000000..41c0e66 --- /dev/null +++ b/zero/04-learning/glossary.example.md @@ -0,0 +1,6 @@ +# Glossary + +Living glossary of key terms encountered during the lithium learning journey. +This file is auto-managed by the learning app — edits will be regenerated on the next chat that adds a term. Use the app to add or edit entries. + +_(empty — Claude will add terms here as you chat)_ diff --git a/zero/04-learning/glossary.md b/zero/04-learning/glossary.md deleted file mode 100644 index c91d83a..0000000 --- a/zero/04-learning/glossary.md +++ /dev/null @@ -1,77 +0,0 @@ -# Glossary - -Living glossary of key terms encountered during the lithium learning journey. -This file is auto-managed by the learning app — edits will be regenerated on the next chat that adds a term. Use the app to add or edit entries. - -| Term | Symbol | Units | Definition | See also | -|------|--------|-------|------------|----------| -| Aluminum | Al | | Element with atomic number Z=13, the most abundant metal in Earth's crust. | | -| Antimony | Sb | | Element with atomic number Z=51, a toxic metalloid used in flame retardants. | | -| Argon | Ar | | Element with atomic number Z=18, a noble gas comprising ~1% of Earth's atmosphere. | | -| Arsenic | As | | Element with atomic number Z=33, a toxic metalloid used in semiconductors. 
| | -| Barium | Ba | | Element with atomic number Z=56, an alkaline earth metal used in medical imaging. | | -| Beryllium | Be | | Element with atomic number Z=4, an alkaline earth metal. | | -| Boron | B | | Element with atomic number Z=5, a metalloid in Group 13. | | -| Bromine | Br | | Element with atomic number Z=35, the only nonmetal liquid at room temperature. | | -| Cadmium | Cd | | Element with atomic number Z=48, a toxic transition metal used in batteries. | | -| Calcium | Ca | | Element with atomic number Z=20, an alkaline earth metal in bones and teeth. | | -| Carbon | C | | Element with atomic number Z=6, the basis of organic chemistry. | | -| Cerium | Ce | | Element with atomic number Z=58, the most abundant lanthanide. | | -| Cesium | Cs | | Element with atomic number Z=55, the most reactive stable alkali metal. | | -| Chlorine | Cl | | Element with atomic number Z=17, a halogen used in water disinfection. | | -| Chromium | Cr | | Element with atomic number Z=24, a transition metal used for corrosion resistance. | | -| Cobalt | Co | | Element with atomic number Z=27, a transition metal in vitamin B12. | | -| Copper | Cu | | Element with atomic number Z=29, a transition metal with high electrical conductivity. | | -| Dysprosium | Dy | | Element with atomic number Z=66, a lanthanide used in high-performance magnets. | | -| Erbium | Er | | Element with atomic number Z=68, a lanthanide used in fiber-optic amplifiers. | | -| Europium | Eu | | Element with atomic number Z=63, a lanthanide used in phosphors for displays. | | -| Fluorine | F | | Element with atomic number Z=9, the most electronegative element. | | -| Gadolinium | Gd | | Element with atomic number Z=64, a lanthanide used in MRI contrast agents. | | -| Gallium | Ga | | Element with atomic number Z=31, a post-transition metal that melts just above room temperature. | | -| Germanium | Ge | | Element with atomic number Z=32, a metalloid used in semiconductors. 
| | -| Helium | He | | Element with atomic number Z=2, a noble gas produced by stellar fusion. | | -| Holmium | Ho | | Element with atomic number Z=67, a lanthanide with the highest magnetic moment of any element. | | -| Hydrogen | H | | Element with atomic number Z=1, the lightest and most abundant element in the universe. | | -| Indium | In | | Element with atomic number Z=49, a post-transition metal used in touchscreens. | | -| Iodine | I | | Element with atomic number Z=53, a halogen essential for thyroid function. | | -| Iron | Fe | | Element with atomic number Z=26, the most abundant element on Earth by mass. | | -| Krypton | Kr | | Element with atomic number Z=36, a noble gas used in lighting and lasers. | | -| Lanthanum | La | | Element with atomic number Z=57, the first lanthanide element. | | -| Lithium | Li | | Element with atomic number Z=3, the lightest alkali metal. | | -| Magnesium | Mg | | Element with atomic number Z=12, an alkaline earth metal in chlorophyll. | | -| Manganese | Mn | | Element with atomic number Z=25, a transition metal essential for steel production. | | -| Molybdenum | Mo | | Element with atomic number Z=42, a transition metal used in high-strength steel. | | -| Neodymium | Nd | | Element with atomic number Z=60, a lanthanide used in powerful permanent magnets. | | -| Neon | Ne | | Element with atomic number Z=10, a noble gas used in lighting. | | -| Nickel | Ni | | Element with atomic number Z=28, a transition metal used in batteries and alloys. | | -| Niobium | Nb | | Element with atomic number Z=41, a transition metal used in superconducting alloys. | | -| Nitrogen | N | | Element with atomic number Z=7, makes up 78% of Earth's atmosphere. | | -| Oxygen | O | | Element with atomic number Z=8, essential for aerobic respiration. | | -| Palladium | Pd | | Element with atomic number Z=46, a platinum-group metal used in catalysis and electronics. | | -| Phosphorus | P | | Element with atomic number Z=15, essential for ATP and DNA. 
| | -| Potassium | K | | Element with atomic number Z=19, an alkali metal essential for cell electrochemistry. | | -| Praseodymium | Pr | | Element with atomic number Z=59, a lanthanide used in magnets and lasers. | | -| Promethium | Pm | | Element with atomic number Z=61, a radioactive lanthanide with no stable isotopes. | | -| Rhodium | Rh | | Element with atomic number Z=45, a rare platinum-group metal used in catalytic converters. | | -| Rubidium | Rb | | Element with atomic number Z=37, a highly reactive alkali metal. | | -| Ruthenium | Ru | | Element with atomic number Z=44, a platinum-group transition metal. | | -| Samarium | Sm | | Element with atomic number Z=62, a lanthanide used in magnets and nuclear reactors. | | -| Scandium | Sc | | Element with atomic number Z=21, the first transition metal. | | -| Selenium | Se | | Element with atomic number Z=34, a chalcogen essential in trace amounts. | | -| Silicon | Si | | Element with atomic number Z=14, a metalloid fundamental to semiconductors. | | -| Silver | Ag | | Element with atomic number Z=47, the most conductive metal for electricity and heat. | | -| Sodium | Na | | Element with atomic number Z=11, an alkali metal essential for nerve function. | | -| Strontium | Sr | | Element with atomic number Z=38, an alkaline earth metal used in fireworks. | | -| Sulfur | S | | Element with atomic number Z=16, a chalcogen found in amino acids. | | -| Technetium | Tc | | Element with atomic number Z=43, the lightest element with no stable isotopes. | | -| Tellurium | Te | | Element with atomic number Z=52, a brittle metalloid used in thermoelectrics. | | -| Terbium | Tb | | Element with atomic number Z=65, a lanthanide used in green phosphors and magnets. | | -| Thulium | Tm | | Element with atomic number Z=69, the second-rarest naturally occurring lanthanide. | | -| Tin | Sn | | Element with atomic number Z=50, a post-transition metal used in solder and coatings. 
| | -| Titanium | Ti | | Element with atomic number Z=22, a strong, lightweight transition metal. | | -| Vanadium | V | | Element with atomic number Z=23, a transition metal used in steel alloys. | | -| Xenon | Xe | | Element with atomic number Z=54, a noble gas used in lighting and anesthesia. | | -| Ytterbium | Yb | | Element with atomic number Z=70, a lanthanide used in atomic clocks and lasers. | | -| Yttrium | Y | | Element with atomic number Z=39, a transition metal used in phosphors. | | -| Zinc | Zn | | Element with atomic number Z=30, a transition metal essential for enzymes. | | -| Zirconium | Zr | | Element with atomic number Z=40, a corrosion-resistant transition metal. | | diff --git a/zero/04-learning/knowledge-tracker.md b/zero/04-learning/knowledge-tracker.example.md similarity index 87% rename from zero/04-learning/knowledge-tracker.md rename to zero/04-learning/knowledge-tracker.example.md index 2f16e57..8792c1f 100644 --- a/zero/04-learning/knowledge-tracker.md +++ b/zero/04-learning/knowledge-tracker.example.md @@ -87,16 +87,16 @@ of it correctly. - ⏳ What water treatment is and why we do it - ⏳ Drinking water vs. wastewater vs. industrial - ⏳ Coagulation / flocculation / sedimentation -- ⏳ What water treatment is and why we do it - ⏳ Filtration (sand, carbon, multimedia) +- ⏳ What water treatment is and why we do it - ⏳ Disinfection (chlorine, UV, ozone) - ⏳ Ion exchange resins -- ⏳ Drinking water vs. wastewater vs. industrial - ⏳ Membrane processes (MF / UF / NF / RO) - ⏳ Electrodialysis & capacitive deionization +- ⏳ Drinking water vs. wastewater vs. industrial - ⏳ Distillation -- ⏳ Coagulation / flocculation / sedimentation - ⏳ Why water-treatment unit ops are the right vocabulary for ion separation +- ⏳ Coagulation / flocculation / sedimentation - ⏳ Filtration (sand, carbon, multimedia) - ⏳ Disinfection (chlorine, UV, ozone) - ⏳ Ion exchange resins @@ -126,11 +126,11 @@ of it correctly. 
- ⏳ Industrial / geopolitical context (who supplies Li-6, fusion implications) - ⏳ Why separate Li-6 from Li-7 (fusion, fission, scientific) - ⏳ The natural abundance ratio (~7.5% / 92.5%) -- ✅ Mass-difference physics & why ~17% is "a lot" but still hard -- ✅ Separation factor (α) — what it means and typical values -- ✅ COLEX (mercury amalgam) — chemistry, history, environmental issues +- ⏳ Mass-difference physics & why ~17% is "a lot" but still hard +- ⏳ Separation factor (α) — what it means and typical values +- ⏳ COLEX (mercury amalgam) — chemistry, history, environmental issues - ⏳ Electromagnetic separation (calutron) -- ✅ Laser methods (AVLIS, MLIS) +- ⏳ Laser methods (AVLIS, MLIS) - ⏳ Distillation - ⏳ Chemical exchange without mercury - ⏳ Ion-exchange chromatography @@ -148,4 +148,4 @@ what didn't, and the new confidence symbols promoted.) | Date | Covered | Promoted | Notes | |------|---------|----------|-------| -| 2026-05-10 | Jumped to phase 3 (physics of isotope separation) without completing chemistry-fundamentals 05–06. Used corpus tools to | 03-lithium-isotope-separation/general-mass-difference-physics-why-17-is-a-lot-but-still-hard, 03-lithium-isotope-separation/general-separation-factor-α-what-it-means-and-typical-values, 03-lithium-isotope-separation/general-colex-mercury-amalgam-chemistry-history-environmental-issues, 03-lithium-isotope-separation/general-laser-methods-avlis-mlis | | +| _(no sessions yet)_ | | | | diff --git a/zero/04-learning/questions-and-answers.example.md b/zero/04-learning/questions-and-answers.example.md new file mode 100644 index 0000000..28b37c9 --- /dev/null +++ b/zero/04-learning/questions-and-answers.example.md @@ -0,0 +1,7 @@ +# Questions & Answers Log + +Persistent record of substantive questions and Claude's answers from teaching sessions. Auto-managed by the learning app — additions arrive whenever Claude calls `append_qa` (or you POST to `/api/qa`). 
+ +--- + +_(no Q&A yet — appended as teaching sessions happen)_ diff --git a/zero/04-learning/questions-and-answers.md b/zero/04-learning/questions-and-answers.md deleted file mode 100644 index 42c869a..0000000 --- a/zero/04-learning/questions-and-answers.md +++ /dev/null @@ -1,27 +0,0 @@ -# Questions & Answers Log - -Persistent record of substantive questions and Claude's answers from teaching sessions. Auto-managed by the learning app — additions arrive whenever Claude calls `append_qa` (or you POST to `/api/qa`). - ---- - -_(no Q&A yet — appended as teaching sessions happen)_ - -### Q-006 — How do COLEX and AVLIS exploit different isotope effects, and what are the trade -**Date:** 2026-05-10 -**Phase:** 03-lithium-isotope-separation -**Question:** - -How do COLEX and AVLIS exploit different isotope effects, and what are the trade-offs? - -**Answer (summary):** - -**COLEX** (mercury amalgam) exploits **zero-point energy (ZPE) differences** in chemical bonds. At equilibrium between Li–Hg amalgam and aqueous LiOH, the heavier isotope (Li-7) has lower ZPE → preferentially occupies the phase with stronger bonds (aqueous Li–O coordination), while Li-6 prefers the amalgam. The fractional ZPE difference depends on reduced mass: larger when Li bonds to light partners (O) than heavy ones (Hg). Per-stage α ≈ 1.05–1.06; requires hundreds of cascade stages. Industrial throughput (tonnes/yr), but Hg toxicity is disqualifying. - -**AVLIS** (Atomic Vapor Laser Isotope Separation) exploits the **isotope shift** in atomic spectral lines (~10 GHz at the 670.8 nm D-line), arising from finite-nuclear-mass and finite-nuclear-volume corrections to electron energy levels. A tuned laser selectively ionizes Li-6 atoms; an electric field collects the ions. Single-pass α >> 10 (no cascade needed). Clean, but throughput-limited by laser power and vapor flux (kg/yr vs. tonnes/yr for COLEX). 
- -**Trade-off**: Equilibrium chemistry (COLEX) gives small α but high throughput and scales industrially. Laser physics (AVLIS) gives enormous α but low throughput and high capital cost. The economics depend on target purity and Li-6 market price. - -**Anchor:** 03-lithium-isotope-separation/02-physics-of-isotope-separation.md -**Concept:** 03-lithium-isotope-separation/general-colex-mercury-amalgam-chemistry-history-environmental-issues - ---- diff --git a/zero/05-meta/progress-log.md b/zero/05-meta/progress-log.example.md similarity index 77% rename from zero/05-meta/progress-log.md rename to zero/05-meta/progress-log.example.md index 70ddf5d..6391f09 100644 --- a/zero/05-meta/progress-log.md +++ b/zero/05-meta/progress-log.example.md @@ -136,23 +136,3 @@ typos. The post-mortem reads this end-to-end. - After the lithium track has progressed substantively, do a post-mortem and synthesize the patterns into a reusable Claude Code skill for "from-zero domain learning" (Track 2). - -## 2026-05-10 — Completed modules 01 (Matter & Atoms), 02 (Atomic Structure), and 03 (Electric C - -**What we did:** -Completed modules 01 (Matter & Atoms), 02 (Atomic Structure), and 03 (Electric Charge). Deep dive into fundamentals of charge, Coulomb's law, EM vs. gravity at atomic scale, fine-structure constant context. Multiple teach-backs with correct consequence predictions (e.g., effect of doubled elementary charge on atomic radius and binding energy). 
- - -**Promoted:** 01-chemistry-fundamentals/electric-charge-what-charge-is-positive-negative-neutral, 01-chemistry-fundamentals/electric-charge-coulomb-s-law-in-plain-language, 01-chemistry-fundamentals/electric-charge-why-opposite-charges-attract-and-like-charges-repel, 01-chemistry-fundamentals/electric-charge-charge-as-a-conserved-quantity - ---- - -## 2026-05-10 — Jumped to phase 3 (physics of isotope separation) without completing chemistry-f - -**What we did:** -Jumped to phase 3 (physics of isotope separation) without completing chemistry-fundamentals 05–06. Used corpus tools to extract ZPE → bond-strength chain from 06-isotopes. Worked separation-factor α definition, reduced-mass arithmetic, and ZPE-environment differences rigorously. Compared COLEX (equilibrium chemistry, α ≈ 1.05, ZPE in Li–O vs. Li–Hg bonds) and AVLIS (laser spectroscopy, α >> 10, isotope shift at 670.8 nm). Demonstrated consequence prediction: Li-Pb amalgam would give smaller α (heavier partner); Li-H chemistry would give larger α (lighter partner, maximum ZPE difference). Promoted 4 concepts to solid. - - -**Promoted:** 03-lithium-isotope-separation/general-mass-difference-physics-why-17-is-a-lot-but-still-hard, 03-lithium-isotope-separation/general-separation-factor-α-what-it-means-and-typical-values, 03-lithium-isotope-separation/general-colex-mercury-amalgam-chemistry-history-environmental-issues, 03-lithium-isotope-separation/general-laser-methods-avlis-mlis - ---- From ebd54037c18d76fa372607a18543ef8f4cdf79aa Mon Sep 17 00:00:00 2001 From: Tarek Kekhia Date: Mon, 11 May 2026 09:09:19 -0400 Subject: [PATCH 8/9] chore(deploy): make ingest do the whole job + soft-call summarize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deploy was missing two things: 1. `npm run ingest` only synced pages + concepts and regenerated two mirror files. 
It did NOT build the corpus_node + corpus_fts tree that the chat tools navigate, and did NOT bootstrap the live user-data .md files from their .example.md templates. The app technically self-healed via the lazy build in ensureSeeded(), but that fires async-fire-and-forget on the first API call, which races against incoming chat traffic. Fixes: ingest now calls bootstrapUserData() first, buildCorpusIndex() after pages/concepts, and also regenerates the Q&A mirror (not just glossary + knowledge-tracker). 2. summary_cache was never populated on the VPS. Without it, the chat outline uses placeholder first-sentence summaries instead of Haiku-written ones. Adds a soft `npm run summarize` step to deploy.sh, gated on auth presence and skippable via LITHIUM_SKIP_SUMMARIZE=1. Idempotent by content_hash — subsequent deploys re-summarize only changed sections. Auth-missing or transient errors warn but don't fail the deploy. Verified locally: `rm zero/{04-learning,05-meta}/*.md && npm run ingest` re-bootstraps all four files from their templates, populates pages + concepts (88 concepts seeded), builds 446 corpus nodes, regenerates the three live mirror files. Idempotent on second run. Co-Authored-By: Claude Opus 4.7 (1M context) --- deploy/deploy.sh | 17 ++++++++++++++++- web/scripts/ingest.ts | 26 +++++++++++++++++++++----- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/deploy/deploy.sh b/deploy/deploy.sh index a309dfd..3ac7b8c 100755 --- a/deploy/deploy.sh +++ b/deploy/deploy.sh @@ -35,13 +35,28 @@ echo "==> [deploy] next build" NEXT_TELEMETRY_DISABLED=1 npm run build echo "==> [deploy] re-ingest curriculum into SQLite" -# Run ingest with the same env the systemd unit uses, so paths line up. +# `npm run ingest` now also: bootstraps the user-data live files from +# their .example.md siblings, builds the corpus_node + corpus_fts tree +# (PageIndex-style nav index), and regenerates the live mirror files. 
set -a # shellcheck disable=SC1090 source "${ENV_FILE}" set +a npm run ingest +echo "==> [deploy] (re)generate LLM summaries for corpus nodes" +# Idempotent — skips any node whose content_hash already has a cached +# summary. Falls back gracefully when auth is missing. +# Disable entirely with LITHIUM_SKIP_SUMMARIZE=1 in the env file. +if [ "${LITHIUM_SKIP_SUMMARIZE:-0}" = "1" ]; then + echo " LITHIUM_SKIP_SUMMARIZE=1 set, skipping" +elif [ -z "${ANTHROPIC_API_KEY:-}" ] && [ -z "${CLAUDE_CODE_OAUTH_TOKEN:-}" ]; then + echo " no ANTHROPIC_API_KEY or CLAUDE_CODE_OAUTH_TOKEN — skipping (app uses first-sentence summaries)" +else + # Soft-fail: a transient auth/rate-limit error must not block the deploy. + npm run summarize || echo " [warn] summarize failed, continuing deploy" +fi + echo "==> [deploy] restart lithium.service" sudo systemctl restart lithium diff --git a/web/scripts/ingest.ts b/web/scripts/ingest.ts index b12d749..b843953 100644 --- a/web/scripts/ingest.ts +++ b/web/scripts/ingest.ts @@ -1,7 +1,20 @@ -import { syncPagesToDb, syncConceptsToDb } from "../src/lib/content-loader"; -import { regenerateGlossaryFile, regenerateKnowledgeTrackerFile } from "../src/lib/markdown-sync"; +import { + bootstrapUserData, + syncPagesToDb, + syncConceptsToDb, +} from "../src/lib/content-loader"; +import { buildCorpusIndex } from "../src/lib/corpus-builder"; +import { + regenerateGlossaryFile, + regenerateKnowledgeTrackerFile, + regenerateQAFile, +} from "../src/lib/markdown-sync"; async function main() { + console.log("[ingest] bootstrapping user-data files from .example.md…"); + const boot = bootstrapUserData(); + console.log("[ingest] bootstrap:", { created: boot.created.length }); + console.log("[ingest] syncing pages…"); const pages = syncPagesToDb(); console.log("[ingest] pages:", pages); @@ -10,11 +23,14 @@ async function main() { const concepts = syncConceptsToDb(); console.log("[ingest] concepts:", concepts); - console.log("[ingest] regenerating glossary file…"); - 
regenerateGlossaryFile(); + console.log("[ingest] building corpus tree (PageIndex)…"); + const corpus = buildCorpusIndex(); + console.log("[ingest] corpus:", corpus); - console.log("[ingest] regenerating knowledge tracker file…"); + console.log("[ingest] regenerating live mirror files…"); + regenerateGlossaryFile(); regenerateKnowledgeTrackerFile(); + regenerateQAFile(); console.log("[ingest] done."); } From 4dfe9d121ace1bca7699abc910b40df711ab639b Mon Sep 17 00:00:00 2001 From: Tarek Kekhia Date: Mon, 11 May 2026 09:18:30 -0400 Subject: [PATCH 9/9] feat(deploy): cloudflare-harden skill + install-origin-cert helper Captures the production hardening sequence we just applied (from-zero.emeraldlake.io) as a reusable, project-scope Claude Code skill so it can be rerun on a fresh box without rediscovering the gotchas. - .claude/skills/cloudflare-harden/SKILL.md: 8-step playbook (Origin CA cert -> Full strict TLS -> orange-cloud proxy -> ufw lock to CF IPs -> Zero Trust Access). Auto-discoverable; triggers on "harden the box", "lock origin to cloudflare", "add cloudflare access", etc. Documents the actual bugs we hit (private-key-clobbered-by-Write, sudoers arg mismatch on systemctl status, NXDOMAIN cache, MCP scope quirks) plus rollback recipes for every step. - deploy/install-origin-cert.sh: reusable origin-side installer. Takes cert + key + IP, verifies the pair via pubkey hash before sending anything to the box (catches the file-overwrite bug), scps cert + key + the project Caddyfile, installs with caddy ownership, validates and reloads. Idempotent. - deploy/bootstrap.sh: stop copying the Caddyfile or restarting Caddy. The TLS-enabled Caddyfile references /etc/caddy/tls/origin.{pem,key} which don't exist on a fresh box, so installing it here crashed Caddy. Caddy stays on its default config until the harden playbook installs the cert + Caddyfile together. Next-steps text now points at the skill. 
- deploy/README.md: adds step 7 (Cloudflare hardening), updates step 1 to start in grey cloud (proxy gets flipped on as part of step 7), lists the new scripts in the file table. Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude/skills/cloudflare-harden/SKILL.md | 229 ++++++++++++++++++++++ deploy/README.md | 40 +++- deploy/bootstrap.sh | 31 ++- deploy/install-origin-cert.sh | 81 ++++++++ 4 files changed, 368 insertions(+), 13 deletions(-) create mode 100644 .claude/skills/cloudflare-harden/SKILL.md create mode 100755 deploy/install-origin-cert.sh diff --git a/.claude/skills/cloudflare-harden/SKILL.md b/.claude/skills/cloudflare-harden/SKILL.md new file mode 100644 index 0000000..b94c2ed --- /dev/null +++ b/.claude/skills/cloudflare-harden/SKILL.md @@ -0,0 +1,229 @@ +--- +name: cloudflare-harden +description: End-to-end Cloudflare hardening playbook for a self-hosted app on a single VPS reverse-proxied by Caddy. Takes a bootstrapped box from "Let's Encrypt + wide-open firewall + public site" to "Cloudflare Origin CA + Full strict TLS + proxied DNS + UFW locked to Cloudflare IPs + Zero Trust Access". Use when the user says "harden the box", "lock origin to cloudflare", "add cloudflare access", "ufw + cloudflare", "cloudflare proxy + access setup", or describes putting Cloudflare in front of a self-hosted origin. Requires the cloudflare-api MCP server (or a CF API token) and ssh root@ to the origin. +--- + +# Cloudflare hardening playbook + +The exact sequence applied to `from-zero.emeraldlake.io` (Hetzner CX23, Caddy → Next.js). Run from a laptop that has both the Cloudflare MCP connected and SSH access to the origin. 
+ +## Parameters to collect from the user before starting + +| Var | Example | How to derive | +|---|---|---| +| `HOSTNAME` | `from-zero.emeraldlake.io` | the FQDN to harden | +| `ROOT_DOMAIN` | `emeraldlake.io` | parent zone, for the CF zone lookup | +| `ORIGIN_IP` | `178.105.102.91` | server's public IP (already in DNS if grey-cloud is set) | +| `APP_PORT` | `3717` | what the reverse_proxy target is | +| `ALLOWED_EMAILS` | `tarek.kekhia@emeraldlake.io` | one or more for the Access policy | +| `SESSION_DURATION` | `720h` | CF Access cookie lifetime | +| `ZONE_ID` | `1a8035c3...` | resolved via `GET /zones?name=${ROOT_DOMAIN}` | +| `ACCOUNT_ID` | preset by MCP | exposed as `accountId` in mcp__cloudflare-api__execute | + +Ask the user only for the ones that aren't obvious. Use `mcp__cloudflare-api__execute` to look up `ZONE_ID` from `ROOT_DOMAIN`. Confirm `HOSTNAME` and `ALLOWED_EMAILS` explicitly. + +## Pre-flight + +The origin must already be running Caddy + the app behind it on `127.0.0.1:APP_PORT`. If the box was bootstrapped by `deploy/bootstrap.sh`, Caddy is installed and running on its default config, but the project Caddyfile is not in place yet (that's intentional: we install the TLS-enabled Caddyfile as part of this playbook, after the Origin CA cert exists). + +Verify before proceeding: + +```bash +ssh root@${ORIGIN_IP} 'systemctl is-active caddy; ss -tlnp | grep -E ":80|:443|:'${APP_PORT}'"; ls /etc/caddy/tls/ 2>/dev/null' +``` + +## The 8 steps + +### 1. DNS A record (grey cloud, temporary) + +```js +// in cloudflare-api MCP +await cloudflare.request({ method: "POST", path: `/zones/${ZONE_ID}/dns_records`, body: { + type: "A", name: HOSTNAME, content: ORIGIN_IP, proxied: false, ttl: 1, +}}); +``` + +Verify: `dig +short ${HOSTNAME} @1.1.1.1` returns `ORIGIN_IP`. + +### 2. 
Generate Origin CA cert + +Run on **the laptop** (so the private key is generated locally, never round-tripped through Cloudflare): + +```bash +WORK=/tmp/cf-origin-${HOSTNAME} +mkdir -p "$WORK" && cd "$WORK" +openssl req -new -newkey rsa:2048 -nodes -keyout origin.key -out origin.csr \ + -subj "/CN=${HOSTNAME}" +chmod 600 origin.key +``` + +POST the CSR to Cloudflare via MCP. Capture the returned cert: + +```js +const csr = await fs.readFile(`${WORK}/origin.csr`, "utf8"); +const r = await cloudflare.request({ method: "POST", path: "/certificates", body: { + hostnames: [HOSTNAME], + request_type: "origin-rsa", + requested_validity: 5475, // 15 years + csr, +}}); +// save r.result.certificate to ${WORK}/origin.pem +``` + +**Critical: do NOT use the `Write` tool to save the cert file.** Use `cat > origin.pem <<'CERT' ... CERT` via Bash. Reason: during the first attempt for this project, something clobbered the local private key with the cert content right after Write was called on the cert path. We lost the key and had to revoke + reissue. Always verify with: + +```bash +openssl x509 -in origin.pem -pubkey -noout | openssl md5 +openssl pkey -in origin.key -pubout 2>/dev/null | openssl md5 +# the two hashes MUST match +``` + +Save the returned `id` from the API response so you can revoke later if needed. + +### 3. Install cert + Caddyfile on origin + +Use `deploy/install-origin-cert.sh` — it scps the pair and the project Caddyfile, places them with correct ownership/perms, validates Caddyfile, reloads Caddy. + +```bash +bash deploy/install-origin-cert.sh /tmp/cf-origin-${HOSTNAME}/origin.pem /tmp/cf-origin-${HOSTNAME}/origin.key ${ORIGIN_IP} +``` + +Verify Caddy logs are clean (look for `tls.cache.maintenance` lines, ignore the benign `no OCSP stapling for [cloudflare origin certificate]: no URL to issuing certificate` warning — Origin CA certs don't carry OCSP URLs). 
+ +Sanity-check the origin is serving the new cert: + +```bash +echo | openssl s_client -connect ${ORIGIN_IP}:443 -servername ${HOSTNAME} 2>/dev/null \ + | openssl x509 -noout -issuer -dates +# issuer should be: CloudFlare Origin SSL Certificate Authority +``` + +### 4. Cloudflare SSL/TLS mode → Full (strict) + +```js +await cloudflare.request({ method: "PATCH", path: `/zones/${ZONE_ID}/settings/ssl`, + body: { value: "strict" }}); +``` + +### 5. Flip DNS to proxied (orange cloud) + +Find the DNS record id, then PATCH it: + +```js +const dns = await cloudflare.request({ method: "GET", path: `/zones/${ZONE_ID}/dns_records`, query: { name: HOSTNAME } }); +const dnsId = dns.result[0].id; +await cloudflare.request({ method: "PATCH", path: `/zones/${ZONE_ID}/dns_records/${dnsId}`, body: { proxied: true } }); +``` + +### 6. Verify edge + +```bash +CF_IP=$(dig +short ${HOSTNAME} @1.1.1.1 | head -1) +curl -sS -o /dev/null -D - --resolve ${HOSTNAME}:443:${CF_IP} https://${HOSTNAME}/ \ + | grep -iE '^(http|server|cf-ray):' +# expect: HTTP/2 200, server: cloudflare, cf-ray: ... +``` + +Confirm visitors see a publicly-trusted edge cert (issuer will be Google Trust Services or similar — Cloudflare's edge cert provider). The Origin CA cert is now only used between Cloudflare and your box. + +### 7. Lock UFW to Cloudflare IPs + +Already in the repo as `deploy/lock-origin-to-cloudflare.sh`. SCP and run: + +```bash +scp deploy/lock-origin-to-cloudflare.sh root@${ORIGIN_IP}:/tmp/ +ssh root@${ORIGIN_IP} bash /tmp/lock-origin-to-cloudflare.sh +``` + +Verify direct origin hits time out: + +```bash +curl -sS -o /dev/null -k --resolve ${HOSTNAME}:443:${ORIGIN_IP} \ + -w "Direct: HTTP %{http_code} in %{time_total}s\n" --max-time 10 \ + https://${HOSTNAME}/ 2>&1 +# expect: timeout / errno 28 / 10s +``` + +### 8. 
Cloudflare Access (optional gate) + +Use the **zone-scoped** Access endpoints — the project's MCP token has zone Access scope but not account-level Access scope: + +```js +// Create the app +const app = await cloudflare.request({ method: "POST", + path: `/zones/${ZONE_ID}/access/apps`, + body: { + type: "self_hosted", + name: `Access gate for ${HOSTNAME}`, + domain: HOSTNAME, + session_duration: SESSION_DURATION, + auto_redirect_to_identity: false, + app_launcher_visible: true, + allowed_idps: [], // empty = all available, including the built-in One-time PIN + }, +}); +const APP_ID = app.result.id; + +// Create the allow policy +await cloudflare.request({ method: "POST", + path: `/zones/${ZONE_ID}/access/apps/${APP_ID}/policies`, + body: { + name: "Allowlist", + decision: "allow", + include: ALLOWED_EMAILS.map(email => ({ email: { email } })), + precedence: 1, + }, +}); +``` + +Verify: unauthenticated request returns 302 to `<team>.cloudflareaccess.com/cdn-cgi/access/login/...`. + +```bash +curl -sS -D - -o /dev/null --max-redirs 0 --resolve ${HOSTNAME}:443:${CF_IP} \ + https://${HOSTNAME}/ | head -5 +# expect: HTTP/2 302 with location header pointing to cloudflareaccess.com +``` + +## Pre-existing requirements / setup steps + +**Zero Trust enablement** is a one-time, dashboard-only step. If `GET /zones/${ZONE_ID}/access/apps` returns `9999: access.api.error.not_enabled`, tell the user: open https://one.dash.cloudflare.com/ → pick a team subdomain (becomes `<team>.cloudflareaccess.com`) → choose Free plan. Then re-try. No API path exists to enable Zero Trust for the first time. 
+ +## Verification checklist (run at the very end) + +```bash +dig +short ${HOSTNAME} @1.1.1.1 # CF anycast IP +curl -I https://${HOSTNAME} # 302 to <team>.cloudflareaccess.com (or 200 if no Access) +curl -k --resolve ${HOSTNAME}:443:${ORIGIN_IP} https://${HOSTNAME}/ # timeout (UFW dropping) +ssh root@${ORIGIN_IP} 'ufw status verbose | head -25' # 80,443 only from CF ranges, 22 open +ssh root@${ORIGIN_IP} 'echo | openssl s_client -connect 127.0.0.1:443 -servername '${HOSTNAME}' 2>/dev/null | openssl x509 -noout -issuer' # CF Origin CA +``` + +## Rollback recipes + +| To undo | Run | +|---|---| +| Access gate | `DELETE /zones/${ZONE_ID}/access/apps/${APP_ID}` | +| UFW lockdown | restore wide-open: `ufw allow 80,443/tcp` then `ufw reload`, plus delete the CF rules (script supports `rm /var/lib/cloudflare-ufw.list` + manual rules cleanup) | +| Cloudflare proxy | PATCH dns_record `proxied: false` (grey cloud) | +| Strict SSL | PATCH `/zones/${ZONE_ID}/settings/ssl` to `"full"` or `"flexible"` | +| Origin CA cert | DELETE `/certificates/${CERT_ID}` (Cloudflare revokes; the operator still needs to swap the Caddyfile back to ACME and reload Caddy) | + +## Things that look scary but aren't + +- Caddy logs `no OCSP stapling for [cloudflare origin certificate]: no URL to issuing certificate` — Origin CA certs don't have OCSP URLs. Benign. +- During step 5, your local resolver may have cached NXDOMAIN. Bypass with `dig @1.1.1.1` or `curl --resolve`. +- `/user/tokens/verify` returns "Invalid API Token" through the MCP — the MCP uses a non-standard auth path; ignore. + +## Things to actually fear + +- **Losing the Origin CA private key.** It's never sent to Cloudflare. If lost, the cert is unusable. Revoke + reissue. Always pubkey-match cert + key right after generation. +- **Flipping the proxy on with Let's Encrypt + ACME still active.** Caddy will silently fail renewals (CF intercepts :80 for HTTP-01) and the cert eventually expires. 
Solution is what this playbook does: replace ACME with the Origin CA cert before flipping orange-cloud. +- **UFW-lockdown before proxy is on.** Will brick the site for everyone including the operator (Caddy can't be reached). Always do step 7 AFTER step 6 verifies the edge route works. + +## Related files in this repo + +- `deploy/install-origin-cert.sh` — origin-side cert + Caddyfile installer (idempotent) +- `deploy/lock-origin-to-cloudflare.sh` — UFW lockdown (idempotent, has state file) +- `deploy/Caddyfile` — the TLS-enabled Caddyfile that points at `/etc/caddy/tls/origin.{pem,key}` +- `deploy/bootstrap.sh` — fresh-box bootstrap; intentionally does NOT install the project Caddyfile (Caddy is enabled but stays on its default config until this playbook runs) +- `deploy/README.md` — runbook that references this skill diff --git a/deploy/README.md b/deploy/README.md index 503fa9a..2dc3323 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -9,12 +9,19 @@ deploying on every push to `main`. | File | Purpose | |-----------------------|------------------------------------------------------------| -| `bootstrap.sh` | One-shot system bootstrap. Run as root on a fresh server. | +| `bootstrap.sh` | One-shot system bootstrap. Run as root on a fresh server. Leaves Caddy on default config; TLS arrives via the harden playbook. | | `deploy.sh` | Idempotent deploy. Run as `lithium` user. Called by GHA. | +| `install-origin-cert.sh` | Installs a Cloudflare Origin CA cert + the project Caddyfile on the box. Verifies cert/key pair match first. | +| `lock-origin-to-cloudflare.sh` | Replaces wide-open ufw 80/443 rules with allow-lists scoped to Cloudflare's published IP ranges. | | `lithium.service` | systemd unit for the Next.js process. | -| `Caddyfile` | Reverse proxy + auto-TLS for the domain. | +| `Caddyfile` | Reverse proxy + Cloudflare Origin CA TLS for the domain. 
| | `lithium.env.example` | Template for `/etc/lithium/lithium.env` (secrets live here).| +For the end-to-end Cloudflare hardening sequence (Origin CA + Full strict + +orange-cloud + UFW lockdown + Zero Trust Access), see the project skill at +[`.claude/skills/cloudflare-harden/SKILL.md`](../.claude/skills/cloudflare-harden/SKILL.md). +In Claude Code, say "harden the box" to invoke it. + ## Architecture ``` @@ -32,10 +39,11 @@ during deploy never touches the running app's data. ## Bring-up — one time -### 1. DNS +### 1. DNS (grey cloud, temporary) Point an `A` record for `from-zero.emeraldlake.io` at the Hetzner box's -public IP. Caddy fetches a Let's Encrypt cert on first request. +public IP. Start in **DNS-only (grey cloud)** mode — the hardening +playbook flips it to proxied later, once the Origin CA cert is in place. ### 2. System bootstrap @@ -51,7 +59,10 @@ ssh root@ bash /tmp/deploy/bootstrap.sh This installs Node 20, Caddy, build tools, creates the `lithium` user, state dirs, swap, ufw rules, sudoers entry, and drops the systemd unit -and Caddyfile into place. +in place. It intentionally leaves Caddy on its default config — the +project Caddyfile references an Origin CA cert that doesn't exist yet, +so installing the Caddyfile here would crash Caddy. TLS arrives via the +harden playbook in step 7. ### 3. Generate the deploy SSH key (on the server) @@ -100,7 +111,24 @@ Watch logs: sudo journalctl -u lithium -f ``` -Visit . +The app is now listening on `127.0.0.1:3717` but not reachable from the +internet yet — Caddy is still on its default config. + +### 7. Cloudflare hardening + +Open this repo in Claude Code and say **"harden the box"**, or follow +[`.claude/skills/cloudflare-harden/SKILL.md`](../.claude/skills/cloudflare-harden/SKILL.md) +manually. The playbook walks through: + +1. Generate Origin CA cert (RSA 2048, valid 15 years) via Cloudflare MCP. +2. Install cert + Caddyfile on the box (`deploy/install-origin-cert.sh`). +3. 
Set Cloudflare SSL/TLS mode to **Full (strict)**. +4. Flip DNS to **proxied (orange cloud)**. +5. Lock UFW to Cloudflare IP ranges only (`deploy/lock-origin-to-cloudflare.sh`). +6. (Optional) Add a **Cloudflare Access** application + policy so visitors + must authenticate before any request reaches the origin. + +After this, visit <https://from-zero.emeraldlake.io>. ## Day-2 diff --git a/deploy/bootstrap.sh b/deploy/bootstrap.sh index d6a30dd..efa3c18 100755 --- a/deploy/bootstrap.sh +++ b/deploy/bootstrap.sh @@ -108,9 +108,14 @@ cp "$(dirname "$0")/lithium.service" /etc/systemd/system/lithium.service systemctl daemon-reload systemctl enable lithium.service -echo "==> Installing Caddyfile" -cp "$(dirname "$0")/Caddyfile" /etc/caddy/Caddyfile -systemctl restart caddy +echo "==> Caddy left at its default config" +# The project Caddyfile uses /etc/caddy/tls/origin.{pem,key} (Cloudflare +# Origin CA cert). Those files don't exist on a fresh box, so installing the +# Caddyfile here would crash Caddy. Operator installs cert + Caddyfile +# together via deploy/install-origin-cert.sh as part of the +# `.claude/skills/cloudflare-harden` playbook. Caddy stays on its default +# config until then. +systemctl enable caddy >/dev/null 2>&1 || true echo "==> Seeding env file at ${ENV_FILE} (chmod 600, owner ${APP_USER})" if [ ! -f "${ENV_FILE}" ]; then @@ -126,8 +131,9 @@ Bootstrap complete. Next steps: -1. Point DNS A-record ${DOMAIN} → this server's IP. Caddy will fetch a - Let's Encrypt cert automatically on the next request. +1. Point DNS A-record ${DOMAIN} → this server's IP. + Start in DNS-only (grey cloud) mode; the cloudflare-harden playbook + flips it to proxied later. 2.
Generate a deploy SSH key on this box (as root or as ${APP_USER}): @@ -135,7 +141,7 @@ Next steps: sudo -u ${APP_USER} cat /home/${APP_USER}/.ssh/id_ed25519.pub >> /home/${APP_USER}/.ssh/authorized_keys chown ${APP_USER}:${APP_USER} /home/${APP_USER}/.ssh/authorized_keys sudo -u ${APP_USER} chmod 600 /home/${APP_USER}/.ssh/authorized_keys - sudo -u ${APP_USER} cat /home/${APP_USER}/.ssh/id_ed25519 # private — paste into GH secret SSH_KEY + sudo -u ${APP_USER} cat /home/${APP_USER}/.ssh/id_ed25519 # paste into GH secret SSH_KEY 3. Add these GitHub repository secrets at https://github.com/emeraldtarek/from-zero/settings/secrets/actions: @@ -156,7 +162,18 @@ Next steps: bash deploy/deploy.sh ' -6. From now on, every push to main triggers GH Actions → SSH → deploy.sh. + App listens on 127.0.0.1:3717 but isn't reachable yet — Caddy is still + on its default config. + +6. Harden with Cloudflare. In Claude Code, say "harden the box" or run the + playbook in .claude/skills/cloudflare-harden/SKILL.md. It walks through: + - Origin CA cert generation + install via deploy/install-origin-cert.sh + - Setting SSL/TLS mode to Full (strict) + - Flipping DNS to proxied + - Locking UFW to Cloudflare IPs via deploy/lock-origin-to-cloudflare.sh + - (optional) Adding a Cloudflare Access gate + +7. From now on, every push to main triggers GH Actions → SSH → deploy.sh. App will be live at: https://${DOMAIN} ============================================================================== diff --git a/deploy/install-origin-cert.sh b/deploy/install-origin-cert.sh new file mode 100755 index 0000000..4e7823b --- /dev/null +++ b/deploy/install-origin-cert.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +# install-origin-cert.sh — install a Cloudflare Origin CA cert + the project +# Caddyfile on a remote VPS, then reload Caddy. +# +# Run from your laptop: +# bash deploy/install-origin-cert.sh <cert.pem> <key.pem> <server-ip> [ssh-user] +# +# Idempotent. Safe to rerun on cert rotation.
+# +# Assumes: +# - The deploy/Caddyfile in this repo references /etc/caddy/tls/origin.{pem,key} +# - You have ssh access to the box as root (or the supplied user) by key +# - The server has caddy installed (deploy/bootstrap.sh handles that) + +set -euo pipefail + +CERT="${1:-}" +KEY="${2:-}" +SERVER_IP="${3:-}" +SSH_USER="${4:-root}" + +if [[ -z "${CERT}" || -z "${KEY}" || -z "${SERVER_IP}" ]]; then + cat >&2 <<USAGE +usage: $0 <cert.pem> <key.pem> <server-ip> [ssh-user=root] + +example: + $0 /tmp/cf-origin/from-zero.emeraldlake.io.pem /tmp/cf-origin/from-zero.emeraldlake.io.key 178.105.102.91 +USAGE + exit 1 +fi + +for f in "${CERT}" "${KEY}"; do + [[ -f "${f}" ]] || { echo "Missing file: ${f}" >&2; exit 1; } +done + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +CADDYFILE="${SCRIPT_DIR}/Caddyfile" +[[ -f "${CADDYFILE}" ]] || { echo "Missing ${CADDYFILE}" >&2; exit 1; } + +# Sanity-check the cert + key pair before sending it anywhere. +CERT_PUB=$(openssl x509 -in "${CERT}" -pubkey -noout 2>/dev/null | openssl md5 | awk '{print $NF}') +KEY_PUB=$(openssl pkey -in "${KEY}" -pubout 2>/dev/null | openssl md5 | awk '{print $NF}') +if [[ "${CERT_PUB}" != "${KEY_PUB}" ]]; then + echo "Cert and key do not match — aborting."
>&2 + echo " cert pubkey md5: ${CERT_PUB}" >&2 + echo " key pubkey md5: ${KEY_PUB}" >&2 + exit 1 +fi +echo "==> cert + key pair verified (pubkey md5 ${CERT_PUB})" + +REMOTE_CERT="/tmp/origin.pem.$$" +REMOTE_KEY="/tmp/origin.key.$$" +REMOTE_CADDYFILE="/tmp/Caddyfile.$$" + +echo "==> scp cert, key, Caddyfile to ${SSH_USER}@${SERVER_IP}" +scp -q "${CERT}" "${SSH_USER}@${SERVER_IP}:${REMOTE_CERT}" +scp -q "${KEY}" "${SSH_USER}@${SERVER_IP}:${REMOTE_KEY}" +scp -q "${CADDYFILE}" "${SSH_USER}@${SERVER_IP}:${REMOTE_CADDYFILE}" + +echo "==> install + reload Caddy" +ssh "${SSH_USER}@${SERVER_IP}" "REMOTE_CERT='${REMOTE_CERT}' REMOTE_KEY='${REMOTE_KEY}' REMOTE_CADDYFILE='${REMOTE_CADDYFILE}' bash -s" <<'REMOTE' +set -euo pipefail + +install -d -m 0750 -o caddy -g caddy /etc/caddy/tls +install -m 0640 -o caddy -g caddy "${REMOTE_CERT}" /etc/caddy/tls/origin.pem +install -m 0600 -o caddy -g caddy "${REMOTE_KEY}" /etc/caddy/tls/origin.key +install -m 0644 -o root -g root "${REMOTE_CADDYFILE}" /etc/caddy/Caddyfile +rm -f "${REMOTE_CERT}" "${REMOTE_KEY}" "${REMOTE_CADDYFILE}" + +caddy validate --config /etc/caddy/Caddyfile +systemctl reload caddy || systemctl restart caddy +sleep 1 +echo "---- recent caddy logs ----" +journalctl -u caddy -n 12 --no-pager +REMOTE + +echo +echo "==> done." +echo " Visitors hit Cloudflare's edge cert; origin presents this Origin CA cert." +echo " Next steps (if a fresh harden): flip CF SSL to strict, proxy DNS," +echo " then run deploy/lock-origin-to-cloudflare.sh on the box."