diff --git a/convex/domains/graph/applyGraphPatch.ts b/convex/domains/graph/applyGraphPatch.ts new file mode 100644 index 000000000..4c0bacab2 --- /dev/null +++ b/convex/domains/graph/applyGraphPatch.ts @@ -0,0 +1,278 @@ +/** + * Validates and applies a graph patch proposed by the expansion agent. + * + * The agent NEVER writes directly to the graph — it proposes a structured + * patch, and this mutation validates bounds, deduplicates, and applies. + * + * Pattern: scratchpad-first (agent proposes, mutation validates) + * Prior art: + * - Roam Research: structured backlink persistence after outline parsing + * - Anthropic: Building Effective Agents (2024), tool output validation + * + * Invariants: + * BOUND — max 50 claims, 100 edges, 50 backlinks per patch + * HONEST_STATUS — returns actual counts, not claimed counts + * DETERMINISTIC — dedup by (subject, predicate, object) triple + * SSRF — sourceUrls validated before storing + * + * See: docs/architecture/EXPANDABLE_GRAPH_NOTEBOOK.md §3.2 + */ + +import { v } from "convex/values"; +import { internalMutation } from "../../_generated/server"; + +// ── Constants ──────────────────────────────────────────────────────────── + +const MAX_CLAIMS = 50; +const MAX_EDGES = 100; +const MAX_BACKLINKS = 50; + +// ── SSRF validation ────────────────────────────────────────────────────── + +const SSRF_BLOCKED = [ + /^localhost$/i, + /\.localhost$/i, // names under .localhost are reserved for loopback (RFC 6761) + /^127\./, + /^10\./, + /^172\.(1[6-9]|2\d|3[01])\./, + /^192\.168\./, + /^169\.254\./, + /^0\.0\.0\.0$/, + /^\[::1?\]$/, // IPv6 unspecified ([::]) and loopback ([::1]) + /^\[::ffff:/i, // IPv4-mapped IPv6, e.g. [::ffff:7f00:1] is 127.0.0.1 + /^\[f[cd][0-9a-f]{2}:/i, // fc00::/7 unique-local + /^\[fe[89ab][0-9a-f]:/i, // fe80::/10 link-local + /^metadata\.google\.internal$/i, +]; + +/** + * Returns true only for http(s) URLs whose host matches none of SSRF_BLOCKED. + * Anything that `new URL` cannot parse is rejected. + */ +function isUrlSafe(url: string): boolean { + try { + const parsed = new URL(url); + if (!["http:", "https:"].includes(parsed.protocol)) return false; + // "localhost." names the same host as "localhost"; drop the root dot so the anchored patterns still match. + const host = parsed.hostname.replace(/\.$/, ""); + return !SSRF_BLOCKED.some((p) => p.test(host)); + } catch { + return false; + } +} + +// ── DETERMINISTIC: stable triple hash for dedup ────────────────────────── + +function tripleKey(subject: string, predicate: string, object: string): string { + return 
`${subject.toLowerCase().trim()}::${predicate.toLowerCase().trim()}::${object.toLowerCase().trim()}`; +} + +// ── Main patch mutation ────────────────────────────────────────────────── + +export const applyPatch = internalMutation({ + args: { + targetEntityId: v.id("entityProfiles"), + agentRunId: v.string(), + userId: v.optional(v.string()), + + claims: v.array( + v.object({ + subject: v.string(), + predicate: v.string(), + object: v.string(), + claimText: v.string(), + sourceUrls: v.array(v.string()), + isHighConfidence: v.boolean(), + }), + ), + + backlinks: v.array( + v.object({ + targetEntityName: v.string(), + backlinkType: v.union( + v.literal("relatedTo"), + v.literal("causes"), + v.literal("supports"), + v.literal("derived"), + ), + confidence: v.number(), + sourceContext: v.optional(v.string()), + }), + ), + }, + + handler: async (ctx, args) => { + let claimsCreated = 0; + let edgesCreated = 0; + let backlinksCreated = 0; + + // ── 1. Find or create knowledge graph for this entity ─────────── + + // Find or create a knowledge graph scoped to this entity expansion. + // knowledgeGraphs requires: name, sourceType, sourceId, userId, isOddOneOut, + // claimCount, edgeCount, lastBuilt, createdAt, updatedAt. + let graphDoc = await ctx.db + .query("knowledgeGraphs") + .filter((q) => + q.and( + q.eq(q.field("sourceType"), "entity"), + q.eq(q.field("sourceId"), args.targetEntityId), + ), + ) + .first(); + + const now = Date.now(); + + if (!graphDoc) { + const graphId = await ctx.db.insert("knowledgeGraphs", { + name: `Expansion: ${args.targetEntityId}`, + sourceType: "entity", + sourceId: args.targetEntityId, + // userId is required by knowledgeGraphs schema (v.id("users")). + // Cast from string — the expansion action passes a Clerk user ID. + userId: (args.userId ?? 
"system") as any, + isOddOneOut: false, + claimCount: 0, + edgeCount: 0, + lastBuilt: now, + createdAt: now, + updatedAt: now, + }); + graphDoc = await ctx.db.get(graphId); + } + + if (!graphDoc) { + return { claimsCreated: 0, edgesCreated: 0, backlinksCreated: 0 }; + } + + const graphId = graphDoc._id; + + // ── 2. Deduplicate + insert claims (BOUND: max 50) ───────────── + + // Load existing claims for dedup (keys are built from the stored, already-truncated fields) + const existingClaims = await ctx.db + .query("graphClaims") + .filter((q) => q.eq(q.field("graphId"), graphId)) + .take(500); // BOUND_READ + + const existingTriples = new Set( + existingClaims.map((c) => tripleKey(c.subject, c.predicate, c.object)), + ); + + const newClaimIds: Array<{ id: any; subject: string }> = []; + + for (const claim of args.claims.slice(0, MAX_CLAIMS)) { + const key = tripleKey(claim.subject.slice(0, 200), claim.predicate.slice(0, 100), claim.object.slice(0, 500)); // key the values as they are STORED below, so an over-long triple still matches itself on a re-run + if (existingTriples.has(key)) continue; // DETERMINISTIC: skip dupes + + // SSRF: validate source URLs + const safeUrls = claim.sourceUrls.filter(isUrlSafe); + + const claimNow = Date.now(); + const claimId = await ctx.db.insert("graphClaims", { + graphId, + subject: claim.subject.slice(0, 200), + predicate: claim.predicate.slice(0, 100), + object: claim.object.slice(0, 500), + claimText: claim.claimText.slice(0, 500), + sourceDocIds: [], // expansion-sourced claims link via sourceUrls, not docs + sourceSnippets: safeUrls.slice(0, 5), // Store source URLs as snippets for traceability + extractedAt: claimNow, + isHighConfidence: claim.isHighConfidence, + createdAt: claimNow, + }); + + newClaimIds.push({ id: claimId, subject: claim.subject }); + existingTriples.add(key); + claimsCreated++; + } + + // ── 3. 
Create edges between claims (BOUND: max 100) ──────────── + + // For now, create "relatedTo" edges between claims about the same entity + const claimsBySubject = new Map(); + for (const nc of newClaimIds) { + const key = nc.subject.toLowerCase(); + if (!claimsBySubject.has(key)) claimsBySubject.set(key, []); + claimsBySubject.get(key)!.push(nc.id); + } + + let edgeCount = 0; + for (const [, ids] of claimsBySubject) { + if (ids.length < 2) continue; + // Create chain edges between sequential claims + for (let i = 0; i < ids.length - 1 && edgeCount < MAX_EDGES; i++) { + await ctx.db.insert("graphEdges", { + graphId, + fromClaimId: ids[i], + toClaimId: ids[i + 1], + edgeType: "relatedTo", + isStrong: false, + createdAt: Date.now(), + }); + edgeCount++; + edgesCreated++; + } + } + + // ── 4. Create backlinks (BOUND: max 50) ───────────────────────── + + for (const bl of args.backlinks.slice(0, MAX_BACKLINKS)) { + // Try to resolve the target entity by name + const targetEntity = await ctx.db + .query("entityProfiles") + .filter((q) => + q.eq( + q.field("canonicalName"), + bl.targetEntityName, + ), + ) + .first(); + + if (!targetEntity) continue; // Skip unresolvable entities + + // Dedup: check if this exact backlink exists + const existingBacklink = await ctx.db + .query("backlinks") + .withIndex("by_source", (q) => + q.eq("sourceType", "claim").eq("sourceId", args.targetEntityId), + ) + .filter((q) => + q.and( + q.eq(q.field("targetEntityId"), targetEntity._id), + q.eq(q.field("backlinkType"), bl.backlinkType), + ), + ) + .first(); + + if (existingBacklink) continue; + + await ctx.db.insert("backlinks", { + sourceType: "claim", + sourceId: args.targetEntityId, + targetEntityId: targetEntity._id, + backlinkType: bl.backlinkType, + confidence: Math.max(0, Math.min(1, bl.confidence)), // Clamp 0-1 + sourceContext: bl.sourceContext?.slice(0, 200), + createdBy: "agent", + agentRunId: args.agentRunId, + createdAt: Date.now(), + }); + + backlinksCreated++; + } + + // Update 
graph counters (honest counts) + if (claimsCreated > 0 || edgesCreated > 0) { + await ctx.db.patch(graphId, { + claimCount: graphDoc.claimCount + claimsCreated, + edgeCount: graphDoc.edgeCount + edgesCreated, + lastBuilt: Date.now(), + updatedAt: Date.now(), + }); + } + + // HONEST_STATUS: return actual persisted counts + return { claimsCreated, edgesCreated, backlinksCreated }; + }, +}); diff --git a/convex/domains/graph/expandEntity.ts b/convex/domains/graph/expandEntity.ts new file mode 100644 index 000000000..366ac54ba --- /dev/null +++ b/convex/domains/graph/expandEntity.ts @@ -0,0 +1,589 @@ +/** + * Entity expansion action — the core of the expandable graph notebook. + * + * When a user clicks [⊕ Expand] on a mention chip, this action: + * 1. Validates idempotency (reject duplicate runs) + * 2. Searches via Linkup for entity intelligence + * 3. Extracts SPO triples via Gemini + * 4. Applies a validated graph patch (new claims, edges, backlinks) + * 5. Updates the expansion snapshot for instant hover preview + * + * Pattern: orchestrator-workers (single expansion run, multiple search queries) + * Prior art: + * - Roam Research: bidirectional backlinks, block-level graph traversal + * - Anthropic: Building Effective Agents (2024), orchestrator-workers + * - Existing: convex/domains/search/deepDiligence.ts searchWithFallback() + * + * See: docs/architecture/EXPANDABLE_GRAPH_NOTEBOOK.md + */ + +import { v } from "convex/values"; +import { + action, + internalAction, + internalMutation, + internalQuery, + mutation, + query, +} from "../../_generated/server"; +import { internal } from "../../_generated/api"; +import { + searchWithFallback, + readBoundedResponse, + isUrlSafe, + GEMINI_API_URL, + SEARCH_TIMEOUT_MS, + MAX_RESPONSE_BYTES, +} from "../search/linkupClient.js"; + +// ── Constants ──────────────────────────────────────────────────────────── + +/** BOUND: max search queries per expansion run */ +const MAX_SEARCH_QUERIES = 5; +/** BOUND: max claims the agent can propose per 
patch */ +const MAX_CLAIMS_PER_PATCH = 50; +/** BOUND: max edges per patch */ +const MAX_EDGES_PER_PATCH = 100; +/** BOUND: max backlinks per patch */ +const MAX_BACKLINKS_PER_PATCH = 50; +/** TIMEOUT: total action wall-clock budget (ms) */ +const ACTION_TIMEOUT_MS = 55_000; // 55s to leave headroom under Convex 60s limit +/** Snapshot staleness default */ +const DEFAULT_STALE_MS = 24 * 60 * 60 * 1000; // 24 hours +/** BOUND: max key facts in snapshot */ +const MAX_KEY_FACTS = 10; +/** BOUND: max recent claims in snapshot */ +const MAX_RECENT_CLAIMS = 20; + + +// ── Public mutation: start an expansion ────────────────────────────────── + +/** + * Client calls this to start an expansion. Returns < 50ms with the runId. + * The actual work runs in a scheduled action. + */ +export const startExpansion = mutation({ + args: { + targetEntityId: v.id("entityProfiles"), + targetBlockId: v.optional(v.string()), + targetDocumentId: v.optional(v.id("documents")), + userId: v.string(), + }, + handler: async (ctx, args) => { + // Idempotency: check for active run on same entity by same user + const existing = await ctx.db + .query("expansionRuns") + .withIndex("by_entity", (q) => q.eq("targetEntityId", args.targetEntityId)) + .filter((q) => + q.and( + q.eq(q.field("userId"), args.userId), + q.or( + q.eq(q.field("status"), "queued"), + q.eq(q.field("status"), "searching"), + q.eq(q.field("status"), "extracting"), + q.eq(q.field("status"), "persisting"), + ), + ), + ) + .first(); + + if (existing) { + return { runId: existing.runId, deduplicated: true }; + } + + const now = Date.now(); + const runId = `exp_${args.targetEntityId}_${args.userId}_${now}`; + + const docId = await ctx.db.insert("expansionRuns", { + runId, + userId: args.userId, + targetEntityId: args.targetEntityId, + targetBlockId: args.targetBlockId, + targetDocumentId: args.targetDocumentId, + status: "queued", + claimsCreated: 0, + edgesCreated: 0, + sourcesFound: 0, + searchQueries: 0, + maxSearchQueries: 
MAX_SEARCH_QUERIES, + retryCount: 0, + createdAt: now, + }); + + // Schedule the expansion action to run immediately + await ctx.scheduler.runAfter(0, internal.domains.graph.expandEntity.executeExpansion, { + expansionRunId: docId, + }); + + return { runId, deduplicated: false }; + }, +}); + +// ── Query: expansion run status ────────────────────────────────────────── + +export const getExpansionStatus = query({ + args: { targetEntityId: v.id("entityProfiles") }, + handler: async (ctx, args) => { + return await ctx.db + .query("expansionRuns") + .withIndex("by_entity", (q) => q.eq("targetEntityId", args.targetEntityId)) + .order("desc") + .first(); + }, +}); + +export const getExpansionSnapshot = query({ + args: { entityId: v.id("entityProfiles") }, + handler: async (ctx, args) => { + return await ctx.db + .query("expansionSnapshots") + .withIndex("by_entity", (q) => q.eq("entityId", args.entityId)) + .first(); + }, +}); + +// ── Internal: the main expansion action ────────────────────────────────── + +export const executeExpansion = internalAction({ + args: { + expansionRunId: v.id("expansionRuns"), + }, + handler: async (ctx, args) => { + const startTime = Date.now(); + + const geminiKey = process.env.GEMINI_API_KEY; + const linkupKey = process.env.LINKUP_API_KEY; + + // Load the run record + const run = await ctx.runQuery(internal.domains.graph.expandEntity.internalGetRun, { + runId: args.expansionRunId, + }); + if (!run || run.status !== "queued") return; + + // Load entity profile + const entity = await ctx.runQuery(internal.domains.graph.expandEntity.internalGetEntity, { + entityId: run.targetEntityId, + }); + if (!entity) { + await ctx.runMutation(internal.domains.graph.expandEntity.updateRunStatus, { + runId: args.expansionRunId, + status: "failed", + errorMessage: "Entity not found", + completedAt: Date.now(), + }); + return; + } + + const entityName = entity.canonicalName; + const entityType = entity.entityType ?? 
"company"; + + try { + // ── Phase 1: Search ────────────────────────────────────────── + await ctx.runMutation(internal.domains.graph.expandEntity.updateRunStatus, { + runId: args.expansionRunId, + status: "searching", + }); + + const searchQueries = buildSearchQueries(entityName, entityType); + const allSnippets: string[] = []; + const allSources: Array<{ url: string; title: string }> = []; + let searchCount = 0; + + for (const query of searchQueries.slice(0, MAX_SEARCH_QUERIES)) { + if (Date.now() - startTime > ACTION_TIMEOUT_MS) break; + + const result = await searchWithFallback(query, linkupKey, geminiKey); + searchCount++; + allSnippets.push(...result.snippets); + allSources.push(...result.sources); + } + + await ctx.runMutation(internal.domains.graph.expandEntity.updateRunSearchCount, { + runId: args.expansionRunId, + searchQueries: searchCount, + sourcesFound: allSources.length, + }); + + if (allSnippets.length === 0) { + await ctx.runMutation(internal.domains.graph.expandEntity.updateRunStatus, { + runId: args.expansionRunId, + status: "completed", + completedAt: Date.now(), + }); + return; + } + + // ── Phase 2: Extract ───────────────────────────────────────── + await ctx.runMutation(internal.domains.graph.expandEntity.updateRunStatus, { + runId: args.expansionRunId, + status: "extracting", + }); + + const claims = await extractClaims(entityName, allSnippets, allSources, geminiKey); + + // ── Phase 3: Persist ───────────────────────────────────────── + await ctx.runMutation(internal.domains.graph.expandEntity.updateRunStatus, { + runId: args.expansionRunId, + status: "persisting", + }); + + const patchResult = await ctx.runMutation( + internal.domains.graph.applyGraphPatch.applyPatch, + { + targetEntityId: run.targetEntityId, + agentRunId: run.runId, + userId: run.userId, + claims: claims.slice(0, MAX_CLAIMS_PER_PATCH).map((c) => ({ + subject: c.subject, + predicate: c.predicate, + object: c.object, + claimText: c.claimText, + sourceUrls: 
c.sourceUrls.filter(isUrlSafe), + isHighConfidence: c.isHighConfidence, + })), + backlinks: claims + .filter((c) => c.relatedEntities?.length) + .flatMap((c) => + (c.relatedEntities ?? []).map((re) => ({ + targetEntityName: re, + backlinkType: "derived" as const, + confidence: c.isHighConfidence ? 0.8 : 0.5, + sourceContext: c.claimText.slice(0, 200), + })), + ) + .slice(0, MAX_BACKLINKS_PER_PATCH), + }, + ); + + // ── Phase 4: Update snapshot ───────────────────────────────── + const summary = await generateSummary(entityName, claims, geminiKey); + + await ctx.runMutation(internal.domains.graph.expandEntity.upsertSnapshot, { + entityId: run.targetEntityId, + summary, + keyFacts: claims + .filter((c) => c.isHighConfidence) + .slice(0, MAX_KEY_FACTS) + .map((c) => c.claimText), + recentClaims: claims.slice(0, MAX_RECENT_CLAIMS).map((c) => ({ + claimText: c.claimText, + predicate: c.predicate, + confidence: c.isHighConfidence, + sourceUrl: c.sourceUrls[0], + })), + }); + + // ── Final status ───────────────────────────────────────────── + const wallClockMs = Date.now() - startTime; + const finalStatus = + patchResult.claimsCreated > 0 || patchResult.backlinksCreated > 0 + ? "completed" + : "partial"; + + await ctx.runMutation(internal.domains.graph.expandEntity.updateRunFinal, { + runId: args.expansionRunId, + status: finalStatus as "completed" | "partial", + claimsCreated: patchResult.claimsCreated, + edgesCreated: patchResult.edgesCreated, + wallClockMs, + completedAt: Date.now(), + }); + } catch (error: any) { + await ctx.runMutation(internal.domains.graph.expandEntity.updateRunStatus, { + runId: args.expansionRunId, + status: "failed", + errorMessage: error?.message?.slice(0, 500) ?? 
"Unknown error", + completedAt: Date.now(), + }); + } + }, +}); + +// ── Internal queries/mutations for the action ──────────────────────────── + +export const internalGetRun = internalQuery({ // internal-only: returns the raw expansionRuns document for an id; must not be client-callable + args: { runId: v.id("expansionRuns") }, + handler: async (ctx, args) => ctx.db.get(args.runId), +}); + +export const internalGetEntity = internalQuery({ // internal-only: returns the raw entityProfiles document for an id + args: { entityId: v.id("entityProfiles") }, + handler: async (ctx, args) => ctx.db.get(args.entityId), +}); + +export const updateRunStatus = internalMutation({ + args: { + runId: v.id("expansionRuns"), + status: v.union( + v.literal("queued"), + v.literal("searching"), + v.literal("extracting"), + v.literal("persisting"), + v.literal("completed"), + v.literal("partial"), + v.literal("failed"), + ), + errorMessage: v.optional(v.string()), + completedAt: v.optional(v.number()), + }, + handler: async (ctx, args) => { + const patch: { status: typeof args.status; errorMessage?: string; completedAt?: number } = { status: args.status }; // only fields that were actually passed are written + if (args.errorMessage !== undefined) patch.errorMessage = args.errorMessage; + if (args.completedAt !== undefined) patch.completedAt = args.completedAt; + await ctx.db.patch(args.runId, patch); + }, +}); + +export const updateRunSearchCount = internalMutation({ + args: { + runId: v.id("expansionRuns"), + searchQueries: v.number(), + sourcesFound: v.number(), + }, + handler: async (ctx, args) => { + await ctx.db.patch(args.runId, { + searchQueries: args.searchQueries, + sourcesFound: args.sourcesFound, + }); + }, +}); + +export const updateRunFinal = internalMutation({ + args: { + runId: v.id("expansionRuns"), + status: v.union(v.literal("completed"), v.literal("partial")), + claimsCreated: v.number(), + edgesCreated: v.number(), + wallClockMs: v.number(), + completedAt: v.number(), + }, + handler: async (ctx, args) => { + await ctx.db.patch(args.runId, { + status: args.status, + claimsCreated: args.claimsCreated, + edgesCreated: args.edgesCreated, + wallClockMs: args.wallClockMs, + completedAt: args.completedAt, + }); + }, +}); + +export const upsertSnapshot = 
internalMutation({ + args: { + entityId: v.id("entityProfiles"), + summary: v.string(), + keyFacts: v.array(v.string()), + recentClaims: v.array( + v.object({ + claimText: v.string(), + predicate: v.string(), + confidence: v.boolean(), + sourceUrl: v.optional(v.string()), + }), + ), + }, + handler: async (ctx, args) => { + const existing = await ctx.db + .query("expansionSnapshots") + .withIndex("by_entity", (q) => q.eq("entityId", args.entityId)) + .first(); + + // Count backlinks for this entity (BOUND_READ: limit 200 for count) + const backlinks = await ctx.db + .query("backlinks") + .withIndex("by_target", (q) => q.eq("targetEntityId", args.entityId)) + .take(200); + + const now = Date.now(); + + if (existing) { + await ctx.db.patch(existing._id, { + summary: args.summary, + keyFacts: args.keyFacts.slice(0, MAX_KEY_FACTS), + recentClaims: args.recentClaims.slice(0, MAX_RECENT_CLAIMS), + backlinkCount: backlinks.length, + lastExpanded: now, + version: existing.version + 1, + }); + } else { + await ctx.db.insert("expansionSnapshots", { + entityId: args.entityId, + summary: args.summary, + keyFacts: args.keyFacts.slice(0, MAX_KEY_FACTS), + recentClaims: args.recentClaims.slice(0, MAX_RECENT_CLAIMS), + backlinkCount: backlinks.length, + lastExpanded: now, + version: 1, + staleAfterMs: DEFAULT_STALE_MS, + }); + } + }, +}); + +// ── Pure functions (no Convex context) ─────────────────────────────────── + +function buildSearchQueries(entityName: string, entityType: string): string[] { + const queries = [ + `${entityName} latest news 2026`, + `${entityName} funding valuation revenue`, + `${entityName} leadership team executives`, + ]; + if (entityType === "company") { + queries.push(`${entityName} competitors market position`); + queries.push(`${entityName} product launches partnerships`); + } else if (entityType === "person") { + queries.push(`${entityName} career background biography`); + queries.push(`${entityName} recent statements opinions`); + } else { + 
queries.push(`${entityName} overview analysis`); + queries.push(`${entityName} trends developments`); + } + return queries; +} + + +interface ExtractedClaim { + subject: string; + predicate: string; + object: string; + claimText: string; + sourceUrls: string[]; + isHighConfidence: boolean; + relatedEntities?: string[]; +} + +async function extractClaims( + entityName: string, + snippets: string[], + sources: Array<{ url: string; title: string }>, + geminiKey: string | undefined, +): Promise { + // Baseline: extract simple claims from snippets without LLM + const baselineClaims: ExtractedClaim[] = snippets + .filter((s) => s.length > 30) + .slice(0, 8) + .map((s) => ({ + subject: entityName, + predicate: "mentioned_in", + object: s.slice(0, 200), + claimText: s.slice(0, 300), + sourceUrls: sources.slice(0, 2).map((src) => src.url), + isHighConfidence: false, + })); + + if (!geminiKey || snippets.length === 0) return baselineClaims; + + try { + const combinedSnippets = snippets.slice(0, 10).join("\n---\n").slice(0, 8000); + + const resp = await fetch(`${GEMINI_API_URL}?key=${geminiKey}`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + contents: [ + { + parts: [ + { + text: `Extract structured facts about "${entityName}" from the following text. Return a JSON array of objects with these fields: +- subject: the entity or sub-entity +- predicate: the relationship (e.g. "has_revenue", "founded_by", "raised_funding", "competes_with", "headquartered_in", "employs", "launched_product") +- object: the value or target entity +- claimText: a concise natural language sentence stating the fact +- isHighConfidence: boolean (true if the fact includes specific numbers, dates, or named sources) +- relatedEntities: array of other entity names mentioned in this fact + +Return ONLY the JSON array, no other text. 
+ +Text: +${combinedSnippets}`, + }, + ], + }, + ], + generationConfig: { temperature: 0, maxOutputTokens: 4000 }, + }), + signal: AbortSignal.timeout(SEARCH_TIMEOUT_MS), + }); + + if (!resp.ok) return baselineClaims; + + const data = (await resp.json()) as any; + const text = data?.candidates?.[0]?.content?.parts?.[0]?.text ?? ""; + + // Parse JSON from the response (handle markdown code fences) + const jsonMatch = text.match(/\[[\s\S]*\]/); + if (!jsonMatch) return baselineClaims; + + const parsed = JSON.parse(jsonMatch[0]) as any[]; + const sourceUrls = sources.map((s) => s.url); + + return parsed + .filter( + (c: any) => + typeof c.subject === "string" && + typeof c.predicate === "string" && + typeof c.object === "string", + ) + .slice(0, MAX_CLAIMS_PER_PATCH) + .map((c: any) => ({ + subject: String(c.subject).slice(0, 200), + predicate: String(c.predicate).slice(0, 100), + object: String(c.object).slice(0, 500), + claimText: String(c.claimText ?? `${c.subject} ${c.predicate} ${c.object}`).slice(0, 500), + sourceUrls: sourceUrls.slice(0, 3), + isHighConfidence: Boolean(c.isHighConfidence), + relatedEntities: Array.isArray(c.relatedEntities) + ? c.relatedEntities.map(String).slice(0, 5) + : [], + })); + } catch { + return baselineClaims; + } +} + +async function generateSummary( + entityName: string, + claims: ExtractedClaim[], + geminiKey: string | undefined, +): Promise { + if (!geminiKey || claims.length === 0) { + return `${entityName} — ${claims.length} facts discovered.`; + } + + try { + const factsText = claims + .slice(0, 15) + .map((c) => `- ${c.claimText}`) + .join("\n"); + + const resp = await fetch(`${GEMINI_API_URL}?key=${geminiKey}`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + contents: [ + { + parts: [ + { + text: `Write a 2-3 sentence summary about "${entityName}" based on these facts:\n${factsText}\n\nBe concise and factual. 
Do not speculate.`, + }, + ], + }, + ], + generationConfig: { temperature: 0.1, maxOutputTokens: 300 }, + }), + signal: AbortSignal.timeout(10_000), + }); + + if (resp.ok) { + const data = (await resp.json()) as any; + return ( + data?.candidates?.[0]?.content?.parts?.[0]?.text?.slice(0, 500) ?? + `${entityName} — ${claims.length} facts discovered.` + ); + } + } catch { + /* fall through */ + } + + return `${entityName} — ${claims.length} facts discovered.`; +} diff --git a/convex/domains/graph/index.ts b/convex/domains/graph/index.ts new file mode 100644 index 000000000..999a168ad --- /dev/null +++ b/convex/domains/graph/index.ts @@ -0,0 +1,36 @@ +/** + * Graph domain — Expandable mention nodes, backlinks, and entity expansion. + * + * Pattern: Roam-style expandable mention nodes with bidirectional + * cross-linking, web-grounded agent expansion, and lazy infinite traversal. + * + * Prior art: + * - Roam Research: bidirectional backlinks as first-class primitives, block references + * - Obsidian: local graph view, backlinks panel + * + * See: docs/architecture/EXPANDABLE_GRAPH_NOTEBOOK.md + */ + +export { + startExpansion, + getExpansionStatus, + getExpansionSnapshot, +} from "./expandEntity.js"; + +export { + getBacklinksForEntity, + getBacklinkCount, + getBacklinksByDocument, + getBacklinksBySource, + getBacklinksByRun, + getBacklinkSummary, +} from "./backlinkQueries.js"; + +export { + getLatestRun, + getRunByRunId, + getRunsByUser, + getActiveRuns, + getEntityExpansionHistory, + isExpanding, +} from "./expansionQueries.js"; diff --git a/convex/domains/product/blockOrdering.ts b/convex/domains/product/blockOrdering.ts index 91f7edeec..ac79315ae 100644 --- a/convex/domains/product/blockOrdering.ts +++ b/convex/domains/product/blockOrdering.ts @@ -1,7 +1,7 @@ /** * blockOrdering.ts — Fractional indexing helper for productBlocks. * - * Ports Mew's approach (see src/app/graph/FractionalPositionedList.ts in Ideaflow/mew). 
+ * Uses fractional indexing (Roam Research / outliner-style positioning). * Every block has (positionInt, positionFrac). To insert between two blocks, we * generate a new fractional key between their positionFrac values — O(1), no * re-indexing ever. Different positionInt values are used for "tiers" so that diff --git a/convex/domains/product/schema.ts b/convex/domains/product/schema.ts index e6c272e75..cf004de8b 100644 --- a/convex/domains/product/schema.ts +++ b/convex/domains/product/schema.ts @@ -1487,9 +1487,9 @@ export const productContextItems = defineTable({ .index("by_owner_linked_report", ["ownerKey", "linkedReportId"]); // ────────────────────────────────────────────────────────────────────────── -// Block model (Phase 1 of Ideaflow/Mew-inspired notebook) +// Block model (Phase 1 of Roam-inspired graph notebook) // Inspired by Mew's graph_node + relation_lists with fractional indexing. -// See docs/architecture/IDEAFLOW_BLOCK_NOTEBOOK_ULTRAPLAN.md +// See docs/architecture/EXPANDABLE_GRAPH_NOTEBOOK.md // ────────────────────────────────────────────────────────────────────────── export const productBlockKindValidator = v.union( diff --git a/convex/domains/search/deepDiligence.ts b/convex/domains/search/deepDiligence.ts index ce808e57f..bdd473a6f 100644 --- a/convex/domains/search/deepDiligence.ts +++ b/convex/domains/search/deepDiligence.ts @@ -34,6 +34,7 @@ import { type ClassifiedSignal, } from "./signalTaxonomy.js"; import { buildSearchForecastGate } from "./searchForecastGate.js"; +import { searchWithFallback, GEMINI_API_URL } from "./linkupClient.js"; // ── Types ──────────────────────────────────────────────────────────────── @@ -133,7 +134,6 @@ const BRANCHES = [ ] as const; const MAX_CHAIN_DEPTH = 3; -const GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-3.1-flash-lite-preview:generateContent"; // ── Self-Search Detection ──────────────────────────────────────────────── @@ -544,65 +544,6 @@ async function 
executeBranch( }; } -// ── Search with Fallback ───────────────────────────────────────────────── - -async function searchWithFallback( - query: string, - linkupKey: string | undefined, - geminiKey: string | undefined, -): Promise<{ snippets: string[]; sources: Array<{ url: string; title: string }> }> { - const snippets: string[] = []; - const sources: Array<{ url: string; title: string }> = []; - - // Try Linkup - if (linkupKey) { - try { - const resp = await fetch("https://api.linkup.so/v1/search", { - method: "POST", - headers: { Authorization: `Bearer ${linkupKey}`, "Content-Type": "application/json" }, - body: JSON.stringify({ - q: query, - depth: "deep", - outputType: "sourcedAnswer", - includeInlineCitations: true, - includeSources: true, - maxResults: 8, - }), - signal: AbortSignal.timeout(20_000), - }); - if (resp.ok) { - const data = (await resp.json()) as any; - if (data.answer) snippets.push(data.answer); - for (const s of (data.results ?? data.sources ?? []).slice(0, 8)) { - if (s.content) snippets.push(s.content.slice(0, 1000)); - sources.push({ url: s.url ?? "", title: s.name ?? s.title ?? "" }); - } - } - } catch { /* fallthrough */ } - } - - // Fallback to Gemini grounding - if (snippets.length === 0 && geminiKey) { - try { - const resp = await fetch(`${GEMINI_API_URL}?key=${geminiKey}`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - contents: [{ parts: [{ text: `Research this thoroughly. Provide detailed factual information with specific numbers, names, and dates:\n\n${query}` }] }], - generationConfig: { temperature: 0.1, maxOutputTokens: 3000 }, - }), - signal: AbortSignal.timeout(20_000), - }); - if (resp.ok) { - const data = (await resp.json()) as any; - const text = data?.candidates?.[0]?.content?.parts?.[0]?.text ?? 
""; - if (text) snippets.push(text); - } - } catch { /* fallthrough */ } - } - - return { snippets, sources }; -} // ── Entity Resolution ──────────────────────────────────────────────────── diff --git a/convex/domains/search/linkupClient.ts b/convex/domains/search/linkupClient.ts new file mode 100644 index 000000000..c589236ac --- /dev/null +++ b/convex/domains/search/linkupClient.ts @@ -0,0 +1,153 @@ +/** + * Shared Linkup search client with Gemini grounding fallback. + * + * Pattern: SSRF-validated, bounded-read search with graceful fallback + * Prior art: + * - OWASP SSRF Prevention Cheat Sheet + * - Anthropic agentic reliability checklist (BOUND_READ, SSRF, TIMEOUT) + */ + +export const GEMINI_API_URL = + "https://generativelanguage.googleapis.com/v1beta/models/gemini-3.1-flash-lite-preview:generateContent"; + +export const SEARCH_TIMEOUT_MS = 20_000; +export const MAX_RESPONSE_BYTES = 512 * 1024; // 512 KB + +const SSRF_BLOCKED_PATTERNS = [ + /^localhost$/i, + /\.localhost$/i, // names under .localhost are reserved for loopback (RFC 6761) + /^127\./, + /^10\./, + /^172\.(1[6-9]|2\d|3[01])\./, + /^192\.168\./, + /^169\.254\./, + /^0\.0\.0\.0$/, + /^\[::1?\]$/, // IPv6 unspecified ([::]) and loopback ([::1]) + /^\[::ffff:/i, // IPv4-mapped IPv6, e.g. [::ffff:7f00:1] is 127.0.0.1 + /^\[f[cd][0-9a-f]{2}:/i, // fc00::/7 unique-local + /^\[fe[89ab][0-9a-f]:/i, // fe80::/10 link-local + /^metadata\.google\.internal$/i, +]; + +/** + * Returns true only for http(s) URLs whose host matches none of SSRF_BLOCKED_PATTERNS. + * Anything that `new URL` cannot parse is rejected. + */ +export function isUrlSafe(url: string): boolean { + try { + const parsed = new URL(url); + if (!["http:", "https:"].includes(parsed.protocol)) return false; + // "localhost." names the same host as "localhost"; drop the root dot so the anchored patterns still match. + const host = parsed.hostname.replace(/\.$/, ""); + return !SSRF_BLOCKED_PATTERNS.some((p) => p.test(host)); + } catch { + return false; + } +} + +export async function readBoundedResponse( + resp: Response, + maxBytes: number = MAX_RESPONSE_BYTES, +): Promise<string> { + const reader = resp.body?.getReader(); + if (!reader) return ""; + + const chunks: Uint8Array[] = []; + let totalBytes = 0; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + totalBytes += value.byteLength; + if (totalBytes > maxBytes) { + await reader.cancel(); + break; + } + chunks.push(value); + } + + const decoder = new TextDecoder(); + return chunks.map((c) => decoder.decode(c, { stream: true })).join(""); +} + +export async 
function searchWithFallback( + query: string, + linkupKey: string | undefined, + geminiKey: string | undefined, +): Promise<{ snippets: string[]; sources: Array<{ url: string; title: string }> }> { + const snippets: string[] = []; + const sources: Array<{ url: string; title: string }> = []; + + if (linkupKey) { + try { + const resp = await fetch("https://api.linkup.so/v1/search", { + method: "POST", + headers: { + Authorization: `Bearer ${linkupKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + q: query, + depth: "standard", + outputType: "sourcedAnswer", + includeInlineCitations: true, + includeSources: true, + maxResults: 8, + }), + signal: AbortSignal.timeout(SEARCH_TIMEOUT_MS), + }); + + if (resp.ok) { + const text = await readBoundedResponse(resp, MAX_RESPONSE_BYTES); + const data = JSON.parse(text); + + if (data.answer) snippets.push(data.answer); + const resultList = data.results ?? data.sources ?? []; + for (const s of resultList.slice(0, 10)) { + const url = s.url ?? ""; + if (url && isUrlSafe(url)) { + sources.push({ url, title: s.name ?? s.title ?? url }); + if (s.content) snippets.push(s.content.slice(0, 1000)); + else if (s.snippet) snippets.push(s.snippet); + } + } + } + } catch { + /* Linkup failed — fall through to Gemini grounding */ + } + } + + if (snippets.length === 0 && geminiKey) { + try { + const resp = await fetch(`${GEMINI_API_URL}?key=${geminiKey}`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + contents: [ + { + parts: [ + { + text: `Provide a factual, well-sourced summary about: ${query}\n\nInclude specific facts, numbers, dates, and names. 
Cite sources where possible.`, + }, + ], + }, + ], + generationConfig: { temperature: 0.1, maxOutputTokens: 3000 }, + }), + signal: AbortSignal.timeout(SEARCH_TIMEOUT_MS), + }); + + if (resp.ok) { + const text = await readBoundedResponse(resp, MAX_RESPONSE_BYTES); + const data = JSON.parse(text); + const answer = + data?.candidates?.[0]?.content?.parts?.[0]?.text ?? ""; + if (answer) snippets.push(answer); + } + } catch { + /* Gemini also failed — return empty */ + } + } + + return { snippets, sources }; +} diff --git a/convex/schema.ts b/convex/schema.ts index de18110cb..0ddbb292e 100644 --- a/convex/schema.ts +++ b/convex/schema.ts @@ -16013,4 +16013,146 @@ export default defineSchema({ }) .index("by_cache_key", ["cacheKey"]) .index("by_ttl", ["ttlExpiresAt"]), + + /* ------------------------------------------------------------------ */ + /* EXPANDABLE GRAPH NOTEBOOK — Roam-style mention expansion */ + /* ------------------------------------------------------------------ */ + + /** + * Tracks each mention expansion run: user clicks [⊕ Expand] on a mention + * chip, which queues a Linkup search + Gemini extraction pipeline. + * + * Pattern: async_reliability — idempotency key, bounded retries, honest status. 
+ * Prior art: Roam Research bidirectional backlinks + block references + * + * BOUND: max 5 search queries per run, 50 claims per patch + * HONEST_STATUS: never "completed" if any branch failed → "partial" + * TIMEOUT: 60s AbortController on the action + * DETERMINISTIC: runId = sha256(entityId + userId + timestamp) + */ + expansionRuns: defineTable({ + // Identity + runId: v.string(), + userId: v.string(), + + // Target + targetEntityId: v.id("entityProfiles"), + targetBlockId: v.optional(v.string()), + targetDocumentId: v.optional(v.id("documents")), + + // Execution state machine: queued → searching → extracting → persisting → completed|partial|failed + status: v.union( + v.literal("queued"), + v.literal("searching"), + v.literal("extracting"), + v.literal("persisting"), + v.literal("completed"), + v.literal("partial"), + v.literal("failed"), + ), + + // Results (honest counts — reflect actual persisted data) + claimsCreated: v.number(), + edgesCreated: v.number(), + sourcesFound: v.number(), + + // Budget tracking + searchQueries: v.number(), + maxSearchQueries: v.number(), + wallClockMs: v.optional(v.number()), + + // Error handling + errorMessage: v.optional(v.string()), + retryCount: v.number(), + + // Timestamps + createdAt: v.number(), + completedAt: v.optional(v.number()), + }) + .index("by_entity", ["targetEntityId"]) + .index("by_user", ["userId", "createdAt"]) + .index("by_status", ["status"]) + .index("by_runId", ["runId"]), + + /** + * Bidirectional cross-references between entities/blocks/documents. + * Created automatically when a mention is inserted, or when an expansion + * agent discovers a relationship. + * + * Prior art: Roam Research bidirectional backlinks; Obsidian backlinks panel + * and backlink extraction.
+ * + * BOUND_READ: max 50 backlinks returned per query (paginated) + * DETERMINISTIC: dedup by (sourceType, sourceId, targetEntityId, backlinkType) + */ + backlinks: defineTable({ + // Source (who references) + sourceType: v.union( + v.literal("block"), + v.literal("claim"), + v.literal("document"), + v.literal("signal"), + v.literal("action"), + ), + sourceId: v.string(), + sourceDocumentId: v.optional(v.string()), + sourceContext: v.optional(v.string()), + + // Target (who is referenced) + targetEntityId: v.id("entityProfiles"), + + // Relationship metadata + backlinkType: v.union( + v.literal("mention"), + v.literal("citation"), + v.literal("relatedTo"), + v.literal("causes"), + v.literal("contradicts"), + v.literal("supports"), + v.literal("derived"), + ), + confidence: v.optional(v.number()), + + // Provenance + createdBy: v.union( + v.literal("user"), + v.literal("agent"), + v.literal("system"), + ), + agentRunId: v.optional(v.string()), + + createdAt: v.number(), + }) + .index("by_target", ["targetEntityId", "backlinkType"]) + .index("by_source", ["sourceType", "sourceId"]) + .index("by_document", ["sourceDocumentId"]) + .index("by_agent_run", ["agentRunId"]), + + /** + * Cached expanded content for each entity. Prevents re-expansion on every + * hover and enables instant preview. Refreshed on each successful expansion. 
+ * + * BOUND: keyFacts max 10, recentClaims max 20 + * TTL: staleAfterMs default 86400000 (24h) — re-expansion needed after + */ + expansionSnapshots: defineTable({ + entityId: v.id("entityProfiles"), + + // Cached content + summary: v.string(), + keyFacts: v.array(v.string()), + recentClaims: v.array(v.object({ + claimText: v.string(), + predicate: v.string(), + confidence: v.boolean(), + sourceUrl: v.optional(v.string()), + })), + backlinkCount: v.number(), + lastExpanded: v.number(), + + // Staleness detection + version: v.number(), + staleAfterMs: v.number(), + }) + .index("by_entity", ["entityId"]), }); diff --git a/docs/architecture/EXPANDABLE_GRAPH_NOTEBOOK.md b/docs/architecture/EXPANDABLE_GRAPH_NOTEBOOK.md new file mode 100644 index 000000000..33edce57b --- /dev/null +++ b/docs/architecture/EXPANDABLE_GRAPH_NOTEBOOK.md @@ -0,0 +1,539 @@ +# Expandable Graph Notebook — Architecture + +The critical missing feature: Roam-style expandable mention nodes with +backlink cross-linking, web-grounded agent expansion, and lazy infinite +traversal. Every mention is a live graph node. Click to drill. Agent +enriches with Linkup search. Backlinks cross-reference bidirectionally. + +## Prior art + +- **Roam Research** — bidirectional backlinks as first-class graph primitives, block references +- **Anthropic** — "Building Effective Agents" (2024), orchestrator-workers +- **Notion** — `@mention` inline references with hover previews +- **Obsidian** — local graph view, backlinks panel, transclusion + +--- + +## 1. 
What Exists (reuse, don't rebuild) + +| Component | Table / File | Shape | +|-----------|-------------|-------| +| Entity graph | `entityProfiles` | canonicalName, entityType, wikidataId, aliases | +| Entity mentions | `entityMentions` | entityId → sourceType + sourceId + mentionType | +| Knowledge graph | `knowledgeGraphs` | sourceType, sourceId, clusterId | +| SPO triples | `graphClaims` | subject, predicate, object, claimText, sourceDocIds | +| Claim edges | `graphEdges` | fromClaimId → toClaimId, edgeType (supports/contradicts/etc.) | +| Claim evidence | `claimEvidence` | claimId → artifactEntityId, sourceSnippet, confidence | +| Source registry | `sourceRegistry` | domain, category, isPinned, isActive | +| Document nodes | `nodes` | documentId, parentId, type, text, json | +| Document relations | `relations` | from, to, relationTypeId | +| Relation types | `relationTypes` | name, icon | +| Deep diligence | `deepDiligence.ts` | 6-branch Linkup+Gemini search, `searchWithFallback()` | +| Tiptap/BlockNote | `UnifiedEditor.tsx` | Full editor with 15+ extensions, Convex sync | +| Agent swarms | `agentSwarms` + `swarmAgentTasks` | Parallel agent coordination | + +**Key insight**: Linkup search is ALREADY wired in `deepDiligence.ts` line 560: +``` +fetch("https://api.linkup.so/v1/search", { + headers: { Authorization: `Bearer ${linkupKey}` }, + body: JSON.stringify({ q: query, depth: "standard", outputType: "searchResults" }) +}) +``` + +--- + +## 2. What's Missing (build these) + +### 2.1 New Convex Tables + +```typescript +// convex/schema.ts — additions + +// ── Expansion Runs ────────────────────────────────────────────────── +// Tracks each mention expansion: who clicked, what was searched, what was found. +// Pattern: async_reliability — 202 + runId, idempotency, bounded retries. 
+expansionRuns: defineTable({ + // Identity + runId: v.string(), // sha256(entityId + userId + timestamp) + userId: v.string(), + + // Target + targetEntityId: v.id("entityProfiles"), + targetBlockId: v.optional(v.string()), // Block where expansion was triggered + targetDocumentId: v.optional(v.id("documents")), + + // Execution + status: v.union( + v.literal("queued"), + v.literal("searching"), // Linkup search in progress + v.literal("extracting"), // Gemini extraction in progress + v.literal("persisting"), // Writing graph nodes + v.literal("completed"), + v.literal("partial"), // Some branches succeeded + v.literal("failed"), + ), + + // Results + claimsCreated: v.number(), // SPO triples added + edgesCreated: v.number(), // Graph edges added + sourcesFound: v.number(), // Web sources discovered + + // Budget + searchQueries: v.number(), // Linkup calls made + maxSearchQueries: v.number(), // Budget cap (default: 5) + wallClockMs: v.optional(v.number()), // Total execution time + + // Error handling + errorMessage: v.optional(v.string()), + retryCount: v.number(), + + // Timestamps + createdAt: v.number(), + completedAt: v.optional(v.number()), +}) + .index("by_entity", ["targetEntityId"]) + .index("by_user", ["userId", "createdAt"]) + .index("by_status", ["status"]) + .index("by_runId", ["runId"]), + +// ── Backlinks ─────────────────────────────────────────────────────── +// Bidirectional cross-references between entities/blocks/documents. +// Created automatically when a mention is inserted or when an agent +// discovers a relationship during expansion. 
+backlinks: defineTable({ + // Source (who references) + sourceType: v.union( + v.literal("block"), // A paragraph/heading mentioning an entity + v.literal("claim"), // A knowledge graph claim + v.literal("document"), // A document-level reference + v.literal("signal"), // A daily brief signal + v.literal("action"), // An action item referencing an entity + ), + sourceId: v.string(), // ID of the referencing object + sourceDocumentId: v.optional(v.string()), + sourceContext: v.optional(v.string()), // Surrounding text snippet (≤200 chars) + + // Target (who is referenced) + targetEntityId: v.id("entityProfiles"), + + // Metadata + backlinkType: v.union( + v.literal("mention"), // @mention in editor + v.literal("citation"), // Source citation + v.literal("relatedTo"), // Semantic relationship + v.literal("causes"), // Causal relationship + v.literal("contradicts"), // Contradicting claim + v.literal("supports"), // Supporting evidence + v.literal("derived"), // Agent-discovered link + ), + confidence: v.optional(v.number()), // 0-1, only for derived links + + // Provenance + createdBy: v.union( + v.literal("user"), // Manual mention insertion + v.literal("agent"), // Agent expansion discovered this + v.literal("system"), // Automatic extraction + ), + agentRunId: v.optional(v.string()), // Which expansion created this + + createdAt: v.number(), +}) + .index("by_target", ["targetEntityId", "backlinkType"]) + .index("by_source", ["sourceType", "sourceId"]) + .index("by_document", ["sourceDocumentId"]) + .index("by_agent_run", ["agentRunId"]), + +// ── Expansion Snapshots ───────────────────────────────────────────── +// Cached expanded content for each entity. Prevents re-expansion +// and enables instant preview on hover. 
+expansionSnapshots: defineTable({ + entityId: v.id("entityProfiles"), + + // Cached content (refreshed on each expansion) + summary: v.string(), // 2-3 sentence entity summary + keyFacts: v.array(v.string()), // Top 5 bullet facts + recentClaims: v.array(v.object({ + claimText: v.string(), + predicate: v.string(), + confidence: v.boolean(), // isHighConfidence from graphClaims + sourceUrl: v.optional(v.string()), + })), + backlinkCount: v.number(), // How many things reference this entity + lastExpanded: v.number(), // Timestamp of last expansion + + // Staleness detection + version: v.number(), // Increments on each expansion + staleAfterMs: v.number(), // Default: 24 hours +}) + .index("by_entity", ["entityId"]), +``` + +### 2.2 Expansion Flow + +``` +User clicks [⊕ Expand] on mention chip + │ + ▼ +┌─────────────────────────┐ +│ Client: startExpansion │ Creates expansionRuns row (status: "queued") +│ mutation (< 50ms) │ Returns runId to client +└──────────┬──────────────┘ + │ ctx.scheduler.runAfter(0, ...) + ▼ +┌─────────────────────────┐ +│ expandEntity action │ Convex action (can call external APIs) +│ │ +│ 1. Check idempotency │ sha256(entityId + userId) → reject if active run +│ 2. Linkup search │ 3-5 queries, "standard" depth +│ 3. Gemini extraction │ Parse search results → SPO triples +│ 4. Deduplicate │ Match against existing graphClaims +│ 5. Persist graph patch │ New claims + edges + backlinks (mutation) +│ 6. Update snapshot │ Cached summary for instant hover preview +│ 7. 
Status → completed │ +└──────────┬──────────────┘ + │ Convex reactivity pushes to client + ▼ +┌─────────────────────────┐ +│ UI: mention node │ Expands inline showing: +│ re-renders │ • Summary + key facts +│ │ • New claims with source links +│ │ • Backlinks ("Referenced by 3 other notes") +│ │ • [⊕ Expand deeper] for next level +└─────────────────────────┘ +``` + +### 2.3 Mention Node in Tiptap + +Extend the existing BlockNote/Tiptap editor with an expandable mention: + +```typescript +// src/features/editor/extensions/ExpandableMention.ts + +/** + * Custom Tiptap node that renders as an inline mention chip + * but can expand to show entity intelligence inline. + * + * States: + * collapsed: "@Anthropic" chip (default) + * loading: "@Anthropic ◑" chip with spinner + * expanded: "@Anthropic" chip + inline expansion panel below + * + * Attrs stored in Tiptap JSON: + * id: entityProfiles._id + * label: display text + * entityType: "person" | "company" | "topic" | etc. + * expanded: boolean (UI state, not persisted) + */ +``` + +### 2.4 Backlink Panel + +When viewing any entity (via mention expansion or entity page): + +``` +┌─────────────────────────────────────────┐ +│ Anthropic [⊕ Expand]│ +│ AI safety company, founded 2021 │ +│ │ +│ ── Key Facts ──────────────────────────│ +│ • $4B Series E at $60B valuation │ +│ • Claude 4.7 Opus released May 2026 │ +│ • 1,200+ employees │ +│ │ +│ ── Referenced By (12) ─────────────────│ +│ 📄 Daily Brief — "AI infrastructure…" │ +│ 📄 Meeting notes — "vendor review…" │ +│ 📊 Report: Anthropic Q2 Diligence │ +│ 💬 Thread: "Should we migrate to…" │ +│ ···show 8 more │ +│ │ +│ ── Related Entities ───────────────────│ +│ → Google (investor) │ +│ → OpenAI (competitor) │ +│ → Dario Amodei (CEO) │ +│ ···show more │ +│ │ +│ ── Sources (5) ────────────────────────│ +│ [1] sec.gov/edgar/... (verified) │ +│ [2] reuters.com/... (tier 2) │ +│ [3] techcrunch.com/... (tier 2) │ +└─────────────────────────────────────────┘ +``` + +--- + +## 3. 
Expansion Agent Tools + +The expansion agent gets 5 bounded tools: + +```typescript +const EXPANSION_TOOLS = [ + { + name: "linkup_search", + description: "Search the web via Linkup for entity intelligence", + // Budget: max 5 calls per expansion run + // Uses existing searchWithFallback() from deepDiligence.ts + }, + { + name: "read_entity_graph", + description: "Read existing claims and edges for an entity", + // Reads: graphClaims where subject matches entity + // BOUND_READ: max 100 claims returned + }, + { + name: "read_backlinks", + description: "Read what references this entity across the notebook", + // Reads: backlinks where targetEntityId matches + // BOUND_READ: max 50 backlinks returned + }, + { + name: "propose_graph_patch", + description: "Propose new claims, edges, and backlinks to add", + // Agent NEVER writes directly — proposes a structured patch + // Patch is validated by Convex mutation before applying + }, + { + name: "resolve_entity", + description: "Look up or create an entity in the graph", + // Uses existing resolveEntity() from deepDiligence.ts + // Deduplicates by canonicalName + aliases + }, +]; +``` + +### 3.1 Graph Patch Contract + +The agent proposes changes; a Convex mutation validates and applies: + +```typescript +interface GraphPatch { + targetEntityId: string; + + // New SPO triples to add + newClaims: Array<{ + subject: string; + predicate: string; + object: string; + claimText: string; + sourceUrls: string[]; + sourceSnippets: string[]; + isHighConfidence: boolean; + }>; + + // New edges between claims + newEdges: Array<{ + fromClaimSubject: string; // Matched to existing/new claim + toClaimSubject: string; + edgeType: "supports" | "contradicts" | "mentions" | "causes" | + "relatedTo" | "partOf" | "precedes"; + }>; + + // New backlinks discovered + newBacklinks: Array<{ + sourceType: "claim" | "document" | "signal"; + sourceId: string; + backlinkType: "relatedTo" | "causes" | "supports" | "derived"; + confidence: number; + }>; 
+ + // Entity snapshot update + snapshotUpdate: { + summary: string; + keyFacts: string[]; + }; +} +``` + +### 3.2 Patch Validation (Convex Mutation) + +```typescript +// convex/domains/graph/applyGraphPatch.ts + +/** + * Validates and applies a graph patch proposed by the expansion agent. + * + * Invariants: + * BOUND — max 50 claims, 100 edges, 50 backlinks per patch + * HONEST_STATUS — returns actual counts, not claimed counts + * DETERMINISTIC — deduplicate by (subject, predicate, object) triple + * SSRF — sourceUrls validated before storing + */ +``` + +--- + +## 4. Lazy Infinite Expansion + +The graph grows indefinitely over time, but every query is bounded: + +| Bound | Limit | Why | +|-------|-------|-----| +| Depth per click | 1 level | User clicks to go deeper | +| Claims per expansion | 50 | Prevent single expansion from flooding graph | +| Edges per expansion | 100 | Reasonable relationship density | +| Backlinks returned | 50 | UI pagination for more | +| Search queries per run | 5 | Linkup API budget | +| Sources per run | 10 | Reasonable evidence set | +| Wall clock per run | 60s | Convex action timeout | +| Entity dedup | canonicalName + aliases | Prevent duplicate entities | +| Claim dedup | (subject, predicate, object) triple | Prevent duplicate facts | + +Each expansion adds more graph facts. User can expand again from any +new mention or backlink. The graph is unbounded over time, but every +single query/action has explicit budgets. + +--- + +## 5. 
Integration with Home Daily Brief + +The Home surface becomes the **primary expansion surface**: + +| Home Section | Graph Integration | +|---|---| +| BLUF signals | Each entity mentioned becomes an expandable mention | +| Report carousel cards | Each card is a mini entity node — click to expand | +| Change cards | Entity name in header is expandable | +| Competing explanations | Entities referenced in explanations are expandable | +| Watch events | Event entities are expandable | +| Actions table | Linked entities are expandable | +| Briefing agent | Agent can call `expand_entity` as a tool | + +### 5.1 Signal → Mention Extraction + +During brief generation (6 AM cron), extract entity mentions from signals: + +```typescript +// In dailyMorningBrief.ts, after signal extraction: +for (const signal of signals) { + const entities = await resolveEntitiesInText(signal.title + " " + signal.summary); + for (const entity of entities) { + await ctx.runMutation(api.domains.graph.createBacklink, { + sourceType: "signal", + sourceId: signal._id, + targetEntityId: entity._id, + backlinkType: "mention", + createdBy: "system", + }); + } +} +``` + +--- + +## 6. 
State Machines + +### 6.1 Expansion Run Lifecycle + +``` +┌─────────┐ startExpansion() ┌────────────┐ +│ (none) │──────────────────▶│ queued │ +└─────────┘ └─────┬──────┘ + │ scheduler fires + ▼ + ┌────────────┐ + │ searching │ Linkup API calls + └─────┬──────┘ + │ results received + ▼ + ┌────────────┐ + │ extracting │ Gemini parses → SPO triples + └─────┬──────┘ + │ extraction complete + ▼ + ┌────────────┐ + │ persisting │ Graph patch applied + └─────┬──────┘ + ╱ │ + some fail ╱ │ all succeed + ╱ │ + ┌─────────┐ ┌──────────┐ + │ partial │ │completed │ + └─────────┘ └──────────┘ +``` + +### 6.2 Mention Chip States + +``` +┌────────────┐ click [⊕] ┌────────────┐ +│ collapsed │─────────────▶│ loading │ +│ @Entity │ │ @Entity ◑ │ +└──────┬─────┘ └─────┬──────┘ + │ │ expansion complete + │ ▼ + │ ┌────────────┐ + │ │ expanded │ + │ │ @Entity │ + │ │ + panel │ + │ click collapse └─────┬──────┘ + │◀─────────────────────────┘ +``` + +--- + +## 7. Reliability Invariants + +Per `.claude/rules/agentic_reliability.md`: + +| Check | Applied to | Implementation | +|-------|-----------|----------------| +| **BOUND** | Claims per patch | Max 50, reject overflow | +| **BOUND** | Backlinks per query | Max 50, paginated | +| **HONEST_STATUS** | Expansion run status | Never "completed" if any branch failed → "partial" | +| **HONEST_SCORES** | Claim confidence | `isHighConfidence` from evidence, never hardcoded true | +| **TIMEOUT** | Expansion action | 60s AbortController, per-search 20s timeout (`SEARCH_TIMEOUT_MS`) | +| **SSRF** | Source URLs from Linkup | Validate before storing (no internal IPs) | +| **BOUND_READ** | Linkup response body | 512KB cap per response | +| **ERROR_BOUNDARY** | Mention expansion UI | Error boundary per mention panel | +| **DETERMINISTIC** | Claim dedup | (subject, predicate, object) triple hash | + +--- + +## 8.
File Inventory + +| File | Purpose | Status | +|------|---------|--------| +| `docs/architecture/EXPANDABLE_GRAPH_NOTEBOOK.md` | This document | ✅ | +| `convex/domains/graph/expandEntity.ts` | Expansion action (Linkup + Gemini + persist) | ✅ Created | +| `convex/domains/graph/applyGraphPatch.ts` | Validate + apply graph patch mutation | ✅ Created | +| `convex/domains/graph/backlinkQueries.ts` | Backlink lookup queries | ✅ Created | +| `convex/domains/graph/expansionQueries.ts` | Expansion run status queries | ✅ Created | +| `convex/domains/graph/index.ts` | Barrel exports for graph domain | ✅ Created | +| `src/features/editor/extensions/ExpandableMention.ts` | Tiptap mention extension | ⬜ Create | +| `src/features/graph/components/MentionExpansionPanel.tsx` | Inline expansion panel | ⬜ Create | +| `src/features/graph/components/BacklinkList.tsx` | Backlink cross-reference list | ⬜ Create | +| `src/features/graph/hooks/useEntityExpansion.ts` | Expansion trigger + status hook | ⬜ Create | +| `src/features/graph/hooks/useBacklinks.ts` | Backlink query hook | ⬜ Create | +| `convex/domains/search/deepDiligence.ts` | Existing — `searchWithFallback()` reused | ✅ Exists | +| `src/features/editor/components/UnifiedEditor.tsx` | Existing — register new extension | ✅ Exists | + +--- + +## 9. Migration Plan + +### Phase 1: Schema + Backend (Week 1) ✅ DONE +1. ✅ Added `expansionRuns`, `backlinks`, `expansionSnapshots` tables to schema +2. ✅ Created `expandEntity` action (Linkup + Gemini, idempotency, bounded) +3. ✅ Created `applyGraphPatch` mutation with SSRF + dedup + bound validation +4. ✅ Created `backlinkQueries` with 6 indexed lookups + backlink summary +5. ✅ Created `expansionQueries` with status subscriptions + active run tracking +6. ✅ 29 scenario-based tests (SSRF, dedup, bounds, state machine, backlink taxonomy) +7. ⬜ Wire expansion runs to entity mentions during brief generation + +### Phase 2: Tiptap Extension (Week 2) +6. 
Create `ExpandableMention` Tiptap node type +7. Register in UnifiedEditor extension list +8. Build `MentionExpansionPanel` React component +9. Build `BacklinkList` component +10. Wire `useEntityExpansion` hook to Convex subscriptions + +### Phase 3: Home Integration (Week 3) +11. Add expandable mentions to BLUF signal text +12. Add expansion CTA to report carousel cards +13. Wire briefing agent `expand_entity` tool +14. Add entity hover preview using `expansionSnapshots` + +### Phase 4: Cross-Linking + Polish (Week 4) +15. Auto-extract entity mentions from new documents (background job) +16. Backlink count badges on entity mentions +17. Graph visualization (mini force-directed view) +18. Keyboard shortcuts for expansion (Enter on focused mention) diff --git a/docs/architecture/REPORTS_AND_ENTITIES.md b/docs/architecture/REPORTS_AND_ENTITIES.md index 05fcfb969..ebbdd3a8e 100644 --- a/docs/architecture/REPORTS_AND_ENTITIES.md +++ b/docs/architecture/REPORTS_AND_ENTITIES.md @@ -2,7 +2,7 @@ **Status:** Living · Last reviewed 2026-04-19 **Owner:** Core team -**Supersedes:** `NODEBENCH_AI_APP_PRD_V1.md`, `nodebench-ai-app-v2.md`, `COCKPIT_WIREFRAMES.md`, `DAY1_USER_JOURNEYS.md`, `ARCHETYPE_GAP_ANALYSIS.md`, `UNIFIED_TEMPORAL_OS.md`, `IDEAFLOW_BLOCK_NOTEBOOK_ULTRAPLAN.md`, `NODEBENCH_COMPLETE_SPEC.md` — archived. +**Supersedes:** `NODEBENCH_AI_APP_PRD_V1.md`, `nodebench-ai-app-v2.md`, `COCKPIT_WIREFRAMES.md`, `DAY1_USER_JOURNEYS.md`, `ARCHETYPE_GAP_ANALYSIS.md`, `UNIFIED_TEMPORAL_OS.md`, `BLOCK_NOTEBOOK_ULTRAPLAN.md`, `NODEBENCH_COMPLETE_SPEC.md` — archived. ## TL;DR diff --git a/public/proto/GRAPH_NOTEBOOK_FEATURES.md b/public/proto/GRAPH_NOTEBOOK_FEATURES.md new file mode 100644 index 000000000..2caaecd47 --- /dev/null +++ b/public/proto/GRAPH_NOTEBOOK_FEATURES.md @@ -0,0 +1,194 @@ +# Graph Notebook Features — home-v3.html Prototype + +Implemented in `public/proto/home-v3.html` as pure HTML/CSS/JS. 
+These features are the prototype for the expandable graph notebook +described in `docs/architecture/EXPANDABLE_GRAPH_NOTEBOOK.md`. + +## Prior art + +- **Roam Research** — bidirectional backlinks, block references, outliner-first editing +- **Notion** — `@mention` inline references with hover previews +- **Obsidian** — local graph view, backlinks panel + +--- + +## 1. Source Citation Popovers + +**What:** Clicking `[src:N]` badges shows a positioned popover with source metadata. + +**Behavior:** +- Click `[src:1]` -> popover appears near the badge +- Shows: title, URL (linked), excerpt, date + type +- Click elsewhere or another interactive element -> popover dismisses +- Only one popover active at a time (`activeSrcPop` singleton) + +**Key function:** `showSourcePop(badge)` (~line 8038) + +**Data:** `SOURCE_DATA` object keyed by source number: +```javascript +SOURCE_DATA = { + 1: { title, url, excerpt, date, type }, + ... +} +``` + +**Dismiss fix:** `toggleMentionExpand()` and `toggleWikiExpand()` both +clean up `activeSrcPop` at entry, preventing stale popovers when +`stopPropagation` blocks the document-level dismiss listener. + +--- + +## 2. Mention Expansion (@entity) + +**What:** Clicking `@Company` or `@Person` chips expands an inline entity +reference block below the mention. 
+ +**Chip types:** +- `@Company` — terracotta accent (`var(--accent)`) +- `@Person` — purple (`#a78bfa`) + +**Expansion block contains:** +- Entity header: avatar + name + type + source count + claim count + status badge +- Summary paragraph +- Cited claims with `[src:N]` badges (clickable — opens source popovers) +- Graph edges: relation type + entity name (e.g., "competes with @Google DeepMind") +- Action buttons: "Expand with agent", "Open report", "N backlinks" + +**Key function:** `toggleMentionExpand(mention)` (~line 7891) + +**Data:** `MENTION_DATA` object keyed by entity ID: +```javascript +MENTION_DATA = { + anthropic: { name, type, summary, claims[], edges[], backlinks }, + openai: { ... }, + ... +} +``` + +--- + +## 3. Wiki Link Expansion ([[topic]]) + +**What:** Clicking `[[topic]]` links expands an inline topic reference with +definition and related entities. + +**Styling:** Cyan accent border-left, diamond icon header. + +**Expansion block contains:** +- Topic name with diamond icon +- Close button +- Definition/description paragraph +- Related references section (analytical context from the notebook) +- Topic tags as pill buttons + +**Key function:** `toggleWikiExpand(mention)` (~line 8099) + +**Data:** `WIKI_DATA` object keyed by topic slug: +```javascript +WIKI_DATA = { + 'pricing-power': { name, definition, references[], tags[] }, + 'cap-table-analysis': { ... }, + ... +} +``` + +--- + +## 4. Recursive Agent Expansion + +**What:** Clicking "Expand with agent" inside a mention expansion block +triggers a simulated 4-step agent deep dive. + +**Simulation steps (timed progression):** +1. "Searching Linkup for {entity}..." (0.8s) +2. "Fetching 3 pages (Reuters, TechCrunch, Bloomberg)..." (1.2s) +3. "Extracted 6 new claims, 2 verified..." (1s) +4. "Expansion complete - 4 new claims added" (final state) + +**New claims appended** with `[src:new]` badges and green "new" tags. 
+ +**Depth limiting:** `MAX_EXPAND_DEPTH = 3` prevents infinite recursion +when expanded entities contain clickable mentions. + +**Key function:** Inside `toggleMentionExpand()`, the "Expand with agent" +button onclick handler. + +--- + +## 5. Backlinks Section + +**What:** Below the notebook content, a backlinks section shows all entities +and reports that reference the current entity. + +**Relation types:** MENTIONS, COMPETES, CITES, FOUNDER_OF, RELATED + +**Each backlink shows:** +- Title (report or entity name) +- Excerpt with context +- Relation type badge (color-coded) + +--- + +## 6. Right Rail — Entity Context Card + +**What:** Fills the dead space at the top of the right rail `.ar-thread` +with an entity summary card. + +**Contains:** +- Entity avatar + name + verified badge +- Type line (e.g., "AI safety . San Francisco . Series E") +- 4-metric grid: Claims verified, Sources, Last refreshed, Confidence +- Connected entities with color-coded relationship dots + +**CSS classes:** `.ar-entity-ctx`, `.ar-entity-ctx-avatar`, +`.ar-entity-ctx-metrics`, `.ar-entity-ctx-graph`, `.ar-entity-ctx-edges` + +--- + +## 7. Right Rail — Entity-Contextual Chat + +**What:** The right rail chat thread is contextual to the open notebook entity. + +**Contains:** +- System event: "Notebook opened . Anthropic" +- Agent: "Report health: strong" with tool badges +- User: "Are any claims at risk of going stale?" +- Agent: "2 claims expiring soon" with freshness details +- Action chips: "Refresh expiring sources", "Verify all claims", "Export memo" +- System event: "9 mentions . 5 backlinks" + +**Input placeholder:** "Ask about Anthropic..." + +--- + +## Event Delegation Architecture + +All interactive elements use event delegation from a single document-level +listener. 
Key coordination: + +- `e.stopPropagation()` on source badges prevents document click from + immediately dismissing the popover +- Mention/wiki handlers explicitly clean up `activeSrcPop` before expanding +- Each expansion type uses a `data-expanded` attribute to track toggle state +- Expansion blocks are inserted via `insertAdjacentHTML('afterend', html)` + +--- + +## Design Tokens + +| Element | Color | CSS | +|---------|-------|-----| +| @Company mention | Terracotta | `var(--accent)` / `#d97757` | +| @Person mention | Purple | `#a78bfa` | +| [[wiki]] link | Cyan | `#67e8f9` | +| #tag | Grey | `var(--ink-faint)` | +| [src:N] badge | Muted | `var(--line-faint)` bg | +| Entity context card | Glass | `color-mix(in srgb, var(--ink) 3%, transparent)` | +| Verified badge | Green | `var(--green)` | + +--- + +## File + +All implementation is in `public/proto/home-v3.html` (single-file prototype). +Architecture spec for React/Convex migration: `docs/architecture/EXPANDABLE_GRAPH_NOTEBOOK.md`. diff --git a/src/features/agents/lib/agentPalette.ts b/src/features/agents/lib/agentPalette.ts index 7d9b06d67..1cc8482ca 100644 --- a/src/features/agents/lib/agentPalette.ts +++ b/src/features/agents/lib/agentPalette.ts @@ -1,7 +1,7 @@ /** * agentPalette — persistent color + icon per diligence agent role. * - * Motivation: Ideaflow / v3 / v4 prototypes all assign persistent colors + * Motivation: v3 / v4 prototypes all assign persistent colors * to each agent role so users build pattern recognition ("purple is Spencer, * green is Maya"). NodeBench's blockType IS the agent role — founder, * product, funding, etc. 
— so we derive the palette client-side from the diff --git a/src/features/entities/components/notebook/EntityNotebookLive.tsx b/src/features/entities/components/notebook/EntityNotebookLive.tsx index 506f4987d..840fbaea0 100644 --- a/src/features/entities/components/notebook/EntityNotebookLive.tsx +++ b/src/features/entities/components/notebook/EntityNotebookLive.tsx @@ -1,5 +1,5 @@ /** - * EntityNotebookLive — Phase 3-4 of the Ideaflow/Mew-inspired notebook. + * EntityNotebookLive — Phase 3-4 of the Roam-inspired notebook. * * Renders the entity as a single flowing document of persisted blocks from * productBlocks. Focused editable blocks use a Tiptap + Convex ProseMirror diff --git a/src/index.css b/src/index.css index e9813f5ea..e346e8dcc 100644 --- a/src/index.css +++ b/src/index.css @@ -1582,7 +1582,7 @@ html[data-density='spacious'] { /* Placeholder ghost-text — shows ONLY on the currently-focused block's empty paragraph, never on every empty block in the document. Matches - Ideaflow/Lexical pattern (Editor.module.css:69-89): hidden by default, + Roam/Lexical pattern: hidden by default, fades in on focus. A doc with 10 empty lines must not display 10 repeated "Type / for commands…" ghosts — that drowns real content. */ .notebook-sheet [data-testid="notebook-block"][data-block-focused="true"] .ProseMirror p.is-empty::before { @@ -1646,7 +1646,7 @@ html[data-density='spacious'] { } } -/* Lexical/Ideaflow rhythm: zero block margin + min-height per paragraph. +/* Lexical/outliner rhythm: zero block margin + min-height per paragraph. Tight spacing between content blocks; empty blocks collapse to a single line instead of a full paragraph of whitespace. 
*/ .notebook-sheet .ProseMirror p { diff --git a/src/test/Orchestrator.live.e2e.test.ts b/src/test/Orchestrator.live.e2e.test.ts index 859f7e282..b37899f15 100644 --- a/src/test/Orchestrator.live.e2e.test.ts +++ b/src/test/Orchestrator.live.e2e.test.ts @@ -37,7 +37,7 @@ if (!live) { }; // Simple graph: Web search feeds into an answer. No dynamic branching. - const topic = "Research Jacob Cole, Ideaflow, and the company's fundraising for the next round"; + const topic = "Research Anthropic and the company's fundraising for the next round"; const graph = { nodes: [ { id: 's1', kind: 'search', label: 'Web Research', prompt: '{{topic}}' }, diff --git a/src/test/Orchestrator.live.eval.e2e.test.ts b/src/test/Orchestrator.live.eval.e2e.test.ts index 8631f7508..f603db192 100644 --- a/src/test/Orchestrator.live.eval.e2e.test.ts +++ b/src/test/Orchestrator.live.eval.e2e.test.ts @@ -35,7 +35,7 @@ if (!live) { }; // Start with only an eval node. The LLM is instructed to return pass=false and add s1(search) -> a1(answer) - const topic = 'Summarize the latest about Ideaflow and Jacob Cole fundraising'; + const topic = 'Summarize the latest about Anthropic and Dario Amodei fundraising'; const graph = { nodes: [ {