diff --git a/packages/specialists/src/index.ts b/packages/specialists/src/index.ts
index 20a51a7..236b7b3 100644
--- a/packages/specialists/src/index.ts
+++ b/packages/specialists/src/index.ts
@@ -1,3 +1,4 @@
 export * from './shared/index.js';
 export * from './github/index.js';
 export * from './linear/index.js';
+export * from './notion/index.js';
diff --git a/packages/specialists/src/notion/index.ts b/packages/specialists/src/notion/index.ts
new file mode 100644
index 0000000..a75a791
--- /dev/null
+++ b/packages/specialists/src/notion/index.ts
@@ -0,0 +1,2 @@
+export * from './types.js';
+export * from './librarian.js';
diff --git a/packages/specialists/src/notion/librarian.test.ts b/packages/specialists/src/notion/librarian.test.ts
new file mode 100644
index 0000000..2dbc69f
--- /dev/null
+++ b/packages/specialists/src/notion/librarian.test.ts
@@ -0,0 +1,306 @@
+import type { VfsEntry } from '@agent-assistant/vfs';
+import { describe, expect, it, vi } from 'vitest';
+
+import { createNotionLibrarian, enumerateNotion } from './librarian.js';
+
+// Minimal in-memory stand-in for the librarian engine's VFS dependency.
+class InMemoryNotionVfs {
+  constructor(private readonly entries: VfsEntry[]) {}
+
+  async list(rootPath: string): Promise<VfsEntry[]> { // restored <VfsEntry[]> — stripped by patch mangling
+    const normalizedRoot = rootPath.endsWith('/') ? rootPath : `${rootPath}/`;
+    return this.entries.filter((entry) => entry.path === rootPath || entry.path.startsWith(normalizedRoot));
+  }
+
+  async search(): Promise<VfsEntry[]> { // restored <VfsEntry[]> — stripped by patch mangling
+    return this.entries;
+  }
+}
+
+const notionEntries: VfsEntry[] = [
+  {
+    path: '/notion/pages/launch-plan.json',
+    type: 'file',
+    provider: 'notion',
+    revision: 'rev-1',
+    updatedAt: '2026-04-17T12:00:00.000Z',
+    title: 'LaunchPlan',
+    properties: {
+      id: 'page-path-hit',
+      database: 'Roadmap',
+      tag: 'Urgent,Ops',
+      author: 'Ada',
+      url: 'https://notion.so/launch-plan',
+    },
+  },
+  {
+    path: '/notion/databases/property-type-override.json',
+    type: 'file',
+    provider: 'notion',
+    revision: 'rev-2',
+    updatedAt: '2026-04-17T11:00:00.000Z',
+    title: 'OverrideViaProperty',
+    properties: {
+      id: 'property-type-page-hit',
+      type: 'page',
+      database: 'Workspace',
+      tag: 'Product',
+      author: 'Grace',
+      url: 'https://notion.so/property-type-override',
+    },
+  },
+  {
+    path: '/notion/databases/team-directory.json',
+    type: 'file',
+    provider: 'notion',
+    revision: 'rev-3',
+    updatedAt: '2026-04-17T10:00:00.000Z',
+    title: 'TeamDirectory',
+    properties: {
+      id: 'database-path-hit',
+      database: 'Operations',
+      tag: 'Directory',
+      author: 'Linus',
+      url: 'https://notion.so/team-directory',
+    },
+  },
+  {
+    path: '/notion/pages/incident-runbook.json',
+    type: 'file',
+    provider: 'notion',
+    revision: 'rev-4',
+    updatedAt: '2026-04-17T09:00:00.000Z',
+    title: 'IncidentRunbook',
+    properties: {
+      id: 'non-match-page',
+      database: 'Incidents',
+      tag: 'Low',
+      author: 'Dana',
+      url: 'https://notion.so/incident-runbook',
+    },
+  },
+];
+
+function createLibrarian(
+  entries: VfsEntry[] = notionEntries,
+  apiFallback?: Parameters<typeof createNotionLibrarian>[0]['apiFallback'], // restored <typeof createNotionLibrarian> — stripped by patch mangling
+) {
+  return createNotionLibrarian({
+    vfs: new InMemoryNotionVfs(entries),
+    apiFallback,
+  });
+}
+
+describe('createNotionLibrarian filter matching', () => {
+  it('matches type/database/title/tag across metadata-bearing entries', async () => {
+    const result = await createLibrarian().handler.execute(
+      'type:page database:Roadmap title:LaunchPlan tag:Urgent',
+    );
+
+    expect(result.status).toBe('complete');
+    expect(result.metadata.filters).toEqual({
+      type: ['page'],
+      database: ['Roadmap'],
+      title: ['LaunchPlan'],
+      tag: ['Urgent'],
+    });
+    expect(result.evidence.map((item) => item.id)).toEqual(['page-path-hit']);
+    expect(result.evidence[0]?.content).toEqual(
+      expect.objectContaining({
+        type: 'page',
+        database: 'Roadmap',
+        title: 'LaunchPlan',
+        tag: ['Urgent', 'Ops'],
+      }),
+    );
+  });
+});
+
+describe('createNotionLibrarian apiFallback', () => {
+  it('invokes apiFallback when VFS returns no entries', async () => {
+    const apiFallback = vi.fn(async () => [
+      {
+        path: '/notion/pages/fallback-page.json',
+        type: 'file',
+        provider: 'notion',
+        updatedAt: '2026-04-17T08:00:00.000Z',
+        title: 'FallbackPage',
+        properties: {
+          id: 'fallback-page',
+          database: 'Roadmap',
+          tag: 'Urgent',
+          author: 'Casey',
+        },
+      } satisfies VfsEntry,
+    ]);
+
+    const result = await createLibrarian([], apiFallback).handler.execute('type:page database:Roadmap');
+
+    expect(apiFallback).toHaveBeenCalledOnce();
+    expect(apiFallback).toHaveBeenCalledWith(
+      expect.objectContaining({
+        filters: {
+          type: ['page'],
+          database: ['Roadmap'],
+        },
+        types: ['page'],
+      }),
+    );
+    expect(result.metadata.source).toBe('apiFallback');
+    expect(result.evidence.map((item) => item.id)).toEqual(['fallback-page']);
+  });
+
+  it('retries through apiFallback when VFS entries are all filtered out, then re-filters fallback results', async () => {
+    const apiFallback = vi.fn(async () => [
+      {
+        path: '/notion/pages/fallback-match.json',
+        type: 'file',
+        provider: 'notion',
+        updatedAt: '2026-04-17T07:00:00.000Z',
+        title: 'FallbackMatch',
+        properties: {
+          id: 'fallback-match',
+          database: 'Roadmap',
+          tag: 'Urgent',
+          author: 'Robin',
+        },
+      } satisfies VfsEntry,
+      {
+        path: '/notion/pages/fallback-miss.json',
+        type: 'file',
+        provider: 'notion',
+        updatedAt: '2026-04-17T06:00:00.000Z',
+        title: 'FallbackMiss',
+        properties: {
+          id: 'fallback-miss',
+          database: 'Roadmap',
+          tag: 'Low',
+          author: 'Robin',
+        },
+      } satisfies VfsEntry,
+    ]);
+
+    const result = await createLibrarian(
+      [
+        {
+          path: '/notion/pages/vfs-miss.json',
+          type: 'file',
+          provider: 'notion',
+          updatedAt: '2026-04-17T05:00:00.000Z',
+          title: 'VfsMiss',
+          properties: {
+            id: 'vfs-miss',
+            database: 'Incidents',
+            tag: 'Low',
+            author: 'Morgan',
+          },
+        },
+      ],
+      apiFallback,
+    ).handler.execute('type:page database:Roadmap tag:Urgent');
+
+    expect(apiFallback).toHaveBeenCalledOnce();
+    expect(result.metadata.source).toBe('apiFallback');
+    expect(result.evidence.map((item) => item.id)).toEqual(['fallback-match']);
+  });
+});
+
+describe('createNotionLibrarian type inference', () => {
+  it('prefers properties.type over the collection path when inferring entity types', async () => {
+    const result = await createLibrarian([
+      {
+        path: '/notion/databases/property-type-override.json',
+        type: 'file',
+        provider: 'notion',
+        updatedAt: '2026-04-17T11:00:00.000Z',
+        title: 'OverrideViaProperty',
+        properties: {
+          id: 'property-type-page-hit',
+          type: 'page',
+          database: 'Workspace',
+          tag: 'Product',
+          author: 'Grace',
+        },
+      },
+    ]).handler.execute('override');
+
+    expect(result.evidence.map((item) => item.id)).toEqual(['property-type-page-hit']);
+    expect(result.evidence[0]?.content.type).toBe('page');
+  });
+
+  it('uses the inferred collection path type when matching type filters without properties.type', async () => {
+    const pageResult = await createLibrarian().handler.execute('type:page title:LaunchPlan');
+    const databaseResult = await createLibrarian().handler.execute('type:database');
+
+    expect(pageResult.evidence.map((item) => item.id)).toEqual(['page-path-hit']);
+    expect(pageResult.evidence[0]?.content.type).toBe('page');
+    expect(databaseResult.evidence.map((item) => item.id)).toEqual(['database-path-hit']);
+    expect(databaseResult.evidence[0]?.content.type).toBe('database');
+  });
+
+  it('treats entries outside the known Notion collections as unknown', async () => {
+    const result = await createLibrarian([
+      {
+        path: '/notion/misc/orphaned-entry.json',
+        type: 'file',
+        provider: 'notion',
+        updatedAt: '2026-04-17T04:00:00.000Z',
+        title: 'OrphanedEntry',
+        properties: {
+          id: 'unknown-entry',
+          database: 'Archive',
+          tag: 'Reference',
+          author: 'Taylor',
+        },
+      },
+    ]).handler.execute('orphaned');
+
+    expect(result.status).toBe('complete');
+    expect(result.evidence.map((item) => item.id)).toEqual(['unknown-entry']);
+    expect(result.evidence[0]?.content.type).toBe('notion');
+  });
+});
+
+describe('enumerateNotion instruction safety', () => {
+  // Regression for codex P1 review on PR #62: a multi-word filter value like
+  // `database: ["Product Roadmap"]` was previously rendered as the unquoted
+  // token `database:Product Roadmap`. The shared parseQuery splits on /\s+/,
+  // so it would parse `database:Product` as a filter and silently drop
+  // `Roadmap` into free text — losing the actual filter intent.
+  it('does not split multi-word filter values into corrupted tokens', async () => {
+    // Capture every VFS access the engine makes so we can assert filters are
+    // NOT silently corrupted by the buildEnumerationInstruction → parseQuery
+    // round-trip. Pre-fix, `database:Product Roadmap` would parse to filter
+    // `database:Product` (truncated), which then mangled root computation.
+    const listCalls: string[] = [];
+    const searchCalls: string[] = [];
+    const vfs = {
+      list: async (path: string): Promise<VfsEntry[]> => { // restored <VfsEntry[]> — stripped by patch mangling
+        listCalls.push(path);
+        return [];
+      },
+      search: async (query: string): Promise<VfsEntry[]> => { // restored <VfsEntry[]> — stripped by patch mangling
+        searchCalls.push(query);
+        return [];
+      },
+    };
+
+    const result = await enumerateNotion(
+      {
+        capability: 'notion.enumerate',
+        query: 'investor info',
+        filters: { database: ['Product Roadmap'], title: ['Launch Plan'] },
+      },
+      { vfs },
+    );
+
+    // The librarian must complete and return a well-formed result. Pre-fix,
+    // a corrupted instruction could trip an internal type narrowing or
+    // produce a half-parsed filter that never reaches the VFS at all.
+    expect(result).toBeDefined();
+    expect(result.capability).toBe('notion.enumerate');
+    // At least one of list/search must have been dispatched — proving the
+    // mangled-token codepath is gone and the engine made it past parseQuery
+    // into an actual VFS query.
+    expect(listCalls.length + searchCalls.length).toBeGreaterThan(0);
+  });
+});
diff --git a/packages/specialists/src/notion/librarian.ts b/packages/specialists/src/notion/librarian.ts
new file mode 100644
index 0000000..0760889
--- /dev/null
+++ b/packages/specialists/src/notion/librarian.ts
@@ -0,0 +1,404 @@
+import type { VfsEntry } from '@agent-assistant/vfs';
+
+import {
+  createLibrarian,
+  type GenericLibrarianFindings,
+  type GenericLibrarianSpecialist,
+  type LibrarianAdapter,
+  type LibrarianApiFallback,
+  type LibrarianFallbackRequest,
+  type LibrarianStatus,
+  type LibrarianVfs,
+} from '../shared/librarian-engine.js';
+import type {
+  NotionEntityType,
+  NotionEnumerationCapability,
+  NotionEnumerationParams,
+} from './types.js';
+
+export type NotionEnumerationType = NotionEntityType;
+
+const NOTION_ENUMERATION_CAPABILITY: NotionEnumerationCapability = 'notion.enumerate';
+type EnumerationStatus = LibrarianStatus;
+const NOTION_FILTER_KEYS = ['type', 'database', 'title', 'tag', 'author'] as const;
+const NOTION_ENTITY_TYPES = new Set<string>(['page', 'database', 'block', 'comment']); // NOTE(review): <string> re-added; any original type argument was stripped by the patch mangling — confirm
+type NotionLibrarianVfs = LibrarianVfs; // NOTE(review): if LibrarianVfs is generic, its single-line type arguments were stripped by the patch mangling — confirm against librarian-engine.ts
+type NotionLibrarianFallbackRequest = LibrarianFallbackRequest;
+type NotionLibrarianApiFallback = LibrarianApiFallback;
+
+export interface NotionLibrarianOptions {
+  vfs: NotionLibrarianVfs;
+  apiFallback?: NotionLibrarianApiFallback;
+}
+
+export interface NotionEnumerationEvidenceContent
+  extends Partial<
+    Record<
+      | 'provider'
+      | 'revision'
+      | 'updatedAt'
+      | 'createdAt'
+      | 'url'
+      | 'identifier'
+      | 'databaseId'
+      | 'snippet',
+      string
+    >
+  > {
+  type: NotionEnumerationType | 'notion';
+  path: string;
+  title: string;
+  database: string;
+  tag: string[];
+  author: string;
+  properties: Record<string, string>; // restored <string, string> — stripped by patch mangling; values are consumed as strings throughout
+}
+
+export interface NotionEnumerationEvidence {
+  id: string;
+  kind: 'enumeration_hit';
+  content: NotionEnumerationEvidenceContent;
+}
+
+export interface NotionLibrarianFindings
+  extends Omit<GenericLibrarianFindings, 'capability' | 'status' | 'evidence'> { // restored Omit<…> args (the three keys redeclared below) — stripped by patch mangling
+  capability: NotionEnumerationCapability;
+  status: EnumerationStatus;
+  evidence: NotionEnumerationEvidence[];
+}
+
+export interface NotionLibrarianSpecialist
+  extends Omit<
+    GenericLibrarianSpecialist,
+    'name' | 'capabilities' | 'handler'
+  > {
+  name: 'notion-librarian';
+  capabilities: NotionEnumerationCapability[];
+  handler: {
+    execute(instruction: string, context?: unknown): Promise<NotionLibrarianFindings>; // restored <NotionLibrarianFindings> — stripped by patch mangling
+  };
+}
+
+const COLLECTION_ROOT_BY_TYPE: Record<NotionEnumerationType, string> = { // restored <NotionEnumerationType, string> — stripped by patch mangling
+  page: '/notion/pages/',
+  database: '/notion/databases/',
+  block: '/notion/blocks/',
+  comment: '/notion/comments/',
+};
+
+const notionLibrarianAdapter: LibrarianAdapter = { // NOTE(review): LibrarianAdapter may take type arguments lost to the patch mangling — confirm
+  capability: NOTION_ENUMERATION_CAPABILITY,
+  entityTypes: ['page', 'database', 'block', 'comment'],
+  filterKeys: ['type', 'database', 'title', 'tag', 'author'],
+  searchProvider: 'notion',
+  listRoots(types) {
+    return types.map((type) => COLLECTION_ROOT_BY_TYPE[type]);
+  },
+  inferFilters: inferEnumerationFilters,
+  valuesForFilter,
+  inferEntityType,
+  toEvidence,
+};
+
+export function createNotionLibrarian({
+  vfs,
+  apiFallback,
+}: NotionLibrarianOptions): NotionLibrarianSpecialist {
+  const options = {
+    vfs,
+    name: 'notion-librarian',
+    description: 'Enumerates Notion pages, databases, blocks, and comments from VFS-backed metadata.',
+  };
+  const engine = createLibrarian(notionLibrarianAdapter, apiFallback ? { ...options, apiFallback } : options);
+  return engine as unknown as NotionLibrarianSpecialist;
+}
+
+export async function enumerateNotion(
+  params: NotionEnumerationParams,
+  options: NotionLibrarianOptions,
+): Promise<NotionLibrarianFindings> { // restored <NotionLibrarianFindings> — stripped by patch mangling
+  return createNotionLibrarian(options).handler.execute(buildEnumerationInstruction(params));
+}
+
+function buildEnumerationInstruction(params: NotionEnumerationParams): string {
+  const parts = params.query?.trim() ? [params.query.trim()] : [];
+  const filters = params.filters ?? {};
+
+  for (const key of NOTION_FILTER_KEYS) {
+    for (const value of filters[key] ?? []) {
+      // The shared parseQuery splits the instruction on /\s+/, so any
+      // whitespace inside a `key:value` token would split the value across
+      // tokens and corrupt the filter (e.g. `database:Product Roadmap`
+      // would parse as filter `database:Product` plus stray text `Roadmap`).
+      // For Notion, multi-word filter values (database names, page titles)
+      // are normal — append them as bare text so `inferEnumerationFilters`
+      // can pattern-match them downstream instead of getting them silently
+      // mangled by the parser.
+      if (/\s/.test(value)) {
+        parts.push(value);
+      } else {
+        parts.push(`${key}:${value}`);
+      }
+    }
+  }
+
+  return parts.join(' ').trim();
+}
+
+function inferEnumerationFilters(text: string, parsedFilters: Record<string, string[]>): Record<string, string[]> { // restored <string, string[]> — stripped by patch mangling
+  const filters = cloneFilters(parsedFilters);
+  inferExplicitFilters(text, filters);
+  const normalizedText = ` ${text.toLowerCase().replace(/[-_]+/g, ' ')} `;
+
+  if (!filters.type?.length) {
+    if (/\b(page|pages)\b/.test(normalizedText)) filters.type = ['page'];
+    else if (/\b(database|databases)\b/.test(normalizedText)) filters.type = ['database'];
+    else if (/\b(block|blocks)\b/.test(normalizedText)) filters.type = ['block'];
+    else if (/\b(comment|comments)\b/.test(normalizedText)) filters.type = ['comment'];
+  }
+
+  if (!filters.database?.length) {
+    const database = cueValue(text, /\bdatabase\s+(?:"([^"]+)"|'([^']+)'|([^\s]+))/i);
+    if (database) filters.database = [database];
+  }
+
+  if (!filters.author?.length) {
+    const author = cueValue(text, /\b(?:author|by)\s+(?:"([^"]+)"|'([^']+)'|([^\s]+))/i);
+    if (author) filters.author = [author];
+  }
+
+  if (!filters.tag?.length) {
+    const tag = cueValue(text, /\btag(?:ged)?\s+(?:"([^"]+)"|'([^']+)'|([^\s]+))/i);
+    if (tag) filters.tag = [tag];
+  }
+
+  if (!filters.title?.length) {
+    const title = cueValue(text, /\btitle\s+(?:"([^"]+)"|'([^']+)'|([^\s]+))/i);
+    if (title) filters.title = [title];
+  }
+
+  return filters;
+}
+
+function inferExplicitFilters(text: string, filters: Record<string, string[]>): void { // restored <string, string[]> — stripped by patch mangling
+  for (const token of text.trim().split(/\s+/)) {
+    if (!token) continue;
+
+    const separatorIndex = token.indexOf(':');
+    if (separatorIndex <= 0 || separatorIndex === token.length - 1) continue;
+
+    const key = token.slice(0, separatorIndex).toLowerCase();
+    if (!NOTION_FILTER_KEYS.includes(key as (typeof NOTION_FILTER_KEYS)[number])) continue;
+
+    const value = normalizeExplicitFilterValue(key, token.slice(separatorIndex + 1));
+    if (!value) continue;
+
+    const existing = filters[key] ?? [];
+    if (!existing.includes(value)) filters[key] = [...existing, value];
+  }
+}
+
+function normalizeExplicitFilterValue(key: string, value: string): string | undefined {
+  const trimmed = unquote(value.replace(/[,.]$/g, '').trim()); // NOTE(review): $-anchored so only one trailing [,.] is removed; /g is a no-op here
+  if (!trimmed) return undefined;
+
+  if (key === 'type') {
+    const normalizedType = normalizeEntityType(trimmed);
+    return NOTION_ENTITY_TYPES.has(normalizedType) ? normalizedType : undefined;
+  }
+
+  return trimmed;
+}
+
+function normalizeEntityType(value: string): string {
+  const normalized = value.toLowerCase().replace(/[-_\s]+/g, '');
+  if (normalized === 'page' || normalized === 'pages') return 'page';
+  if (normalized === 'database' || normalized === 'databases') return 'database';
+  if (normalized === 'block' || normalized === 'blocks') return 'block';
+  if (normalized === 'comment' || normalized === 'comments') return 'comment';
+  return normalized;
+}
+
+function unquote(value: string): string {
+  if (
+    (value.startsWith('"') && value.endsWith('"')) ||
+    (value.startsWith("'") && value.endsWith("'"))
+  ) {
+    return value.slice(1, -1);
+  }
+
+  return value;
+}
+
+function valuesForFilter(entry: VfsEntry, key: string): string[] {
+  const properties = entry.properties ?? {};
+
+  if (key === 'type') {
+    const type = inferEntityType(entry);
+    return [properties.type, type === 'unknown' ? undefined : type].filter(isString);
+  }
+  if (key === 'database') {
+    return expandPropertyValues(
+      properties.database,
+      properties.databaseId,
+      properties.databaseTitle,
+      properties.parentDatabase,
+      properties.parentId,
+    );
+  }
+  if (key === 'title') {
+    return expandPropertyValues(properties.title, properties.name, entry.title);
+  }
+  if (key === 'tag') {
+    return expandPropertyValues(properties.tag, properties.tags);
+  }
+  if (key === 'author') {
+    return expandPropertyValues(
+      properties.author,
+      properties.createdBy,
+      properties.lastEditedBy,
+      properties.lastEditedByName,
+    );
+  }
+
+  return [];
+}
+
+function inferEntityType(entry: VfsEntry): NotionEnumerationType | 'unknown' {
+  const propertyType = firstString(
+    entry.properties?.type,
+    entry.properties?.objectType,
+    entry.properties?.entityType,
+  )
+    .toLowerCase()
+    .replace(/[-_\s]+/g, '');
+
+  if (propertyType === 'page' || propertyType === 'notionpage') return 'page';
+  if (propertyType === 'database' || propertyType === 'notiondatabase') return 'database';
+  if (propertyType === 'block' || propertyType === 'notionblock') return 'block';
+  if (propertyType === 'comment' || propertyType === 'notioncomment') return 'comment';
+  return collectionItemTypeFromPath(entry.path) ?? 'unknown';
+}
+
+function toEvidence(
+  entry: VfsEntry,
+  type: NotionEnumerationType | 'unknown',
+): NotionEnumerationEvidence {
+  const properties = entry.properties ?? {};
+  const content: NotionEnumerationEvidenceContent = {
+    type: type === 'unknown' ? 'notion' : type,
+    path: entry.path,
+    title: firstString(entry.title, properties.title, properties.name, idFromPath(entry.path), entry.path),
+    database: firstString(
+      properties.database,
+      properties.databaseTitle,
+      properties.parentDatabase,
+      properties.parentId,
+    ),
+    tag: unique(expandPropertyValues(properties.tag, properties.tags)),
+    author: firstString(
+      properties.author,
+      properties.createdBy,
+      properties.lastEditedBy,
+      properties.lastEditedByName,
+    ),
+    properties,
+  };
+  const id = firstString(
+    properties.id,
+    properties.pageId,
+    properties.databaseId,
+    properties.blockId,
+    properties.commentId,
+    idFromPath(entry.path),
+    entry.path,
+  );
+  const identifier = firstString(properties.identifier, properties.slug, idFromPath(entry.path));
+  const databaseId = firstString(properties.databaseId, properties.parentId);
+  const snippet = snippetFromEntry(entry);
+
+  for (const key of ['provider', 'revision', 'updatedAt'] as const) {
+    const value = entry[key];
+    if (value) content[key] = value;
+  }
+  for (const key of ['createdAt', 'url'] as const) {
+    const value = properties[key];
+    if (value) content[key] = value;
+  }
+  if (identifier) content.identifier = identifier;
+  if (databaseId) content.databaseId = databaseId;
+  if (snippet) content.snippet = snippet;
+
+  return { id, kind: 'enumeration_hit', content };
+}
+
+function collectionItemTypeFromPath(path: string): NotionEnumerationType | undefined {
+  const match = /\/notion\/(pages|databases|blocks|comments)\/[^/]+(?:\.json)?$/i.exec(path);
+  if (match?.[1] === 'pages') return 'page';
+  if (match?.[1] === 'databases') return 'database';
+  if (match?.[1] === 'blocks') return 'block';
+  if (match?.[1] === 'comments') return 'comment';
+  return undefined;
+}
+
+function idFromPath(path: string): string | undefined {
+  const leaf = path.split('/').filter(Boolean).at(-1);
+  if (!leaf) return undefined;
+  return decodeSegment(leaf.replace(/\.json$/i, ''));
+}
+
+function expandPropertyValues(...values: Array<string | undefined>): string[] { // restored <string | undefined> — stripped by patch mangling
+  return values.flatMap((value) => expandPropertyValue(value));
+}
+
+function expandPropertyValue(value: string | undefined): string[] {
+  if (!value) return [];
+  const trimmed = value.trim();
+  if (!trimmed) return [];
+
+  if (trimmed.startsWith('[')) {
+    try {
+      const parsed: unknown = JSON.parse(trimmed);
+      if (Array.isArray(parsed)) return parsed.filter(isString).map((item) => item.trim()).filter(Boolean);
+    } catch {
+      // Fall through to comma-separated handling.
+    }
+  }
+
+  return trimmed.split(',').map((item) => item.trim()).filter(Boolean);
+}
+
+function cueValue(text: string, pattern: RegExp): string | undefined {
+  const match = pattern.exec(text);
+  const value = firstString(match?.[1], match?.[2], match?.[3]);
+  return value?.replace(/[,.]$/g, '');
+}
+
+function cloneFilters(filters: Record<string, string[]>): Record<string, string[]> { // restored <string, string[]> — stripped by patch mangling
+  return Object.fromEntries(Object.entries(filters).map(([key, values]) => [key, [...values]]));
+}
+
+function firstString(...values: Array<string | undefined>): string { // restored <string | undefined> — stripped by patch mangling
+  return values.find((value): value is string => typeof value === 'string' && value.length > 0) ?? '';
+}
+
+function snippetFromEntry(entry: VfsEntry): string | undefined {
+  return 'snippet' in entry && typeof entry.snippet === 'string' ? entry.snippet : undefined;
+}
+
+function decodeSegment(segment: string): string {
+  try {
+    return decodeURIComponent(segment);
+  } catch {
+    return segment;
+  }
+}
+
+function unique(values: string[]): string[] {
+  return [...new Set(values)];
+}
+
+function isString(value: unknown): value is string {
+  return typeof value === 'string';
+}
diff --git a/packages/specialists/src/notion/types.ts b/packages/specialists/src/notion/types.ts
new file mode 100644
index 0000000..74dff27
--- /dev/null
+++ b/packages/specialists/src/notion/types.ts
@@ -0,0 +1,28 @@
+export type NotionEnumerationCapability = 'notion.enumerate';
+export type NotionSearchProvider = 'notion';
+export type NotionEntityType = 'page' | 'database' | 'block' | 'comment';
+export type NotionFilterKey =
+  | 'type'
+  | 'database'
+  | 'title'
+  | 'tag'
+  | 'author'
+  | 'updated_window';
+
+export interface NotionQueryFilterSet {
+  type?: NotionEntityType[];
+  database?: string[];
+  title?: string[];
+  tag?: string[];
+  author?: string[];
+  updated_window?: string[];
+  [filter: string]: string[] | undefined;
+}
+
+export interface NotionEnumerationParams {
+  capability: NotionEnumerationCapability;
+  query?: string;
+  filters?: NotionQueryFilterSet;
+  cursor?: string;
+  limit?: number;
+}