diff --git a/src/core/cli/core_commands.js b/src/core/cli/core_commands.js index ce5d0fc..4625202 100644 --- a/src/core/cli/core_commands.js +++ b/src/core/cli/core_commands.js @@ -35,6 +35,8 @@ import { } from './remote_commands.js' import { CORE_VERBS } from './core_verbs.js' import { verbToCommand } from './verb_command.js' +import { createUsagePolicyResolver, findRepoRoot } from '../usage-policy/index.js' +import { executeQuerySql } from '../query/sql.js' // `query sql` migrated to a verb (LLP 0034 §verbs): it is registered by // `registerCoreVerbs` and projected into both a CLI command and an MCP @@ -260,10 +262,16 @@ function buildCoreCommands() { }, { name: 'ignore', - summary: 'Mark the current session as ignored by recording sources', - usage: 'hyp ignore', + summary: 'Write a .hypignore so HypAware never records this folder subtree (--check reports status)', + usage: 'hyp ignore [path] [--check] [--json]', run: runIgnore, }, + { + name: 'unignore', + summary: 'Remove the governing .hypignore so HypAware records this folder subtree again', + usage: 'hyp unignore [path]', + run: runUnignore, + }, { name: 'skills install', summary: 'Install registered skills into AI client directories', @@ -3665,15 +3673,239 @@ function expandClientName(requested, gateway) { return [requested] } +// The body written by `hyp ignore`: a self-documenting `.hypignore` whose +// first meaningful token is the `ignore` usage class. The comment header +// explains the file to whoever finds it in a checkout; the matcher only ever +// reads the token (LLP 0049 #file-format). +const HYPIGNORE_TEMPLATE = `# HypAware usage policy — .hypignore +# +# This folder and everything beneath it is IGNORED: AI gateway exchanges +# (Claude / Codex) whose working directory is at or under this directory are +# never written to the local HypAware cache, for live capture and backfill +# alike. Recording is suppressed at the capture seam; the live LLM call is +# untouched (LLP 0049 / LLP 0050). 
/**
 * Parse the argv shared by `hyp ignore` and `hyp unignore`.
 *
 * Recognised tokens:
 *   - `--check` sets `check` (each verb decides whether that is valid for it)
 *   - `--json`  sets `json`
 *   - `--`      ends option parsing: every later token is taken as the path, so
 *               a directory whose name starts with `-` can still be targeted
 *   - any other token that does not start with `-` is the single optional
 *     positional path
 *
 * Problems are reported through `error` (parsing stops at the first one)
 * rather than thrown, so the caller can print the message and pick its own
 * exit status.
 *
 * @param {string[]} argv
 * @returns {{ check: boolean, json: boolean, path?: string, error?: string }}
 */
function parseIgnoreArgs(argv) {
  /** @type {{ check: boolean, json: boolean, path?: string, error?: string }} */
  const r = { check: false, json: false }
  let optionsEnded = false
  for (const arg of argv) {
    if (!optionsEnded) {
      if (arg === '--') { optionsEnded = true; continue }
      if (arg === '--check') { r.check = true; continue }
      if (arg === '--json') { r.json = true; continue }
      if (arg.startsWith('-')) { r.error = `unknown argument: ${arg}`; return r }
    }
    if (r.path !== undefined) { r.error = `unexpected extra argument: ${arg}`; return r }
    // An empty path is never a usable target: downstream it is falsy (so it
    // would be treated as "no path given") yet still passed to `path.resolve`,
    // which resolves it to the process cwd. Reject it up front instead.
    if (arg === '') { r.error = 'path must not be empty'; return r }
    r.path = arg
  }
  return r
}
/**
 * `hyp ignore [path] [--check] [--json]`
 *
 * Without `--check`, writes `HYPIGNORE_TEMPLATE` to a `.hypignore` file so the
 * folder subtree stops being recorded. With `--check`, delegates to
 * `runIgnoreCheck` and writes nothing.
 *
 * Placement: when no `path` is given the file goes to the git repo root that
 * contains the command's cwd (or the cwd itself outside a repo); an explicit
 * `path` is used exactly as given. A relative `path` is resolved against
 * `ctx.cwd` — the directory the command was invoked from — not against the
 * process working directory.
 *
 * Idempotent (LLP 0049 R5): when some `.hypignore` already governs the target
 * the command reports it and succeeds without writing.
 *
 * Exit status: 0 on success (including the already-ignored no-op), 1 when the
 * file cannot be written, 2 on an argument error.
 *
 * @ref LLP 0049#cli [implements]: the `hyp ignore` verb — write the dotfile at the repo root, idempotent, with a prospective-only `--check`
 * @param {string[]} argv
 * @param {CommandRunContext} ctx
 * @returns {Promise<number>}
 */
async function runIgnore(argv, ctx) {
  const parsed = parseIgnoreArgs(argv)
  if (parsed.error) {
    ctx.stderr.write(`error: ${parsed.error}\n`)
    return 2
  }
  if (parsed.check) return runIgnoreCheck(parsed, ctx)

  // Anchor at ctx.cwd so a relative `path` means "relative to where the
  // command ran". An absolute `path` still wins, and a missing one contributes
  // an empty segment, which `path.resolve` skips.
  const base = path.resolve(ctx.cwd, parsed.path ?? '')
  // Idempotent (R5): a fresh resolver reflects disk. Any governing ancestor
  // `.hypignore` already ignores `base` (V1 has no un-ignore directive — any
  // `.hypignore` resolves to `ignore`), so re-ignoring is a no-op success
  // rather than a redundant nested file.
  const existing = createUsagePolicyResolver().resolve(base)
  if (existing.governedBy) {
    ctx.stdout.write(`already ignored (governed by ${existing.governedBy})\n`)
    return 0
  }

  // Default target: the repo root when `base` is in a git repo, else `base`.
  // An explicit `path` overrides — write exactly where the caller pointed.
  const targetDir = parsed.path ? base : (findRepoRoot(base) ?? base)
  const file = path.join(targetDir, '.hypignore')
  try {
    await fs.writeFile(file, HYPIGNORE_TEMPLATE)
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err)
    ctx.stderr.write(`error: could not write ${file}: ${message}\n`)
    return 1
  }
  getLogger('usage-policy').info('usage_policy.ignore_write', {
    [Attr.COMPONENT]: 'cmd-ignore',
    [Attr.OPERATION]: 'usage_policy.ignore_write',
    status: 'ok',
  })
  ctx.stdout.write(`wrote ${file}\n`)
  return 0
}
/**
 * `hyp unignore [path]`
 *
 * Removes the nearest `.hypignore` governing `path` (default: the command's
 * cwd). A relative `path` is resolved against `ctx.cwd`, not against the
 * process working directory.
 *
 * Only that one file is removed. If a farther ancestor carries its own
 * `.hypignore`, the subtree is still ignored afterwards; in that case an extra
 * `note:` line names the remaining governor so the result is not mistaken for
 * "recording re-enabled".
 *
 * Idempotent (LLP 0049 R5): unignoring a path that no `.hypignore` governs
 * succeeds as a no-op.
 *
 * Exit status: 0 on success (including the no-op), 1 when the file cannot be
 * removed, 2 on an argument error (`--check` is rejected here).
 *
 * @ref LLP 0049#cli [implements]: the `hyp unignore` verb — remove the governing dotfile, idempotent
 * @param {string[]} argv
 * @param {CommandRunContext} ctx
 * @returns {Promise<number>}
 */
async function runUnignore(argv, ctx) {
  const parsed = parseIgnoreArgs(argv)
  if (parsed.error) {
    ctx.stderr.write(`error: ${parsed.error}\n`)
    return 2
  }
  if (parsed.check) {
    ctx.stderr.write('error: --check is only valid for `hyp ignore`\n')
    return 2
  }

  // Anchor at ctx.cwd so a relative `path` is relative to where the command
  // ran; an absent path contributes an empty segment that `path.resolve` skips.
  const base = path.resolve(ctx.cwd, parsed.path ?? '')
  const { governedBy } = createUsagePolicyResolver().resolve(base)
  if (!governedBy) {
    ctx.stdout.write(`not ignored (no .hypignore governs ${base})\n`)
    return 0
  }
  try {
    await fs.rm(governedBy, { force: true })
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err)
    ctx.stderr.write(`error: could not remove ${governedBy}: ${message}\n`)
    return 1
  }
  getLogger('usage-policy').info('usage_policy.unignore_remove', {
    [Attr.COMPONENT]: 'cmd-unignore',
    [Attr.OPERATION]: 'usage_policy.unignore_remove',
    status: 'ok',
  })
  ctx.stdout.write(`removed ${governedBy}\n`)

  // Re-resolve with a fresh resolver (so it reflects the disk after the
  // removal): an outer `.hypignore` may still cover `base`.
  const remaining = createUsagePolicyResolver().resolve(base).governedBy
  if (remaining) {
    ctx.stdout.write(`note: still ignored (governed by ${remaining})\n`)
  }
  return 0
}
/**
 * Count already-cached `ai_gateway_messages` rows whose `cwd` or `repo_root`
 * is `scopeDir` itself or lies beneath it — the residue an `ignore` does NOT
 * purge (prospective-only).
 *
 * The SQL is only a coarse pre-filter: per the original note, squirreling's
 * LIKE treats `_`/`%` in the path as wildcards, so it can over-match but never
 * under-match. Each returned row is then re-checked exactly with `isUnderDir`,
 * so the reported count is precise.
 *
 * The LIKE prefix is built the same way `isUnderDir` builds its prefix (one
 * trailing `/`, never two). Otherwise a scope that already ends in `/` — the
 * filesystem root — would produce `'//%'`, match nothing, and under-count.
 *
 * `scopeDir` is interpolated into the SQL text with single quotes doubled; it
 * is a local directory path chosen by the caller, not remote input.
 *
 * Best-effort: any failure while running the query (for example the dataset
 * not being registered, or the cache being unreadable) yields `null`, which
 * the caller renders as `unknown` instead of failing the command.
 *
 * @param {string} scopeDir
 * @param {CommandRunContext} ctx
 * @returns {Promise<number | null>}
 */
async function countResidualCachedRows(scopeDir, ctx) {
  const scopePrefix = scopeDir.endsWith('/') ? scopeDir : `${scopeDir}/`
  const lit = scopeDir.replace(/'/g, "''")
  const likePrefix = scopePrefix.replace(/'/g, "''")
  const sql =
    `SELECT cwd, repo_root FROM ai_gateway_messages ` +
    `WHERE cwd = '${lit}' OR cwd LIKE '${likePrefix}%' ` +
    `OR repo_root = '${lit}' OR repo_root LIKE '${likePrefix}%'`
  try {
    const out = await executeQuerySql({
      query: sql,
      registry: ctx.query,
      storage: /** @type {ExtendedQueryStorageService} */ (ctx.storage),
      refresh: 'never',
      config: ctx.config,
    })
    let n = 0
    for (const row of out.rows ?? []) {
      // Null/undefined columns become '' which `isUnderDir` never matches.
      const cwd = row.cwd == null ? '' : String(row.cwd)
      const repoRoot = row.repo_root == null ? '' : String(row.repo_root)
      if (isUnderDir(cwd, scopeDir) || isUnderDir(repoRoot, scopeDir)) n += 1
    }
    return n
  } catch {
    // Deliberate best-effort: the count is informational only.
    return null
  }
}

/**
 * True when `p` is `dir` itself or a path strictly beneath it. An empty `p`
 * is never under anything. Comparison is textual with `/` as the separator.
 *
 * @param {string} p
 * @param {string} dir
 * @returns {boolean}
 */
function isUnderDir(p, dir) {
  if (p === '') return false
  if (p === dir) return true
  // Append the separator so `/a/bc` is not mistaken for a child of `/a/b`.
  const prefix = dir.endsWith('/') ? dir : `${dir}/`
  return p.startsWith(prefix)
}
/**
 * Locate the git repository root that contains `startDir`.
 *
 * Starting at `startDir` (made absolute with `path.resolve`) and moving up one
 * directory at a time, the first directory that holds a `.git` entry is
 * returned. Only existence is tested, so the entry may be a directory (an
 * ordinary clone) or a file (a linked worktree or submodule). When the walk
 * reaches the filesystem root without a match the result is `null`.
 *
 * No git process is spawned, and the existence probe is injectable so the
 * function can be unit-tested without touching the disk.
 *
 * @param {string} startDir
 * @param {object} [fs]
 * @param {(p: string) => boolean} [fs.existsSync]
 * @returns {string | null}
 */
export function findRepoRoot(startDir, { existsSync = nodeFs.existsSync } = {}) {
  let candidate = path.resolve(startDir)
  let previous
  do {
    if (existsSync(path.join(candidate, GIT_ENTRY))) return candidate
    previous = candidate
    candidate = path.dirname(candidate)
    // `path.dirname` of the filesystem root is the root itself: stop there.
  } while (candidate !== previous)
  return null
}
/**
 * In-memory stand-in for a writable stream: `write` stringifies and records
 * each chunk (always reporting success), `text` returns everything written so
 * far as one string.
 *
 * @returns {{ write(chunk: unknown): boolean, text(): string }}
 */
function makeBuf() {
  /** @type {string[]} */
  const pieces = []
  return {
    write: (chunk) => {
      pieces.push(String(chunk))
      return true
    },
    text: () => pieces.join(''),
  }
}
/**
 * Create a fresh `hypign-*` directory under the OS temp dir, hand it to `fn`,
 * and delete it (recursively) once `fn` has settled — whether it returned,
 * resolved, threw, or rejected. A failure from `fn` propagates to the caller
 * after the cleanup.
 *
 * @param {(dir: string) => Promise<void> | void} fn
 */
async function withTempTree(fn) {
  const scratch = mkdtempSync(path.join(tmpdir(), 'hypign-'))
  const cleanup = () => rmSync(scratch, { recursive: true, force: true })
  try {
    await fn(scratch)
  } finally {
    cleanup()
  }
}
+ const res = await run('ignore', [], { cwd: root }) + assert.equal(res.code, 0) + assert.ok(existsSync(path.join(root, '.hypignore'))) + }) +}) + +test('hyp ignore [path] writes exactly at the explicit path, overriding the repo root', async () => { + await withTempTree(async (root) => { + mkdirSync(path.join(root, '.git')) + const target = path.join(root, 'pkg') + mkdirSync(target) + + const res = await run('ignore', [target], { cwd: root }) + assert.equal(res.code, 0) + assert.ok(existsSync(path.join(target, '.hypignore')), 'explicit path overrides repo-root placement') + assert.ok(!existsSync(path.join(root, '.hypignore'))) + }) +}) + +test('hyp ignore is idempotent: re-ignoring an already-ignored path is a no-op success', async () => { + await withTempTree(async (root) => { + mkdirSync(path.join(root, '.git')) + const sub = path.join(root, 'a', 'b') + mkdirSync(sub, { recursive: true }) + + const first = await run('ignore', [], { cwd: sub }) + assert.equal(first.code, 0) + const file = path.join(root, '.hypignore') + const before = readFileSync(file, 'utf8') + + const second = await run('ignore', [], { cwd: sub }) + assert.equal(second.code, 0, 'second ignore still succeeds (R5)') + assert.match(second.stdout, /already ignored/) + assert.match(second.stdout, new RegExp(file.replace(/[.\\]/g, '\\$&'))) + assert.equal(readFileSync(file, 'utf8'), before, 'the existing file is not rewritten or clobbered') + }) +}) + +/* -------------------------------- unignore ------------------------------- */ + +test('hyp unignore removes the governing .hypignore and is idempotent', async () => { + await withTempTree(async (root) => { + mkdirSync(path.join(root, '.git')) + const file = path.join(root, '.hypignore') + writeFileSync(file, 'ignore\n') + const sub = path.join(root, 'x') + mkdirSync(sub) + + const first = await run('unignore', [], { cwd: sub }) + assert.equal(first.code, 0) + assert.match(first.stdout, /removed/) + assert.ok(!existsSync(file), 'the governing file is 
gone') + + const second = await run('unignore', [], { cwd: sub }) + assert.equal(second.code, 0, 'unignoring an unignored path still succeeds (R5)') + assert.match(second.stdout, /not ignored/) + }) +}) + +/* ------------------------------ ignore --check --------------------------- */ + +test('hyp ignore --check reports an ignored path, its governor, and residual count', async () => { + await withTempTree(async (root) => { + const file = path.join(root, '.hypignore') + writeFileSync(file, 'ignore\n') + + const res = await run('ignore', ['--check'], { cwd: root }) + assert.equal(res.code, 0) + assert.match(res.stdout, /ignored: yes/) + assert.match(res.stdout, new RegExp(`governed-by: ${file.replace(/[.\\]/g, '\\$&')}`)) + // No `ai_gateway_messages` dataset registered in this ctx => residual is + // reported as `unknown` rather than failing the command. + assert.match(res.stdout, /residual-cached-rows: unknown/) + }) +}) + +test('hyp ignore --check reports a clean path as not ignored with zero residue', async () => { + await withTempTree(async (root) => { + const res = await run('ignore', ['--check'], { cwd: root }) + assert.equal(res.code, 0) + assert.match(res.stdout, /ignored: no/) + assert.match(res.stdout, /governed-by: \(none\)/) + assert.match(res.stdout, /residual-cached-rows: 0/) + }) +}) + +test('hyp ignore --check --json emits a machine-readable status', async () => { + await withTempTree(async (root) => { + const file = path.join(root, '.hypignore') + writeFileSync(file, 'ignore\n') + + const res = await run('ignore', ['--check', '--json'], { cwd: root }) + assert.equal(res.code, 0) + const parsed = JSON.parse(res.stdout) + assert.equal(parsed.ignored, true) + assert.equal(parsed.governedBy, file) + assert.equal(parsed.class, 'ignore') + }) +}) + +test('hyp ignore --check counts already-cached rows under the scope (LIKE superset, refined exactly)', async () => { + await withTempTree(async (root) => { + const scope = path.join(root, 'my_app') // 
/**
 * Build a minimal in-memory `ai_gateway_messages` dataset, plus the
 * registry/storage pair that exposes it, so `executeQuerySql` can run the
 * residual-count query against fixed rows.
 *
 * The data source never applies filtering or paging itself (`appliedWhere` and
 * `appliedLimitOffset` are both `false`); every scan yields every row of
 * `data`, wrapped with `asyncRow` over the requested columns.
 *
 * @param {Record<string, unknown>[]} data
 */
function makeAiGatewayCache(data) {
  const datasetName = 'ai_gateway_messages'
  const columns = ['cwd', 'repo_root']

  /** @param {{ columns?: string[] }} [opts] */
  function scan(opts) {
    const wanted = opts?.columns ?? columns
    return {
      async *rows() {
        for (const record of data) yield asyncRow(/** @type {any} */ (record), wanted)
      },
      appliedWhere: false,
      appliedLimitOffset: false,
    }
  }

  const dataset = {
    name: datasetName,
    plugin: 'test',
    schema: { columns: columns.map((name) => ({ name, type: 'string' })) },
    discoverPartitions: async () => [],
    createDataSource: () => ({ numRows: data.length, columns, scan }),
  }

  return {
    query: {
      getDataset: (/** @type {string} */ name) => (name === datasetName ? dataset : undefined),
      listDatasets: () => [dataset],
    },
    storage: {
      cacheRoot: '/tmp/hypaware-ignore-test',
      pendingInfo: async () => ({ pending: false }),
    },
  }
}
/**
 * Build a fake `fs` exposing only `existsSync`, which reports `true` for
 * exactly the paths listed in `present` (captured when the fake is created).
 *
 * @param {string[]} present Absolute paths existsSync should report true for.
 */
function fakeExistsFs(present) {
  const known = new Set(present)
  /** @param {string} p */
  function existsSync(p) {
    return known.has(p)
  }
  return { existsSync }
}