diff --git a/tests/skill/understand/test_cleanup_workdirs.test.mjs b/tests/skill/understand/test_cleanup_workdirs.test.mjs new file mode 100644 index 00000000..c84a98c8 --- /dev/null +++ b/tests/skill/understand/test_cleanup_workdirs.test.mjs @@ -0,0 +1,120 @@ +import { describe, it, expect, afterEach } from 'vitest'; +import { + existsSync, + mkdtempSync, + mkdirSync, + readFileSync, + readdirSync, + rmSync, + utimesSync, + writeFileSync, +} from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join, resolve, dirname } from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const SCRIPT = resolve( + __dirname, + '../../../understand-anything-plugin/skills/understand/cleanup-workdirs.mjs', +); +const SKILL = resolve( + __dirname, + '../../../understand-anything-plugin/skills/understand/SKILL.md', +); +const NOW_MS = Date.UTC(2026, 4, 29, 12, 0, 0); + +function runCleanup(projectRoot, args = []) { + return spawnSync('node', [SCRIPT, projectRoot, `--now-ms=${NOW_MS}`, ...args], { + encoding: 'utf-8', + }); +} + +describe('cleanup-workdirs.mjs', () => { + let root; + + afterEach(() => { + if (root) rmSync(root, { recursive: true, force: true }); + root = undefined; + }); + + it('moves fresh intermediate and tmp directories into timestamped trash', () => { + root = mkdtempSync(join(tmpdir(), 'ua-cleanup-')); + const stateDir = join(root, '.understand-anything'); + mkdirSync(join(stateDir, 'intermediate', 'nested'), { recursive: true }); + mkdirSync(join(stateDir, 'tmp'), { recursive: true }); + writeFileSync(join(stateDir, 'intermediate', 'nested', 'graph.json'), '{"ok":true}'); + writeFileSync(join(stateDir, 'tmp', 'validate.cjs'), 'console.log("ok");'); + + const result = runCleanup(root); + + expect(result.status).toBe(0); + expect(result.stdout).toMatch(/moved intermediate, tmp/); + expect(existsSync(join(stateDir, 
'intermediate'))).toBe(false); + expect(existsSync(join(stateDir, 'tmp'))).toBe(false); + + const trashDirs = readdirSync(stateDir).filter((name) => name.startsWith('.trash-')); + expect(trashDirs).toHaveLength(1); + const trashDir = join(stateDir, trashDirs[0]); + + expect(readFileSync(join(trashDir, 'intermediate', 'nested', 'graph.json'), 'utf-8')) + .toBe('{"ok":true}'); + expect(readFileSync(join(trashDir, 'tmp', 'validate.cjs'), 'utf-8')) + .toBe('console.log("ok");'); + }); + + it('preserves scan-result.json for future incremental runs', () => { + root = mkdtempSync(join(tmpdir(), 'ua-cleanup-scan-result-')); + const stateDir = join(root, '.understand-anything'); + mkdirSync(join(stateDir, 'intermediate'), { recursive: true }); + writeFileSync(join(stateDir, 'intermediate', 'scan-result.json'), '{"files":[]}'); + writeFileSync(join(stateDir, 'intermediate', 'assembled-graph.json'), '{"nodes":[]}'); + + const result = runCleanup(root); + + expect(result.status).toBe(0); + expect(readFileSync(join(stateDir, 'intermediate', 'scan-result.json'), 'utf-8')) + .toBe('{"files":[]}'); + expect(existsSync(join(stateDir, 'intermediate', 'assembled-graph.json'))).toBe(false); + + const trashDirs = readdirSync(stateDir).filter((name) => name.startsWith('.trash-')); + expect(trashDirs).toHaveLength(1); + expect(readFileSync( + join(stateDir, trashDirs[0], 'intermediate', 'assembled-graph.json'), + 'utf-8', + )).toBe('{"nodes":[]}'); + }); + + it('purges expired trash while keeping recent trash', () => { + root = mkdtempSync(join(tmpdir(), 'ua-cleanup-purge-')); + const stateDir = join(root, '.understand-anything'); + const oldTrash = join(stateDir, '.trash-old'); + const recentTrash = join(stateDir, '.trash-recent'); + mkdirSync(oldTrash, { recursive: true }); + mkdirSync(recentTrash, { recursive: true }); + writeFileSync(join(oldTrash, 'old.txt'), 'old'); + writeFileSync(join(recentTrash, 'recent.txt'), 'recent'); + + const oldDate = new Date(NOW_MS - 8 * 24 * 60 * 60 * 
1000); + const recentDate = new Date(NOW_MS - 6 * 24 * 60 * 60 * 1000); + utimesSync(oldTrash, oldDate, oldDate); + utimesSync(recentTrash, recentDate, recentDate); + + const result = runCleanup(root, ['--retention-days=7']); + + expect(result.status).toBe(0); + expect(existsSync(oldTrash)).toBe(false); + expect(existsSync(recentTrash)).toBe(true); + expect(result.stdout).toMatch(/purged 1 expired trash directory/); + }); + + it('documents Phase 7 cleanup without immediate recursive deletion of fresh work dirs', () => { + const skill = readFileSync(SKILL, 'utf-8'); + + expect(skill).toContain('cleanup-workdirs.mjs'); + expect(skill).not.toMatch( + /rm\s+-rf\s+\$PROJECT_ROOT\/\.understand-anything\/(?:intermediate|tmp)/, + ); + }); +}); diff --git a/understand-anything-plugin/skills/understand/SKILL.md b/understand-anything-plugin/skills/understand/SKILL.md index 10bf9ccb..dc1ffc02 100644 --- a/understand-anything-plugin/skills/understand/SKILL.md +++ b/understand-anything-plugin/skills/understand/SKILL.md @@ -776,21 +776,11 @@ Report to the user: `[Phase 7/7] Saving knowledge graph...` } ``` -4. Clean up intermediate files, **preserving `scan-result.json`** so future incremental runs can skip Phase 1 SCAN (see issue #293). We `mv` scratch dirs into a timestamped `.trash-*` instead of `rm -rf`ing them directly — this avoids tripping destructive-action gates on hardened hosts (e.g. freshness-window checks) that flag deleting directories created moments earlier (see issue #301). The delayed-purge step in Phase 0 reclaims the space once the trash is older than 7 days. +4. Move intermediate files into reversible trash, **preserving `scan-result.json`** so future incremental runs can skip Phase 1 SCAN (see issue #293), and purge old trash: ```bash - # Preserve scan-result.json — Phase 1's deterministic file inventory. 
- # Future incremental runs (Phase 2 compute-batches.mjs --changed-files=…) - # need this inventory; without it, Phase 1 must re-dispatch and pay ~157k - # tokens / ~158s per incremental run. - TRASH="$PROJECT_ROOT/.understand-anything/.trash-$(date +%s)" - mkdir -p "$TRASH" - INTER="$PROJECT_ROOT/.understand-anything/intermediate" - if [ -d "$INTER" ]; then - # Move every entry except scan-result.json into the trash dir. - find "$INTER" -mindepth 1 -maxdepth 1 -not -name 'scan-result.json' -exec mv {} "$TRASH/" \; 2>/dev/null || true - fi - mv "$PROJECT_ROOT/.understand-anything/tmp" "$TRASH/" 2>/dev/null || true + node /cleanup-workdirs.mjs $PROJECT_ROOT ``` + The cleanup script keeps `.understand-anything/intermediate/scan-result.json` in place, renames the rest of the fresh `intermediate/` contents plus `tmp/` into a timestamped `.understand-anything/.trash-*` directory to avoid tripping destructive-action gates on just-created paths (see issue #301), then purges only trash directories older than 7 days. 5. Report a summary to the user containing: - Project name and description diff --git a/understand-anything-plugin/skills/understand/cleanup-workdirs.mjs b/understand-anything-plugin/skills/understand/cleanup-workdirs.mjs new file mode 100644 index 00000000..3bdce6d4 --- /dev/null +++ b/understand-anything-plugin/skills/understand/cleanup-workdirs.mjs @@ -0,0 +1,162 @@ +#!/usr/bin/env node +/** + * cleanup-workdirs.mjs - Phase 7 cleanup for /understand + * + * Moves fresh per-run work directories into a timestamped trash directory + * instead of deleting them in place. Old trash directories are purged after a + * retention window, so hardened hosts do not need to approve immediate + * recursive deletion of artifacts that were created moments earlier. 
/**
 * cleanup-workdirs.mjs - Phase 7 cleanup for /understand
 *
 * Moves the per-run work directories (`intermediate/` and `tmp/`) found under
 * `<project-root>/.understand-anything/` into a timestamped trash directory
 * instead of deleting them in place, then relies on a later run to purge trash
 * directories that are older than the retention window.
 * `intermediate/scan-result.json` is always left where it is.
 *
 * Usage:
 *   node cleanup-workdirs.mjs <project-root> [--retention-days=7]
 *
 * Writes:
 *   <project-root>/.understand-anything/.trash-<timestamp>-<pid>/
 */

import {
  existsSync,
  mkdirSync,
  readdirSync,
  renameSync,
  rmSync,
  rmdirSync,
  statSync,
} from 'node:fs';
import { basename, join, resolve } from 'node:path';

const MS_PER_DAY = 24 * 60 * 60 * 1000;
const DEFAULT_RETENTION_DAYS = 7;
const WORK_DIRS = ['intermediate', 'tmp'];
const USAGE = 'Usage: node cleanup-workdirs.mjs <project-root> [--retention-days=7]';

/**
 * Reads the numeric value of a `--name=value` argument.
 *
 * @param {string} arg - The full argument, e.g. `--retention-days=7`.
 * @param {string} prefix - The option prefix including `=`, e.g. `--retention-days=`.
 * @returns {number} The parsed number, or NaN when the value is empty or not numeric.
 */
function readNumericOption(arg, prefix) {
  const raw = arg.slice(prefix.length);
  // Number('') and Number('  ') evaluate to 0, which would silently turn an
  // empty value (e.g. an unset shell variable) into "zero", so treat an empty
  // value as not-a-number instead.
  return raw.trim() === '' ? Number.NaN : Number(raw);
}

/**
 * Parses the command line into the options used by `main`.
 *
 * @param {string[]} argv - Argument vector in `process.argv` layout: the first
 *   two entries are skipped, the third is the project root, the rest are options.
 * @returns {{ projectRoot: string, retentionDays: number, nowMs: number }}
 *   `projectRoot` is resolved to an absolute path; `retentionDays` defaults to
 *   7 and `nowMs` defaults to the current time.
 * @throws {Error} When the project root is missing or looks like an option,
 *   when an option value is invalid, or when an argument is unknown.
 */
function parseArgs(argv) {
  const [, , projectRootArg, ...rest] = argv;
  // A leading "--flag" means the positional argument was forgotten; resolving
  // it as a path would make main() create a directory named after the flag.
  if (!projectRootArg || projectRootArg.startsWith('--')) {
    throw new Error(USAGE);
  }

  const options = {
    projectRoot: resolve(projectRootArg),
    retentionDays: DEFAULT_RETENTION_DAYS,
    nowMs: Date.now(),
  };

  for (const arg of rest) {
    if (arg.startsWith('--retention-days=')) {
      const value = readNumericOption(arg, '--retention-days=');
      if (!Number.isFinite(value) || value < 0) {
        throw new Error(`Invalid --retention-days value: ${arg}`);
      }
      options.retentionDays = value;
    } else if (arg.startsWith('--now-ms=')) {
      // Test-only determinism hook. Normal skill usage should not pass this.
      const value = readNumericOption(arg, '--now-ms=');
      // The value must also be a representable Date, otherwise the purge would
      // run with a bogus cutoff before timestampForPath() throws.
      if (!Number.isFinite(value) || Number.isNaN(new Date(value).getTime())) {
        throw new Error(`Invalid --now-ms value: ${arg}`);
      }
      options.nowMs = value;
    } else {
      throw new Error(`Unknown argument: ${arg}`);
    }
  }

  return options;
}

/**
 * Formats an epoch-millisecond value as a compact UTC stamp for directory
 * names, e.g. `2026-05-29T12:00:00.000Z` becomes `20260529-120000000Z`.
 *
 * @param {number} nowMs - Milliseconds since the Unix epoch.
 * @returns {string} The ISO-8601 string with `-`, `:` and `.` removed and the
 *   `T` separator replaced by `-`.
 */
function timestampForPath(nowMs) {
  return new Date(nowMs)
    .toISOString()
    .replace(/[-:.]/g, '')
    .replace('T', '-');
}

/**
 * Deletes `.trash-*` directories directly under `stateDir` whose modification
 * time is at or before `nowMs - retentionDays` days.
 *
 * @param {string} stateDir - The `.understand-anything` directory to scan.
 * @param {number} nowMs - Reference "now" in epoch milliseconds.
 * @param {number} retentionDays - Age in days after which trash is purged.
 * @returns {string[]} Names of the trash directories that were removed; empty
 *   when `stateDir` does not exist.
 */
function purgeExpiredTrash(stateDir, nowMs, retentionDays) {
  if (!existsSync(stateDir)) return [];

  const cutoffMs = nowMs - retentionDays * MS_PER_DAY;
  const purged = [];

  for (const entry of readdirSync(stateDir, { withFileTypes: true })) {
    if (!entry.isDirectory() || !entry.name.startsWith('.trash-')) continue;

    const fullPath = join(stateDir, entry.name);
    // Anything modified after the cutoff is still inside the retention window.
    if (statSync(fullPath).mtimeMs > cutoffMs) continue;

    rmSync(fullPath, { recursive: true, force: true });
    purged.push(entry.name);
  }

  return purged;
}

/**
 * Moves the work directories listed in `WORK_DIRS` into a newly created
 * `.trash-<timestamp>-<pid>` directory under `stateDir`.
 *
 * `intermediate/` is moved entry by entry so that `scan-result.json` stays in
 * place; `tmp/` is renamed as a whole. The trash directory is only created
 * when there is something to move.
 *
 * @param {string} stateDir - The `.understand-anything` directory.
 * @param {number} nowMs - Epoch milliseconds used for the trash directory name.
 * @returns {{ trashDir: string | null, moved: { name: string, destination: string }[] }}
 *   `trashDir` is null when nothing was moved.
 */
function moveWorkDirs(stateDir, nowMs) {
  const moved = [];
  let trashDir = null;

  // Created lazily so a run with nothing to move leaves no empty trash behind.
  const ensureTrashDir = () => {
    if (!trashDir) {
      trashDir = join(stateDir, `.trash-${timestampForPath(nowMs)}-${process.pid}`);
      mkdirSync(trashDir, { recursive: true });
    }
    return trashDir;
  };

  for (const name of WORK_DIRS) {
    const source = join(stateDir, name);
    if (!existsSync(source)) continue;

    if (name !== 'intermediate') {
      const destination = join(ensureTrashDir(), basename(source));
      renameSync(source, destination);
      moved.push({ name, destination });
      continue;
    }

    const allEntries = readdirSync(source, { withFileTypes: true });
    const entries = allEntries.filter((entry) => entry.name !== 'scan-result.json');
    if (entries.length === 0) continue;

    const destination = join(ensureTrashDir(), basename(source));
    mkdirSync(destination, { recursive: true });
    for (const entry of entries) {
      renameSync(join(source, entry.name), join(destination, entry.name));
    }

    // Keep intermediate/ when scan-result.json remains for incremental runs;
    // only an emptied directory is removed.
    if (entries.length === allEntries.length) {
      try {
        rmdirSync(source);
      } catch {
        // Best effort: the contents are already in the trash, so a leftover
        // empty (or concurrently refilled) directory must not fail the cleanup.
      }
    }
    moved.push({ name, destination });
  }

  return { trashDir, moved };
}

/**
 * Entry point: ensures the state directory exists, purges expired trash, moves
 * the current work directories into fresh trash, and prints a one-line summary
 * to stdout.
 */
function main() {
  const { projectRoot, retentionDays, nowMs } = parseArgs(process.argv);
  const stateDir = join(projectRoot, '.understand-anything');

  mkdirSync(stateDir, { recursive: true });

  // Purge first so the trash directory created below is never a candidate.
  const purged = purgeExpiredTrash(stateDir, nowMs, retentionDays);
  const { trashDir, moved } = moveWorkDirs(stateDir, nowMs);

  const movedNames = moved.map((item) => item.name).join(', ') || 'none';
  const trashMessage = trashDir ? ` into ${trashDir}` : '';
  process.stdout.write(
    `Cleanup complete: moved ${movedNames}${trashMessage}; ` +
      `purged ${purged.length} expired trash director${purged.length === 1 ? 'y' : 'ies'}.\n`,
  );
}

try {
  main();
} catch (err) {
  const message = err instanceof Error ? err.message : String(err);
  process.stderr.write(`Cleanup failed: ${message}\n`);
  process.exit(1);
}