Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions tests/skill/understand/test_cleanup_workdirs.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import { describe, it, expect, afterEach } from 'vitest';
import {
existsSync,
mkdtempSync,
mkdirSync,
readFileSync,
readdirSync,
rmSync,
utimesSync,
writeFileSync,
} from 'node:fs';
import { tmpdir } from 'node:os';
import { join, resolve, dirname } from 'node:path';
import { spawnSync } from 'node:child_process';
import { fileURLToPath } from 'node:url';

// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Absolute path to the cleanup script that the tests spawn.
const SCRIPT = resolve(
__dirname,
'../../../understand-anything-plugin/skills/understand/cleanup-workdirs.mjs',
);
// Absolute path to the skill document inspected by the documentation test.
const SKILL = resolve(
__dirname,
'../../../understand-anything-plugin/skills/understand/SKILL.md',
);
// Fixed "current time" handed to the script via --now-ms:
// 2026-05-29T12:00:00Z (Date.UTC months are 0-indexed, so 4 is May).
const NOW_MS = Date.UTC(2026, 4, 29, 12, 0, 0);

/**
 * Runs cleanup-workdirs.mjs synchronously against a project root.
 *
 * The script is always given `--now-ms=<NOW_MS>` so its notion of "now" is
 * deterministic; any extra flags are appended after it.
 *
 * @param {string} projectRoot - Project directory passed as the script's first argument.
 * @param {string[]} [args=[]] - Additional CLI flags (e.g. `--retention-days=7`).
 * @returns {import('node:child_process').SpawnSyncReturns<string>} Result with
 *   UTF-8 decoded `stdout`/`stderr` and the exit `status`.
 */
function runCleanup(projectRoot, args = []) {
  // Use the Node binary that is running the tests rather than whatever `node`
  // resolves to on PATH (which may be absent or a different version).
  return spawnSync(process.execPath, [SCRIPT, projectRoot, `--now-ms=${NOW_MS}`, ...args], {
    encoding: 'utf-8',
  });
}

describe('cleanup-workdirs.mjs', () => {
  // Temp project root of the test currently running; removed in afterEach.
  let root;

  // Creates a fresh temp project root and returns its state-directory path.
  const createStateDir = (prefix) => {
    root = mkdtempSync(join(tmpdir(), prefix));
    return join(root, '.understand-anything');
  };

  // Names of all trash directories currently present in the state directory.
  const listTrashDirs = (stateDir) =>
    readdirSync(stateDir).filter((name) => name.startsWith('.trash-'));

  // Reads a UTF-8 file addressed by path segments.
  const readText = (...segments) => readFileSync(join(...segments), 'utf-8');

  afterEach(() => {
    if (root) rmSync(root, { recursive: true, force: true });
    root = undefined;
  });

  it('moves fresh intermediate and tmp directories into timestamped trash', () => {
    const stateDir = createStateDir('ua-cleanup-');
    const intermediateDir = join(stateDir, 'intermediate');
    const tmpDir = join(stateDir, 'tmp');
    mkdirSync(join(intermediateDir, 'nested'), { recursive: true });
    mkdirSync(tmpDir, { recursive: true });
    writeFileSync(join(intermediateDir, 'nested', 'graph.json'), '{"ok":true}');
    writeFileSync(join(tmpDir, 'validate.cjs'), 'console.log("ok");');

    const { status, stdout } = runCleanup(root);

    expect(status).toBe(0);
    expect(stdout).toMatch(/moved intermediate, tmp/);
    expect(existsSync(intermediateDir)).toBe(false);
    expect(existsSync(tmpDir)).toBe(false);

    const trashDirs = listTrashDirs(stateDir);
    expect(trashDirs).toHaveLength(1);
    const [trashName] = trashDirs;

    expect(readText(stateDir, trashName, 'intermediate', 'nested', 'graph.json'))
      .toBe('{"ok":true}');
    expect(readText(stateDir, trashName, 'tmp', 'validate.cjs'))
      .toBe('console.log("ok");');
  });

  it('preserves scan-result.json for future incremental runs', () => {
    const stateDir = createStateDir('ua-cleanup-scan-result-');
    const intermediateDir = join(stateDir, 'intermediate');
    const scanResultPath = join(intermediateDir, 'scan-result.json');
    const assembledPath = join(intermediateDir, 'assembled-graph.json');
    mkdirSync(intermediateDir, { recursive: true });
    writeFileSync(scanResultPath, '{"files":[]}');
    writeFileSync(assembledPath, '{"nodes":[]}');

    const { status } = runCleanup(root);

    expect(status).toBe(0);
    expect(readText(scanResultPath)).toBe('{"files":[]}');
    expect(existsSync(assembledPath)).toBe(false);

    const trashDirs = listTrashDirs(stateDir);
    expect(trashDirs).toHaveLength(1);
    expect(readText(stateDir, trashDirs[0], 'intermediate', 'assembled-graph.json'))
      .toBe('{"nodes":[]}');
  });

  it('purges expired trash while keeping recent trash', () => {
    const stateDir = createStateDir('ua-cleanup-purge-');
    const dayMs = 24 * 60 * 60 * 1000;
    const oldTrash = join(stateDir, '.trash-old');
    const recentTrash = join(stateDir, '.trash-recent');

    // Populate each trash directory, then back-date it relative to NOW_MS:
    // one just past the 7-day retention window, one just inside it.
    const fixtures = [
      { dir: oldTrash, file: 'old.txt', content: 'old', ageDays: 8 },
      { dir: recentTrash, file: 'recent.txt', content: 'recent', ageDays: 6 },
    ];
    for (const { dir, file, content, ageDays } of fixtures) {
      mkdirSync(dir, { recursive: true });
      writeFileSync(join(dir, file), content);
      const stamp = new Date(NOW_MS - ageDays * dayMs);
      utimesSync(dir, stamp, stamp);
    }

    const { status, stdout } = runCleanup(root, ['--retention-days=7']);

    expect(status).toBe(0);
    expect(existsSync(oldTrash)).toBe(false);
    expect(existsSync(recentTrash)).toBe(true);
    expect(stdout).toMatch(/purged 1 expired trash directory/);
  });

  it('documents Phase 7 cleanup without immediate recursive deletion of fresh work dirs', () => {
    const skillDoc = readFileSync(SKILL, 'utf-8');

    expect(skillDoc).toContain('cleanup-workdirs.mjs');
    expect(skillDoc).not.toMatch(
      /rm\s+-rf\s+\$PROJECT_ROOT\/\.understand-anything\/(?:intermediate|tmp)/,
    );
  });
});
16 changes: 3 additions & 13 deletions understand-anything-plugin/skills/understand/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -776,21 +776,11 @@ Report to the user: `[Phase 7/7] Saving knowledge graph...`
}
```

4. Clean up intermediate files, **preserving `scan-result.json`** so future incremental runs can skip Phase 1 SCAN (see issue #293). We `mv` scratch dirs into a timestamped `.trash-*` instead of `rm -rf`ing them directly — this avoids tripping destructive-action gates on hardened hosts (e.g. freshness-window checks) that flag deleting directories created moments earlier (see issue #301). The delayed-purge step in Phase 0 reclaims the space once the trash is older than 7 days.
4. Move intermediate files into reversible trash, **preserving `scan-result.json`** so future incremental runs can skip Phase 1 SCAN (see issue #293), and purge old trash:
```bash
# Preserve scan-result.json — Phase 1's deterministic file inventory.
# Future incremental runs (Phase 2 compute-batches.mjs --changed-files=…)
# need this inventory; without it, Phase 1 must re-dispatch and pay ~157k
# tokens / ~158s per incremental run.
TRASH="$PROJECT_ROOT/.understand-anything/.trash-$(date +%s)"
mkdir -p "$TRASH"
INTER="$PROJECT_ROOT/.understand-anything/intermediate"
if [ -d "$INTER" ]; then
# Move every entry except scan-result.json into the trash dir.
find "$INTER" -mindepth 1 -maxdepth 1 -not -name 'scan-result.json' -exec mv {} "$TRASH/" \; 2>/dev/null || true
fi
mv "$PROJECT_ROOT/.understand-anything/tmp" "$TRASH/" 2>/dev/null || true
node <SKILL_DIR>/cleanup-workdirs.mjs "$PROJECT_ROOT"
```
The cleanup script keeps `.understand-anything/intermediate/scan-result.json` in place, renames the rest of the fresh `intermediate/` contents plus `tmp/` into a timestamped `.understand-anything/.trash-*` directory to avoid tripping destructive-action gates on just-created paths (see issue #301), then purges only trash directories older than 7 days.

5. Report a summary to the user containing:
- Project name and description
Expand Down
162 changes: 162 additions & 0 deletions understand-anything-plugin/skills/understand/cleanup-workdirs.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/usr/bin/env node
/**
* cleanup-workdirs.mjs - Phase 7 cleanup for /understand
*
* Moves fresh per-run work directories into a timestamped trash directory
* instead of deleting them in place. Old trash directories are purged after a
* retention window, so hardened hosts do not need to approve immediate
* recursive deletion of artifacts that were created moments earlier.
*
* Usage:
* node cleanup-workdirs.mjs <project-root> [--retention-days=7]
*
* Writes:
* <project-root>/.understand-anything/.trash-<timestamp>-<pid>/
*/

import {
existsSync,
mkdirSync,
readdirSync,
renameSync,
rmSync,
rmdirSync,
statSync,
} from 'node:fs';
import { basename, join, resolve } from 'node:path';

// Milliseconds in one day; converts the retention window (days) into a cutoff timestamp.
const MS_PER_DAY = 24 * 60 * 60 * 1000;
// Retention window, in days, used when --retention-days is not passed.
const DEFAULT_RETENTION_DAYS = 7;
// Per-run work directories under .understand-anything/ that are moved into trash.
const WORK_DIRS = ['intermediate', 'tmp'];

/**
 * Parses the script's command line.
 *
 * @param {string[]} argv - Full `process.argv`-style array; the first two
 *   entries (runtime and script path) are ignored.
 * @returns {{ projectRoot: string, retentionDays: number, nowMs: number }}
 *   `projectRoot` resolved to an absolute path, the retention window in days,
 *   and the timestamp (ms since epoch) to treat as "now".
 * @throws {Error} When the project root is missing, a flag is unknown, or a
 *   flag value is blank, non-numeric, non-finite, or (for retention) negative.
 */
function parseArgs(argv) {
  const [, , projectRootArg, ...rest] = argv;
  if (!projectRootArg) {
    throw new Error('Usage: node cleanup-workdirs.mjs <project-root> [--retention-days=7]');
  }

  const options = {
    projectRoot: resolve(projectRootArg),
    retentionDays: DEFAULT_RETENTION_DAYS,
    nowMs: Date.now(),
  };

  // Number('') and Number('   ') evaluate to 0, so a blank value such as
  // `--retention-days=` would otherwise be accepted as zero retention and
  // purge every trash directory. Treat blank values as invalid instead.
  const toNumber = (raw) => (raw.trim() === '' ? Number.NaN : Number(raw));

  for (const arg of rest) {
    if (arg.startsWith('--retention-days=')) {
      const value = toNumber(arg.slice('--retention-days='.length));
      if (!Number.isFinite(value) || value < 0) {
        throw new Error(`Invalid --retention-days value: ${arg}`);
      }
      options.retentionDays = value;
    } else if (arg.startsWith('--now-ms=')) {
      // Test-only determinism hook. Normal skill usage should not pass this.
      const value = toNumber(arg.slice('--now-ms='.length));
      if (!Number.isFinite(value)) {
        throw new Error(`Invalid --now-ms value: ${arg}`);
      }
      options.nowMs = value;
    } else {
      throw new Error(`Unknown argument: ${arg}`);
    }
  }

  return options;
}

/**
 * Formats a timestamp as a compact, path-safe string.
 *
 * Takes the ISO-8601 UTC representation, drops the `-`, `:` and `.`
 * separators, and swaps the date/time `T` for a dash, e.g.
 * `2026-05-29T12:00:00.000Z` -> `20260529-120000000Z`.
 *
 * @param {number} nowMs - Milliseconds since the Unix epoch.
 * @returns {string} Compact timestamp suitable for a directory name.
 */
function timestampForPath(nowMs) {
  const iso = new Date(nowMs).toISOString();
  const withoutSeparators = iso.replace(/[-:.]/g, '');
  return withoutSeparators.replace('T', '-');
}

/**
 * Deletes trash directories whose modification time is at or before the
 * retention cutoff.
 *
 * Only direct children of `stateDir` that are directories and whose name
 * starts with `.trash-` are considered.
 *
 * @param {string} stateDir - Directory that holds the `.trash-*` directories.
 * @param {number} nowMs - Timestamp (ms since epoch) treated as "now".
 * @param {number} retentionDays - Age in days after which trash is removed.
 * @returns {string[]} Names of the trash directories that were removed;
 *   empty when `stateDir` does not exist.
 */
function purgeExpiredTrash(stateDir, nowMs, retentionDays) {
  if (!existsSync(stateDir)) return [];

  const cutoffMs = nowMs - retentionDays * MS_PER_DAY;
  const trashNames = readdirSync(stateDir, { withFileTypes: true })
    .filter((entry) => entry.isDirectory() && entry.name.startsWith('.trash-'))
    .map((entry) => entry.name);

  const removed = [];
  for (const trashName of trashNames) {
    const trashPath = join(stateDir, trashName);
    const isStillFresh = statSync(trashPath).mtimeMs > cutoffMs;
    if (!isStillFresh) {
      rmSync(trashPath, { recursive: true, force: true });
      removed.push(trashName);
    }
  }

  return removed;
}

/**
 * Moves the per-run work directories into a single trash directory named
 * `.trash-<timestamp>-<pid>` inside `stateDir`.
 *
 * `intermediate/` is handled entry by entry so that `scan-result.json` stays
 * in place; every other work directory is renamed into the trash as a whole.
 * The trash directory is only created once something actually needs moving.
 *
 * @param {string} stateDir - Directory containing the work directories.
 * @param {number} nowMs - Timestamp (ms since epoch) used in the trash name.
 * @returns {{ trashDir: string | null, moved: Array<{ name: string, destination: string }> }}
 *   The trash directory path (or `null` when nothing was moved) and one
 *   record per work directory that was moved.
 */
function moveWorkDirs(stateDir, nowMs) {
  const moved = [];
  let trashDir = null;

  // Creates the trash directory on first use and returns its path.
  const ensureTrashDir = () => {
    if (trashDir === null) {
      trashDir = join(stateDir, `.trash-${timestampForPath(nowMs)}-${process.pid}`);
      mkdirSync(trashDir, { recursive: true });
    }
    return trashDir;
  };

  for (const name of WORK_DIRS) {
    const source = join(stateDir, name);
    if (!existsSync(source)) continue;

    if (name !== 'intermediate') {
      // Whole-directory move.
      const destination = join(ensureTrashDir(), basename(source));
      renameSync(source, destination);
      moved.push({ name, destination });
      continue;
    }

    // Everything in intermediate/ except the preserved scan inventory.
    const movableNames = readdirSync(source, { withFileTypes: true })
      .map((entry) => entry.name)
      .filter((entryName) => entryName !== 'scan-result.json');
    if (movableNames.length === 0) continue;

    const destination = join(ensureTrashDir(), basename(source));
    mkdirSync(destination, { recursive: true });
    movableNames.forEach((entryName) => {
      renameSync(join(source, entryName), join(destination, entryName));
    });

    try {
      rmdirSync(source);
    } catch {
      // Keep intermediate/ when scan-result.json remains for incremental runs.
    }
    moved.push({ name, destination });
  }

  return { trashDir, moved };
}

/**
 * Script entry point: parses the command line, makes sure the state
 * directory exists, purges expired trash, moves the current work directories
 * into fresh trash, and prints a one-line summary to stdout.
 *
 * Errors thrown by any step propagate to the caller.
 */
function main() {
  const options = parseArgs(process.argv);
  const stateDir = join(options.projectRoot, '.understand-anything');

  mkdirSync(stateDir, { recursive: true });

  // Purge first, then move the current work directories into fresh trash.
  const purged = purgeExpiredTrash(stateDir, options.nowMs, options.retentionDays);
  const { trashDir, moved } = moveWorkDirs(stateDir, options.nowMs);

  const joinedNames = moved.map(({ name }) => name).join(', ');
  const movedNames = joinedNames === '' ? 'none' : joinedNames;
  const trashMessage = trashDir ? ` into ${trashDir}` : '';
  const directoryWord = purged.length === 1 ? 'directory' : 'directories';

  process.stdout.write(
    `Cleanup complete: moved ${movedNames}${trashMessage}; purged ${purged.length} expired trash ${directoryWord}.\n`,
  );
}

// Run the cleanup; on any failure report it on stderr and exit non-zero.
try {
  main();
} catch (err) {
  const message = err instanceof Error ? err.message : String(err);
  process.stderr.write(`Cleanup failed: ${message}\n`);
  // Set the exit code instead of calling process.exit(): exit() can terminate
  // the process before a pending stderr write has been flushed, losing the
  // error message. Nothing else is pending, so the process ends on its own.
  process.exitCode = 1;
}