Skip to content

Commit a602287

Browse files
committed
Refactor CLI caching with in-memory and file storage
1 parent dc8e1e9 commit a602287

8 files changed

Lines changed: 432 additions & 262 deletions

File tree

src/cache.test.ts

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
import * as fs from "fs";
2+
import * as os from "os";
3+
import path from "path";
4+
5+
import test from "ava";
6+
7+
import {
8+
cacheCommandOutput,
9+
getCachedCommandOutput,
10+
resetCachedCommandOutputs,
11+
CommandCacheKey,
12+
} from "./cache";
13+
import { isVersionInfo } from "./codeql";
14+
import { setupTests } from "./testing-utils";
15+
16+
// Install the shared per-test hooks from `testing-utils`; among other things
// they snapshot and restore `process.env` around every test.
setupTests(test);
17+
18+
/**
 * File name the cache module uses for its on-disk store. Duplicated here
 * (rather than imported) so the tests pin the exact name that is persisted.
 */
const COMMAND_CACHE_FILENAME = "codeql-action-command-cache.json";
19+
20+
/**
 * Test fixture that gives `body` a private, empty backing store for the cache.
 *
 * A fresh temporary directory is created and exported as `RUNNER_TEMP`, and
 * `CODEQL_ACTION_TEMP` is removed so that `getTemporaryDirectory()` falls back
 * to `RUNNER_TEMP`. The in-memory memo is cleared before `body` runs, which
 * makes each invocation behave like the first lookup of a new step.
 *
 * The environment is deliberately not restored here: `setupTests` snapshots
 * and restores `process.env` around every test.
 *
 * @param body Callback that receives the path at which the cache file lives
 *   (the file itself is not created). May be synchronous or asynchronous.
 * @returns A promise that settles once `body` has finished and the temporary
 *   directory has been removed; removal happens even when `body` throws.
 */
async function withCacheDir(
  body: (cacheFilePath: string) => Promise<void> | void,
): Promise<void> {
  const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), "cache-test-"));
  const cacheFile = path.join(scratchDir, COMMAND_CACHE_FILENAME);

  process.env["RUNNER_TEMP"] = scratchDir;
  delete process.env["CODEQL_ACTION_TEMP"];
  resetCachedCommandOutputs();

  try {
    await body(cacheFile);
  } finally {
    await fs.promises.rm(scratchDir, { recursive: true, force: true });
  }
}
41+
42+
/**
 * Seeds the on-disk cache with `contents`, serialized as JSON.
 *
 * @param cacheFilePath Absolute path of the cache file to (over)write.
 * @param contents Mapping from cache key to the raw entry to persist. Entries
 *   are intentionally untyped so tests can write malformed ones.
 */
function writeCacheFile(
  cacheFilePath: string,
  contents: Record<string, unknown>,
): void {
  const serialized = JSON.stringify(contents);
  fs.writeFileSync(cacheFilePath, serialized);
}
48+
49+
// A well-formed entry written by a previous step, for the same CLI path, must
// be served from the file without any prior in-process caching.
test.serial(
  "getCachedCommandOutput reuses an output persisted by an earlier step",
  async (t) => {
    const cliPath = "/path/to/codeql";

    await withCacheDir((cacheFilePath) => {
      writeCacheFile(cacheFilePath, {
        [CommandCacheKey.Version]: {
          cmd: cliPath,
          output: { version: "2.20.0" },
        },
      });

      const cached = getCachedCommandOutput(
        CommandCacheKey.Version,
        cliPath,
        isVersionInfo,
      );
      t.deepEqual(cached, { version: "2.20.0" });
    });
  },
);
70+
71+
// An entry whose recorded `cmd` differs from the requested CLI path is treated
// as a miss.
test.serial(
  "getCachedCommandOutput ignores an output persisted from a different CLI",
  async (t) => {
    await withCacheDir((cacheFilePath) => {
      const persistedByOtherCli = {
        cmd: "/path/to/other-codeql",
        output: { version: "2.20.0" },
      };
      writeCacheFile(cacheFilePath, {
        [CommandCacheKey.Version]: persistedByOtherCli,
      });

      const cached = getCachedCommandOutput(
        CommandCacheKey.Version,
        "/path/to/codeql",
        isVersionInfo,
      );
      t.is(cached, undefined);
    });
  },
);
92+
93+
// A cache file that is not valid JSON must behave like an empty cache rather
// than raising.
test.serial(
  "getCachedCommandOutput ignores a malformed cache file",
  async (t) => {
    await withCacheDir((cacheFilePath) => {
      fs.writeFileSync(cacheFilePath, "not valid json");

      const cached = getCachedCommandOutput(
        CommandCacheKey.Version,
        "/path/to/codeql",
        isVersionInfo,
      );
      t.is(cached, undefined);
    });
  },
);
109+
110+
// With nothing on disk and an empty memo, a lookup is a plain miss.
test.serial(
  "getCachedCommandOutput returns undefined when there is no cache file",
  async (t) => {
    await withCacheDir(() => {
      const cached = getCachedCommandOutput(
        CommandCacheKey.Version,
        "/path/to/codeql",
        isVersionInfo,
      );
      t.is(cached, undefined);
    });
  },
);
125+
126+
// Each persisted output below is expected to be rejected by `isVersionInfo`,
// so the lookup must report a miss even though the `cmd` matches.
test.serial(
  "getCachedCommandOutput ignores an output that fails validation",
  async (t) => {
    const rejectedOutputs = [
      {},
      { version: 2 },
      { version: "2.20.0", overlayVersion: "1" },
      { version: "2.20.0", features: "nope" },
    ];

    await withCacheDir((cacheFilePath) => {
      for (const output of rejectedOutputs) {
        // Start every case from an empty memo so the file is actually read.
        resetCachedCommandOutputs();
        writeCacheFile(cacheFilePath, {
          [CommandCacheKey.Version]: { cmd: "/path/to/codeql", output },
        });

        const cached = getCachedCommandOutput(
          CommandCacheKey.Version,
          "/path/to/codeql",
          isVersionInfo,
        );
        // The serialized output is the failure message, identifying the case.
        t.is(cached, undefined, JSON.stringify(output));
      }
    });
  },
);
153+
154+
// An entry without a `cmd` field cannot be attributed to any CLI and must be
// ignored, even if its output would otherwise validate.
test.serial(
  "getCachedCommandOutput ignores an entry missing the cmd field",
  async (t) => {
    await withCacheDir((cacheFilePath) => {
      const entryWithoutCmd = { output: { version: "2.20.0" } };
      writeCacheFile(cacheFilePath, {
        [CommandCacheKey.Version]: entryWithoutCmd,
      });

      const cached = getCachedCommandOutput(
        CommandCacheKey.Version,
        "/path/to/codeql",
        isVersionInfo,
      );
      t.is(cached, undefined);
    });
  },
);
172+
173+
// Caching a value must populate both tiers: the JSON file on disk and the
// in-process memo.
test.serial(
  "cacheCommandOutput persists the output to both the memo and the file",
  async (t) => {
    await withCacheDir((cacheFilePath) => {
      const key = "some-command";
      const cliPath = "/path/to/codeql";

      cacheCommandOutput(key, cliPath, {
        hello: "world",
      });

      // Tier 2: the temporary file contains the entry.
      const rawFile = fs.readFileSync(cacheFilePath, "utf8");
      const onDisk = JSON.parse(rawFile) as Record<string, unknown>;
      t.deepEqual(onDisk[key], {
        cmd: "/path/to/codeql",
        output: { hello: "world" },
      });

      // Tier 1: the value is served from the memo even after the file is gone.
      fs.rmSync(cacheFilePath);
      const fromMemo = getCachedCommandOutput(key, cliPath);
      t.deepEqual(fromMemo, {
        hello: "world",
      });
    });
  },
);
198+
199+
// When both tiers hold a value for the same key, the in-process memo wins and
// the file is not consulted.
test.serial(
  "getCachedCommandOutput prefers the in-memory memo over the file",
  async (t) => {
    await withCacheDir((cacheFilePath) => {
      const key = "some-command";
      const cliPath = "/path/to/codeql";

      cacheCommandOutput(key, cliPath, { value: 1 });

      // Overwrite the file with a different value; the memo (tier 1) should win.
      writeCacheFile(cacheFilePath, {
        [key]: { cmd: cliPath, output: { value: 2 } },
      });

      const cached = getCachedCommandOutput(key, cliPath);
      t.deepEqual(cached, {
        value: 1,
      });
    });
  },
);
215+
216+
// Caching the same key twice in one process is a logic error. The assertion
// pins the error class and message so an unrelated exception (e.g. a
// TypeError from a regression) cannot satisfy it, and then checks that the
// rejected second call left the first value in place.
test.serial(
  "cacheCommandOutput throws if called twice for the same key",
  async (t) => {
    await withCacheDir(() => {
      cacheCommandOutput("some-command", "/path/to/codeql", { value: 1 });

      t.throws(
        () =>
          cacheCommandOutput("some-command", "/path/to/codeql", { value: 2 }),
        {
          instanceOf: Error,
          message: /should be called only once per key/,
        },
      );

      // The failed call must not have clobbered the original entry.
      t.deepEqual(getCachedCommandOutput("some-command", "/path/to/codeql"), {
        value: 1,
      });
    });
  },
);

src/cache.ts

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
import * as fs from "fs";
2+
import * as path from "path";
3+
4+
import { getTemporaryDirectory } from "./actions-util";
5+
import * as json from "./json";
6+
7+
/**
 * File name of the tier-2 cache. The file is created inside the directory
 * returned by `getTemporaryDirectory()`.
 */
const COMMAND_CACHE_FILENAME = "codeql-action-command-cache.json";
9+
10+
/**
 * Well-known cache keys for the outputs of `codeql` commands.
 *
 * A key is the contract between the code that stores an output (the matching
 * method in `codeql.ts`) and any code that reads it back (e.g.
 * `status-report.ts`, which peeks the cached version without invoking the
 * CLI). The string values are persisted in the cache file, so changing one
 * makes previously persisted entries unreachable.
 */
export enum CommandCacheKey {
  /** Key for the cached version information. */
  Version = "version",
  /** Key for the cached language-resolution output. */
  ResolveLanguages = "resolveLanguages",
}
20+
21+
/**
 * One cached command output, tagged with the CLI that produced it. This is
 * the shape stored both in the in-memory memo and, serialized as JSON, in the
 * cache file.
 */
interface CacheEntry {
  /**
   * Path of the CodeQL CLI that produced `output`. It is persisted alongside
   * the output so that a step running a different CodeQL bundle can detect,
   * and skip, a value that does not belong to it.
   */
  cmd: string;
  /** The command's output. Untyped here; callers narrow it with a type guard. */
  output: unknown;
}
31+
32+
/**
 * Tier 1 of the cache: a per-process memo keyed by cache key.
 *
 * Lookups consult this map before anything else. It is filled whenever a value
 * is loaded from the cache file (tier 2) or stored after being computed via
 * the CLI (tier 3).
 */
const inMemoryCache: Map<string, CacheEntry> = new Map();
38+
39+
/**
 * Computes the location of the tier-2 cache file.
 *
 * The path is recomputed on every call rather than cached, so it follows
 * whatever `getTemporaryDirectory()` currently returns.
 *
 * @returns `COMMAND_CACHE_FILENAME` joined onto the temporary directory.
 */
function getCommandCacheFilePath(): string {
  const tempDir = getTemporaryDirectory();
  return path.join(tempDir, COMMAND_CACHE_FILENAME);
}
42+
43+
/**
 * Loads the tier-2 cache from disk.
 *
 * This is best-effort and never throws: if the file is missing or unreadable,
 * is not valid JSON, or parses to something that `json.isObject` rejects, the
 * result is an empty cache.
 *
 * Individual entries are NOT validated here, so despite the declared return
 * type callers must check the shape of any entry they read.
 *
 * @returns The parsed contents of the cache file, or `{}` on any failure.
 */
function readCommandCacheFile(): Record<string, CacheEntry> {
  try {
    const raw = fs.readFileSync(getCommandCacheFilePath(), "utf8");
    const parsed = json.parseString(raw);
    if (json.isObject(parsed)) {
      return parsed;
    }
  } catch {
    // Unreadable or malformed file: fall through and report an empty cache.
  }
  return {};
}
64+
65+
/**
 * Replaces the tier-2 cache file with the JSON serialization of `data`.
 *
 * This is best-effort: any error while resolving the path, serializing or
 * writing is swallowed. The only consequence of a failed write is that a
 * later step has to re-run the CLI.
 *
 * @param data The complete cache contents to persist.
 */
function writeCommandCacheFile(data: Record<string, CacheEntry>): void {
  try {
    const target = getCommandCacheFilePath();
    const serialized = JSON.stringify(data);
    fs.writeFileSync(target, serialized);
  } catch {
    // Deliberately ignored; persistence is an optimization, not a requirement.
  }
}
76+
77+
/**
 * Records the output of a command under `key` in both cache tiers: the
 * in-memory memo (tier 1) and the temporary file (tier 2).
 *
 * The file is updated by reading its current contents, replacing the entry for
 * `key`, and writing everything back, so entries stored under other keys are
 * preserved. The file update is best-effort and never throws.
 *
 * @param key Cache key to store the output under.
 * @param cmd Path of the CodeQL CLI that produced `output`.
 * @param output The command output to cache.
 * @throws Error if `key` is already present in the in-memory memo. A value
 *   that is already cached should be served from the memo rather than
 *   recomputed, so a second call for the same key indicates a logic error.
 */
export function cacheCommandOutput(
  key: string,
  cmd: string,
  output: unknown,
): void {
  const alreadyMemoized = inMemoryCache.has(key);
  if (alreadyMemoized) {
    throw new Error(
      `cacheCommandOutput() should be called only once per key, but was called more than once for '${key}'.`,
    );
  }

  // Tier 1.
  const freshEntry: CacheEntry = { cmd, output };
  inMemoryCache.set(key, freshEntry);

  // Tier 2: merge into whatever earlier steps have already persisted.
  const persisted = readCommandCacheFile();
  persisted[key] = freshEntry;
  writeCommandCacheFile(persisted);
}
102+
103+
/**
 * Returns the cached output for `key`, or `undefined` if no usable value is
 * cached.
 *
 * Resolves tier 1 (in-memory memo) first, then tier 2 (temporary file). In
 * both tiers an entry is only used if its `cmd` matches the optional `cmd`
 * argument and its output satisfies the optional `validate` type guard.
 *
 * Applying the checks to tier 1 matters because the memo can be populated by
 * a lookup that passed no `cmd` and/or no `validate` (for example a consumer
 * that merely peeks at a value). Without the checks, a later lookup in the
 * same process that does pass them would receive an entry produced by a
 * different CLI, or one that was never validated.
 *
 * A memoized entry that fails the checks is evicted. The caller is expected
 * to fall back to the CLI and store the fresh result with
 * `cacheCommandOutput`, which refuses to overwrite an existing memo entry.
 *
 * @param key Cache key to look up.
 * @param cmd If given, only an entry produced by this CLI path is accepted.
 * @param validate If given, only an entry whose output passes this type guard
 *   is accepted. When omitted, the output is returned as `T` unchecked.
 * @returns The cached output, or `undefined` to signal that the caller should
 *   fall back to tier 3 (the CLI).
 */
export function getCachedCommandOutput<T>(
  key: string,
  cmd?: string,
  validate?: (output: unknown) => output is T,
): T | undefined {
  // An entry is usable only if it satisfies every check the caller asked for.
  const isAcceptable = (candidate: CacheEntry): boolean =>
    (cmd === undefined || candidate.cmd === cmd) &&
    (validate === undefined || validate(candidate.output));

  // Tier 1: the in-memory variable.
  const memoized = inMemoryCache.get(key);
  if (memoized !== undefined) {
    if (isAcceptable(memoized)) {
      return memoized.output as T;
    }
    // Stale for this caller: drop it so a recomputed value can be cached.
    inMemoryCache.delete(key);
  }

  // Tier 2: the temporary file persisted by an earlier step, if any. Entries
  // in the file are unvalidated, so check the shape before trusting `cmd`.
  const entry = readCommandCacheFile()[key] as unknown;
  if (
    !json.isObject<CacheEntry>(entry) ||
    !json.isString(entry.cmd) ||
    !isAcceptable({ cmd: entry.cmd, output: entry.output })
  ) {
    return undefined;
  }

  // Memoize so subsequent lookups in this process hit tier 1.
  inMemoryCache.set(key, { cmd: entry.cmd, output: entry.output });
  return entry.output as T;
}
140+
141+
/**
 * Empties the in-process memo (tier 1). The cache file (tier 2) is left
 * untouched.
 *
 * Intended for tests only: they simulate several "steps" inside one process
 * and need each simulated step to start without memoized values.
 */
export function resetCachedCommandOutputs(): void {
  inMemoryCache.clear();
}

0 commit comments

Comments
 (0)