Skip to content

Commit ca23fab

Browse files
wan9chi and claude committed
feat(cache): runner-aware auto output tracking + ignore consumption
Completes runner-aware caching on top of the IPC infra:

- ignoreInput/ignoreOutput land across the whole IPC surface: protocol verbs, sync client (with cwd-resolution and Windows path canonicalization), napi addon methods, and server-side recording with absolute-path validation.
- `output: None` resolves to auto inference again, so fspy-written files are archived and restored on a cache hit (auto output restoration).
- The reported ignores are applied: vite excludes its out dir and write-then-read temp files from the input fingerprint and read-write overlap check, so `vite build` caches and restores `dist/` without manual `!`-glob exclusions.
- Adds the real-vite e2e coverage, which needs this full client surface: ignoreInput keeps the cache valid, ignoreOutput allows a read-write overlap, vite build caches/restores its outputs and re-runs on tracked env changes, and vite dev disables caching end-to-end.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 8e2f29c commit ca23fab

109 files changed

Lines changed: 940 additions & 395 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Changelog
22

3+
- **Changed** Cached tasks without an `output` config now automatically archive the files the task writes and restore them on cache hits (`output: []` disables restoration). Runner-aware tools can exclude internal paths via `ignoreInput`/`ignoreOutput`, so `vite build` caches and restores `dist/` with zero manual cache config ([#431](https://github.com/voidzero-dev/vite-task/pull/431))
34
- **Added** Runner-aware `getEnvs` match sets can now participate in task cache fingerprints, so changing, adding, or removing a matching env var invalidates the cache.
45
- **Added** Runner-aware `getEnvs` calls now return env values served by the runner for matching env glob patterns.
56
- **Added** Runner-aware `getEnv` reads can now participate in task cache fingerprints, so changing a tool-served env value invalidates the cache and names the env var in the miss message.

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/vite_task/src/session/execute/cache_update.rs

Lines changed: 193 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
44
use std::{collections::BTreeMap, sync::Arc, time::Duration};
55

6+
use rustc_hash::FxHashSet;
67
use vite_path::{AbsolutePath, RelativePathBuf};
78
use vite_str::Str;
89
use vite_task_plan::cache_metadata::CacheMetadata;
@@ -27,6 +28,9 @@ use crate::{
2728
/// value is only ever `Some` when tracking happened (see [`observe_fspy`]).
2829
struct TrackingOutcome {
2930
path_reads: HashMap<RelativePathBuf, PathRead>,
31+
/// All paths the task wrote to. Consumed by `collect_and_archive_outputs`
32+
/// when `output_config.includes_auto` is set.
33+
path_writes: FxHashSet<RelativePathBuf>,
3034
/// First path that was both read and written during execution, if any.
3135
/// A non-empty value means caching this task is unsound.
3236
read_write_overlap: Option<RelativePathBuf>,
@@ -64,6 +68,15 @@ pub(super) async fn update_cache(
6468
return (CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::ToolRequested), None);
6569
}
6670

71+
// Tool-reported paths to exclude from auto-tracking. Absolute paths
72+
// are normalized to workspace-relative; anything outside is dropped.
73+
let ignored_input_rels: FxHashSet<RelativePathBuf> = reports
74+
.map(|r| normalize_ignored_paths(&r.ignored_inputs, workspace_root))
75+
.unwrap_or_default();
76+
let ignored_output_rels: FxHashSet<RelativePathBuf> = reports
77+
.map(|r| normalize_ignored_paths(&r.ignored_outputs, workspace_root))
78+
.unwrap_or_default();
79+
6780
if cancelled {
6881
// Cancelled (Ctrl-C or sibling failure) — result is untrustworthy.
6982
return (CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::Cancelled), None);
@@ -74,7 +87,14 @@ pub(super) async fn update_cache(
7487
return (CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::NonZeroExitStatus), None);
7588
}
7689

77-
let fspy_outcome = observe_fspy(outcome, input_negative_globs, workspace_root);
90+
let fspy_outcome = observe_fspy(
91+
outcome,
92+
metadata,
93+
input_negative_globs,
94+
&ignored_input_rels,
95+
&ignored_output_rels,
96+
workspace_root,
97+
);
7898

7999
if let Some(TrackingOutcome { read_write_overlap: Some(path), .. }) = &fspy_outcome {
80100
// fspy-inferred read-write overlap: the task wrote to a file it also
@@ -124,7 +144,15 @@ pub(super) async fn update_cache(
124144
}
125145
};
126146

127-
let output_archive = match collect_and_archive_outputs(metadata, workspace_root, cache_dir) {
147+
// Collect output files and create archive. Tool-reported `ignoreOutput`
148+
// paths are excluded from archiving too.
149+
let output_archive = match collect_and_archive_outputs(
150+
metadata,
151+
fspy_outcome.as_ref(),
152+
&ignored_output_rels,
153+
workspace_root,
154+
cache_dir,
155+
) {
128156
Ok(archive) => archive,
129157
Err(err) => {
130158
return (
@@ -151,32 +179,142 @@ pub(super) async fn update_cache(
151179
}
152180

153181
/// Summarize the run's fspy observations. `Some` iff tracking was both
154-
/// requested (`input_negative_globs.is_some()`) and compiled in (`cfg(fspy)`). On a
182+
/// requested (`outcome.path_accesses.is_some()`) and compiled in (`cfg(fspy)`). On a
155183
/// `cfg(not(fspy))` build this is always `None`, and [`update_cache`]
156184
/// short-circuits to `FspyUnsupported` when tracking was needed.
185+
///
186+
/// `path_reads` is gated on `input_config.includes_auto`, filtered by
187+
/// user-configured input negatives, and by tool-reported `ignoreInput`
188+
/// paths. `path_writes` is NOT filtered here — output negatives and
189+
/// `ignoreOutput` are applied later inside `collect_and_archive_outputs`.
190+
/// Keeping the two sides separate avoids `input: ["!dist/**"]` accidentally
191+
/// dropping writes to `dist/**`, which would break archive restoration.
157192
fn observe_fspy(
158193
outcome: &ChildOutcome,
194+
metadata: &CacheMetadata,
159195
input_negative_globs: Option<&[wax::Glob<'static>]>,
196+
ignored_input_rels: &FxHashSet<RelativePathBuf>,
197+
ignored_output_rels: &FxHashSet<RelativePathBuf>,
160198
workspace_root: &AbsolutePath,
161199
) -> Option<TrackingOutcome> {
162200
#[cfg(fspy)]
163201
{
202+
use wax::Program as _;
203+
164204
use super::tracked_accesses::TrackedPathAccesses;
165205

166-
outcome.path_accesses.as_ref().zip(input_negative_globs).map(|(raw, negatives)| {
167-
let tracked = TrackedPathAccesses::from_raw(raw, workspace_root, negatives);
168-
let read_write_overlap =
169-
tracked.path_reads.keys().find(|p| tracked.path_writes.contains(*p)).cloned();
170-
TrackingOutcome { path_reads: tracked.path_reads, read_write_overlap }
206+
outcome.path_accesses.as_ref().map(|raw| {
207+
let tracked = TrackedPathAccesses::from_raw(raw, workspace_root);
208+
let path_reads: HashMap<RelativePathBuf, PathRead> =
209+
if metadata.input_config.includes_auto
210+
&& let Some(negatives) = input_negative_globs
211+
{
212+
tracked
213+
.path_reads
214+
.iter()
215+
.filter(|(path, _)| {
216+
!negatives.iter().any(|neg| neg.is_match(path.as_str()))
217+
&& !is_ignored(path, ignored_input_rels)
218+
})
219+
.map(|(path, read)| (path.clone(), *read))
220+
.collect()
221+
} else {
222+
HashMap::default()
223+
};
224+
let read_write_overlap = path_reads
225+
.keys()
226+
.find(|p| tracked.path_writes.contains(*p) && !is_ignored(p, ignored_output_rels))
227+
.cloned();
228+
TrackingOutcome { path_reads, path_writes: tracked.path_writes, read_write_overlap }
171229
})
172230
}
173231
#[cfg(not(fspy))]
174232
{
175-
let _ = (outcome, input_negative_globs, workspace_root);
233+
let _ = (
234+
outcome,
235+
metadata,
236+
input_negative_globs,
237+
ignored_input_rels,
238+
ignored_output_rels,
239+
workspace_root,
240+
);
176241
None
177242
}
178243
}
179244

245+
/// Normalize tool-reported absolute paths to workspace-relative. Paths outside
246+
/// the workspace are dropped — they can't contribute to inputs or outputs.
247+
fn normalize_ignored_paths(
248+
paths: &FxHashSet<Arc<AbsolutePath>>,
249+
workspace_root: &AbsolutePath,
250+
) -> FxHashSet<RelativePathBuf> {
251+
// On Windows, `workspace_root` may carry a `\\?\` extended-path prefix
252+
// (it does when the runner derived it from `std::fs::canonicalize`)
253+
// while a tool's `current_dir()`-based ignoreInput/ignoreOutput path
254+
// doesn't. `Path::strip_prefix` is a byte-exact comparison so the
255+
// prefix mismatch silently drops every tool-reported path. Pre-build
256+
// an alternate workspace root with the `\\?\` / `\\.\` / `\??\`
257+
// prefix dropped and try it as a fallback. `fspy_shared::NativePath::
258+
// strip_path_prefix` does the inverse (strips `\\?\` from incoming
259+
// fspy paths) so each side stays agnostic to how the other side
260+
// canonicalised.
261+
#[cfg(windows)]
262+
let workspace_root_stripped: Option<vite_path::AbsolutePathBuf> =
263+
windows_strip_verbatim_prefix(workspace_root.as_path().as_os_str());
264+
265+
paths
266+
.iter()
267+
.filter_map(|p| {
268+
if let Some(rel) = p.strip_prefix(workspace_root).ok().flatten() {
269+
return Some(rel);
270+
}
271+
#[cfg(windows)]
272+
if let Some(alt_root) = workspace_root_stripped.as_ref() {
273+
if let Some(rel) = p.strip_prefix(alt_root).ok().flatten() {
274+
return Some(rel);
275+
}
276+
}
277+
None
278+
})
279+
.collect()
280+
}
281+
282+
/// Build an alternate workspace-root path by dropping a `\\?\`, `\\.\`,
283+
/// or `\??\` prefix if present. Returns `None` when the input is already
284+
/// in plain `C:\...` form (no fallback needed). Mirrors
285+
/// `fspy_shared::NativePath::strip_path_prefix`'s helper so the inputs of
286+
/// `strip_prefix` can match across `current_dir`-derived and
287+
/// `canonicalize`-derived paths.
288+
#[cfg(windows)]
289+
#[expect(
290+
clippy::disallowed_types,
291+
reason = "OsStr-level prefix matching for Windows extended-path normalization"
292+
)]
293+
fn windows_strip_verbatim_prefix(p: &std::ffi::OsStr) -> Option<vite_path::AbsolutePathBuf> {
294+
use std::os::windows::ffi::{OsStrExt, OsStringExt};
295+
let wide: Vec<u16> = p.encode_wide().collect();
296+
for prefix in [r"\\?\", r"\\.\", r"\??\"] {
297+
let prefix_wide: Vec<u16> = prefix.encode_utf16().collect();
298+
if wide.starts_with(prefix_wide.as_slice()) {
299+
let stripped = std::ffi::OsString::from_wide(&wide[prefix_wide.len()..]);
300+
return vite_path::AbsolutePathBuf::new(std::path::PathBuf::from(stripped));
301+
}
302+
}
303+
None
304+
}
305+
306+
/// Whether `path` is covered by any `ignored` entry. An ignored entry matches
307+
/// itself (exact file) and everything under it (directory subtree).
308+
fn is_ignored(path: &RelativePathBuf, ignored: &FxHashSet<RelativePathBuf>) -> bool {
309+
if ignored.is_empty() {
310+
return false;
311+
}
312+
if ignored.contains(path) {
313+
return true;
314+
}
315+
ignored.iter().any(|ig| path.strip_prefix(ig).is_some())
316+
}
317+
180318
/// Select tool-reported env records to embed in the post-run fingerprint.
181319
/// Only `tracked: true` records are included, and names that the user already
182320
/// declared as fingerprinted are skipped.
@@ -220,36 +358,70 @@ fn collect_tracked_env_globs(reports: &Reports) -> BTreeMap<Str, BTreeMap<Str, S
220358
.collect()
221359
}
222360

223-
/// Collect output files matching the configured globs and create a tar.zst
224-
/// archive in the cache directory.
361+
/// Collect output files and create a tar.zst archive in the cache directory.
362+
///
363+
/// Output files are determined by:
364+
/// - fspy-tracked writes (when `output_config.includes_auto` is true)
365+
/// - Positive output globs (always, if configured)
366+
/// - Filtered by negative output globs
367+
/// - Filtered by tool-reported `ignoreOutput` paths (auto writes only)
225368
///
226-
/// Returns `Some(archive_filename)` if files were archived, `None` if the
227-
/// output config has no positive globs or no files matched.
369+
/// Returns `Some(archive_filename)` if files were archived, `None` if no output files.
228370
fn collect_and_archive_outputs(
229371
cache_metadata: &CacheMetadata,
372+
tracking: Option<&TrackingOutcome>,
373+
ignored_output_rels: &FxHashSet<RelativePathBuf>,
230374
workspace_root: &AbsolutePath,
231375
cache_dir: &AbsolutePath,
232376
) -> anyhow::Result<Option<Str>> {
377+
use wax::Program as _;
378+
233379
let output_config = &cache_metadata.output_config;
234380

235-
if output_config.positive_globs.is_empty() {
236-
return Ok(None);
381+
// Collect output files from auto-detection (fspy writes), excluding
382+
// anything the tool reported via `ignoreOutput`.
383+
let mut output_files: FxHashSet<RelativePathBuf> = FxHashSet::default();
384+
385+
if output_config.includes_auto
386+
&& let Some(t) = tracking
387+
{
388+
output_files
389+
.extend(t.path_writes.iter().filter(|p| !is_ignored(p, ignored_output_rels)).cloned());
237390
}
238391

239-
let output_files = glob::collect_glob_paths(
240-
workspace_root,
241-
&output_config.positive_globs,
242-
&output_config.negative_globs,
243-
)?;
392+
// Collect output files from positive globs
393+
if !output_config.positive_globs.is_empty() {
394+
let glob_paths = glob::collect_glob_paths(
395+
workspace_root,
396+
&output_config.positive_globs,
397+
&output_config.negative_globs,
398+
)?;
399+
output_files.extend(glob_paths);
400+
}
401+
402+
// Apply negative globs to auto-detected files
403+
if output_config.includes_auto && !output_config.negative_globs.is_empty() {
404+
let negatives: Vec<wax::Glob<'static>> = output_config
405+
.negative_globs
406+
.iter()
407+
.map(|p| Ok(wax::Glob::new(p.as_str())?.into_owned()))
408+
.collect::<anyhow::Result<_>>()?;
409+
output_files.retain(|path| !negatives.iter().any(|neg| neg.is_match(path.as_str())));
410+
}
244411

245412
if output_files.is_empty() {
246413
return Ok(None);
247414
}
248415

416+
// Sort for deterministic archive content
417+
let mut sorted_files: Vec<RelativePathBuf> = output_files.into_iter().collect();
418+
sorted_files.sort();
419+
420+
// Create archive with UUID filename
249421
let archive_name: Str = vite_str::format!("{}.tar.zst", uuid::Uuid::new_v4());
250422
let archive_path = cache_dir.join(archive_name.as_str());
251423

252-
archive::create_output_archive(workspace_root, &output_files, &archive_path)?;
424+
archive::create_output_archive(workspace_root, &sorted_files, &archive_path)?;
253425

254426
Ok(Some(archive_name))
255427
}

crates/vite_task/src/session/execute/mod.rs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ struct CacheState<'a> {
9090
/// always present (possibly empty) once we reach the cache-update phase.
9191
std_outputs: Vec<StdOutput>,
9292
/// Runner-aware tracking for cached tasks: an IPC server is always
93-
/// available, and fspy path tracing is attached only when auto input
94-
/// inference needs it. Parts are borrowed in place during the wait/join;
95-
/// the struct is never moved out.
93+
/// available, and fspy path tracing is attached only when auto input or
94+
/// output inference needs it. Parts are borrowed in place during the
95+
/// wait/join; the struct is never moved out.
9696
tracking: Tracking,
9797
}
9898

@@ -101,7 +101,7 @@ struct CacheState<'a> {
101101
type IpcDriver = LocalBoxFuture<'static, Result<Recorder, vite_task_server::Error>>;
102102

103103
/// fspy path-tracking state, present only when a cached task needs automatic
104-
/// input inference.
104+
/// input or output inference.
105105
struct FspyTracking {
106106
input_negative_globs: Vec<wax::Glob<'static>>,
107107
}
@@ -166,7 +166,7 @@ impl<'a> ExecutionMode<'a> {
166166
});
167167
};
168168

169-
let fspy = if metadata.input_config.includes_auto {
169+
let fspy = if metadata.input_config.includes_auto || metadata.output_config.includes_auto {
170170
// Resolve input negative globs for fspy path filtering (already
171171
// workspace-root-relative).
172172
let negatives = metadata
@@ -181,9 +181,11 @@ impl<'a> ExecutionMode<'a> {
181181
None
182182
};
183183

184-
// Bind runner IPC for every cached task. The merged cache-control API
185-
// (`disableCache`) must work even when a task uses explicit inputs and
186-
// therefore does not need fspy auto-input inference.
184+
// Bind runner IPC for every cached task. `disableCache` and
185+
// runner-served envs must work even when a task has explicit inputs
186+
// and does not need fspy auto-input/output inference. Env requests
187+
// resolve against the spawn's full env context, not the filtered child
188+
// `all_envs`.
187189
let (ipc_envs, ServerHandle { driver, stop_accepting }) =
188190
serve(Recorder::new(Arc::clone(envs))).map_err(ExecutionError::IpcServerBind)?;
189191
let tracking =

0 commit comments

Comments
 (0)