Skip to content

Commit 7edf010

Browse files
authored
Merge pull request #21924 from asgerf/asgerf/yeast-changes
Yeast: some fixes
2 parents 167c837 + 3f3bed6 commit 7edf010

10 files changed

Lines changed: 203 additions & 59 deletions

File tree

shared/tree-sitter-extractor/src/generator/mod.rs

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -305,7 +305,18 @@ fn convert_nodes(
305305
// type.
306306
let members: Set<&str> = n_members
307307
.iter()
308-
.map(|n| nodes.get(n).unwrap().dbscheme_name.as_str())
308+
.map(|n| {
309+
nodes
310+
.get(n)
311+
.unwrap_or_else(|| {
312+
panic!(
313+
"union type '{}' references unknown member node type {:?}",
314+
node.dbscheme_name, n
315+
)
316+
})
317+
.dbscheme_name
318+
.as_str()
319+
})
309320
.collect();
310321
entries.push(dbscheme::Entry::Union(dbscheme::Union {
311322
name: &node.dbscheme_name,

shared/yeast-macros/src/parse.rs

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -411,7 +411,7 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
411411
// Named fields — compute each value into a temp, then reference it
412412
while peek_is_field(tokens) {
413413
let field_name = expect_ident(tokens, "expected field name")?;
414-
let field_str = field_name.to_string();
414+
let field_str = field_name.to_string().strip_prefix("r#").unwrap_or(&field_name.to_string()).to_string();
415415
expect_punct(tokens, ':', "expected `:` after field name")?;
416416
let temp = Ident::new(
417417
&format!("__field_{field_str}_{field_counter}"),
@@ -437,15 +437,19 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
437437
.map(::std::convert::Into::<usize>::into)
438438
.collect();
439439
});
440-
field_args.push(quote! { (#field_str, #temp) });
440+
// An empty splice means the field is absent — skip it
441+
// entirely rather than emitting an empty named field.
442+
field_args.push(quote! {
443+
if !#temp.is_empty() { __fields.push((#field_str, #temp)); }
444+
});
441445
continue;
442446
}
443447
}
444448
}
445449

446450
let value = parse_direct_node(tokens, ctx)?;
447451
stmts.push(quote! { let #temp: usize = #value; });
448-
field_args.push(quote! { (#field_str, vec![#temp]) });
452+
field_args.push(quote! { __fields.push((#field_str, vec![#temp])); });
449453
}
450454

451455
// After all named fields, no other tokens are allowed.
@@ -461,7 +465,9 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
461465
Ok(quote! {
462466
{
463467
#(#stmts)*
464-
#ctx.node(#kind_str, vec![#(#field_args),*])
468+
let mut __fields: Vec<(&str, Vec<usize>)> = Vec::new();
469+
#(#field_args)*
470+
#ctx.node(#kind_str, __fields)
465471
}
466472
})
467473
}
@@ -475,6 +481,11 @@ fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result<Vec<TokenStream
475481
let group = expect_group(tokens, Delimiter::Parenthesis)?;
476482
let mut inner = group.stream().into_iter().peekable();
477483

484+
// Empty `()` represents an empty sequence — emit nothing.
485+
if inner.peek().is_none() {
486+
continue;
487+
}
488+
478489
// Regular node
479490
let node = parse_direct_node_inner(&mut inner, ctx)?;
480491
items.push(quote! { __nodes.push(#node); });

shared/yeast/src/captures.rs

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -63,16 +63,20 @@ impl Captures {
6363
}
6464

6565
/// Apply a fallible function to every captured id (across all keys),
66-
/// replacing each id with the result. Stops and returns the error on
67-
/// the first failure.
66+
/// replacing each id with the results. A function returning an empty
67+
/// vector removes the capture; returning multiple ids splices them
68+
/// into the capture's value list (suitable for `*`/`+` captures).
69+
/// Stops and returns the error on the first failure.
6870
pub fn try_map_all_captures<E>(
6971
&mut self,
70-
mut f: impl FnMut(Id) -> Result<Id, E>,
72+
mut f: impl FnMut(Id) -> Result<Vec<Id>, E>,
7173
) -> Result<(), E> {
7274
for ids in self.captures.values_mut() {
73-
for id in ids {
74-
*id = f(*id)?;
75+
let mut new_ids = Vec::with_capacity(ids.len());
76+
for &id in ids.iter() {
77+
new_ids.extend(f(id)?);
7578
}
79+
*ids = new_ids;
7680
}
7781
Ok(())
7882
}

shared/yeast/src/dump.rs

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -273,6 +273,16 @@ fn dump_node(
273273
}
274274
}
275275

276+
// Check for required fields that are absent
277+
if let Some((schema, _, _)) = type_check {
278+
for (field_id, field_name) in schema.required_fields_for_kind(node.kind_name()) {
279+
if !node.fields.contains_key(&field_id) {
280+
let name = field_name.unwrap_or("child");
281+
writeln!(out, "{prefix} <-- ERROR: missing required field '{name}'").unwrap();
282+
}
283+
}
284+
}
285+
276286
// Unnamed children — skip unnamed tokens (keywords, punctuation)
277287
if let Some(children) = node.fields.get(&CHILD_FIELD) {
278288
let child_type_check = type_check.map(|(schema, _, _)| {

shared/yeast/src/lib.rs

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -563,6 +563,15 @@ impl Node {
563563
NodeContent::DynamicString(s) => Some(s.to_string()),
564564
}
565565
}
566+
567+
/// Read the child ids stored under a given field, or an empty slice if
568+
/// no such field is present on this node.
569+
pub fn field_children(&self, field_id: FieldId) -> &[Id] {
570+
self.fields
571+
.get(&field_id)
572+
.map(|v| v.as_slice())
573+
.unwrap_or(&[])
574+
}
566575
}
567576

568577
/// The contents of a node is either a range in the original source file,
@@ -836,17 +845,9 @@ fn apply_one_shot_rules_inner(
836845
// pattern root): re-analyzing it would match the same rule
837846
// again indefinitely.
838847
if captured_id == id {
839-
return Ok(captured_id);
840-
}
841-
let result =
842-
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)?;
843-
if result.len() != 1 {
844-
return Err(format!(
845-
"OneShot: recursion on captured node produced {} results, expected exactly 1",
846-
result.len()
847-
));
848+
return Ok(vec![captured_id]);
848849
}
849-
Ok(result[0])
850+
apply_one_shot_rules_inner(index, ast, captured_id, fresh, rewrite_depth + 1)
850851
})?;
851852
return Ok(rule.run_transform(ast, captures, id, fresh));
852853
}

shared/yeast/src/node_types_yaml.rs

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -314,6 +314,14 @@ fn apply_yaml_to_schema(
314314
node_types.sort_by(|a, b| a.kind.cmp(&b.kind).then(a.named.cmp(&b.named)));
315315
node_types.dedup_by(|a, b| a.kind == b.kind && a.named == b.named);
316316
schema.set_field_types(parent_kind, field_id, node_types);
317+
schema.set_field_cardinality(
318+
parent_kind,
319+
field_id,
320+
crate::schema::FieldCardinality {
321+
multiple: spec.multiple,
322+
required: spec.required,
323+
},
324+
);
317325
}
318326
}
319327
}

shared/yeast/src/query.rs

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -178,11 +178,15 @@ impl QueryListElem {
178178
let Some(child) = remaining_children.next() else {
179179
return Ok(false);
180180
};
181-
if skip_unnamed {
182-
let node = ast.get_node(child).unwrap();
183-
if !node.is_named() {
184-
continue;
185-
}
181+
let node = ast.get_node(child).unwrap();
182+
// Skip tree-sitter `extras` (e.g. comments) during
183+
// positional matching: they are conceptually invisible
184+
// between siblings, mirroring tree-sitter query semantics.
185+
if node.is_extra() {
186+
continue;
187+
}
188+
if skip_unnamed && !node.is_named() {
189+
continue;
186190
}
187191
let snapshot = matches.clone();
188192
if sub_query.do_match(ast, child, matches)? {

shared/yeast/src/schema.rs

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,15 @@ pub struct NodeType {
88
pub named: bool,
99
}
1010

11+
/// Multiplicity/optionality of a field declaration.
12+
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
13+
pub struct FieldCardinality {
14+
/// Whether the field may hold more than one child.
15+
pub multiple: bool,
16+
/// Whether at least one child must be present.
17+
pub required: bool,
18+
}
19+
1120
/// A schema defining node kinds and field names for the output AST.
1221
/// Built from a node-types.yml file, independent of any tree-sitter grammar.
1322
///
@@ -32,6 +41,7 @@ pub struct Schema {
3241
kind_names: BTreeMap<KindId, &'static str>,
3342
next_kind_id: KindId,
3443
field_types: BTreeMap<(String, FieldId), Vec<NodeType>>,
44+
field_cardinalities: BTreeMap<(String, FieldId), FieldCardinality>,
3545
supertypes: BTreeMap<String, Vec<NodeType>>,
3646
}
3747

@@ -52,6 +62,7 @@ impl Schema {
5262
kind_names: BTreeMap::new(),
5363
next_kind_id: 1, // 0 is reserved
5464
field_types: BTreeMap::new(),
65+
field_cardinalities: BTreeMap::new(),
5566
supertypes: BTreeMap::new(),
5667
}
5768
}
@@ -196,6 +207,42 @@ impl Schema {
196207
.get(&(parent_kind.to_string(), field_id))
197208
}
198209

210+
pub fn set_field_cardinality(
211+
&mut self,
212+
parent_kind: &str,
213+
field_id: FieldId,
214+
cardinality: FieldCardinality,
215+
) {
216+
self.field_cardinalities
217+
.insert((parent_kind.to_string(), field_id), cardinality);
218+
}
219+
220+
/// Returns the declared cardinality for a field, if known.
221+
pub fn field_cardinality(
222+
&self,
223+
parent_kind: &str,
224+
field_id: FieldId,
225+
) -> Option<FieldCardinality> {
226+
self.field_cardinalities
227+
.get(&(parent_kind.to_string(), field_id))
228+
.copied()
229+
}
230+
231+
/// Returns an iterator over all `(field_id, field_name)` pairs that are
232+
/// declared as required (`required: true`) for the given `parent_kind`.
233+
pub fn required_fields_for_kind<'a>(
234+
&'a self,
235+
parent_kind: &'a str,
236+
) -> impl Iterator<Item = (FieldId, Option<&'static str>)> + 'a {
237+
self.field_cardinalities
238+
.iter()
239+
.filter(move |((kind, _), card)| kind == parent_kind && card.required)
240+
.map(move |((_, field_id), _)| {
241+
let name = self.field_name_for_id(*field_id);
242+
(*field_id, name)
243+
})
244+
}
245+
199246
pub fn set_supertype_members(&mut self, supertype: &str, node_types: Vec<NodeType>) {
200247
self.supertypes.insert(supertype.to_string(), node_types);
201248
}

shared/yeast/tests/test.rs

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -274,6 +274,44 @@ fn test_query_no_match() {
274274
assert!(!matched);
275275
}
276276

277+
#[test]
278+
fn test_query_skips_extras_in_positional_match() {
279+
// Regression test: positional wildcards `(_)` must not bind to
280+
// tree-sitter `extras` (e.g. comments) during forward-scan; extras
281+
// are conceptually invisible between siblings, matching tree-sitter
282+
// query semantics. Without this, a later rule that translates a
283+
// captured comment to nothing (a common idiom, e.g.
284+
// `(comment) => ()` in Swift) leaves the capture's match-list empty
285+
// and causes the transform to fail with "Variable X has 0 matches".
286+
let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]);
287+
let ast = runner.run("[1, # comment\n2]").unwrap();
288+
289+
// Navigate to the `array` node: program -> array.
290+
let mut cursor = AstCursor::new(&ast);
291+
cursor.goto_first_child();
292+
let array_id = cursor.node_id();
293+
assert_eq!(ast.get_node(array_id).unwrap().kind(), "array");
294+
295+
// Two positional wildcards should bind to the two integers, skipping
296+
// the comment that sits between them.
297+
let query = yeast::query!((array (_) @a (_) @b));
298+
let mut captures = yeast::captures::Captures::new();
299+
let matched = query.do_match(&ast, array_id, &mut captures).unwrap();
300+
assert!(matched);
301+
assert_eq!(
302+
ast.get_node(captures.get_var("a").unwrap())
303+
.unwrap()
304+
.kind(),
305+
"integer"
306+
);
307+
assert_eq!(
308+
ast.get_node(captures.get_var("b").unwrap())
309+
.unwrap()
310+
.kind(),
311+
"integer"
312+
);
313+
}
314+
277315
#[test]
278316
fn test_reachable_nodes_excludes_orphaned_rewrite_nodes() {
279317
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();

0 commit comments

Comments
 (0)