Skip to content

Commit 1751d70

Browse files
committed
Fix parsing of corpus tests when --- delimiter is missing
1 parent ac8eb50 commit 1751d70

1 file changed

Lines changed: 44 additions & 34 deletions

File tree

unified/extractor/tests/corpus_tests.rs

Lines changed: 44 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,13 @@ fn is_header_rule(line: &str) -> bool {
2626
trimmed.len() >= 3 && trimmed.chars().all(|c| c == '=')
2727
}
2828

29+
fn is_next_case_header(lines: &[&str], i: usize) -> bool {
30+
is_header_rule(lines[i])
31+
&& i + 2 < lines.len()
32+
&& !lines[i + 1].trim().is_empty()
33+
&& is_header_rule(lines[i + 2])
34+
}
35+
2936
fn parse_corpus(content: &str) -> Vec<CorpusCase> {
3037
let lines: Vec<&str> = content.lines().collect();
3138
let mut i = 0;
@@ -58,48 +65,51 @@ fn parse_corpus(content: &str) -> Vec<CorpusCase> {
5865

5966
let input_start = i;
6067
while i < lines.len() && lines[i].trim() != "---" {
68+
if is_next_case_header(&lines, i) {
69+
break;
70+
}
6171
i += 1;
6272
}
63-
assert!(i < lines.len(), "Missing --- separator for case {name}");
6473
let input = lines[input_start..i].join("\n").trim_end().to_string();
65-
i += 1;
74+
let raw;
75+
let expected;
76+
if i >= lines.len() || lines[i].trim() != "---" {
77+
// No `---` separator before next case (or EOF). Treat the
78+
// remaining sections as empty.
79+
raw = String::new();
80+
expected = String::new();
81+
} else {
82+
i += 1;
6683

67-
// Raw tree-sitter parse section. New-format files have a second
68-
// `---` separator between the raw tree and the mapped AST. Legacy
69-
// files (with only one separator) have no raw section — in that
70-
// case `raw` stays empty and update mode will populate it.
71-
let raw_start = i;
72-
let mut next_sep = i;
73-
while next_sep < lines.len() && lines[next_sep].trim() != "---" {
74-
if is_header_rule(lines[next_sep])
75-
&& next_sep + 2 < lines.len()
76-
&& !lines[next_sep + 1].trim().is_empty()
77-
&& is_header_rule(lines[next_sep + 2])
78-
{
79-
break;
84+
// Raw tree-sitter parse section. New-format files have a second
85+
// `---` separator between the raw tree and the mapped AST. Legacy
86+
// files (with only one separator) have no raw section — in that
87+
// case `raw` stays empty and update mode will populate it.
88+
let raw_start = i;
89+
let mut next_sep = i;
90+
while next_sep < lines.len() && lines[next_sep].trim() != "---" {
91+
if is_next_case_header(&lines, next_sep) {
92+
break;
93+
}
94+
next_sep += 1;
8095
}
81-
next_sep += 1;
82-
}
83-
let raw = if next_sep < lines.len() && lines[next_sep].trim() == "---" {
84-
let raw_text = lines[raw_start..next_sep].join("\n").trim().to_string();
85-
i = next_sep + 1;
86-
raw_text
87-
} else {
88-
String::new()
89-
};
96+
raw = if next_sep < lines.len() && lines[next_sep].trim() == "---" {
97+
let raw_text = lines[raw_start..next_sep].join("\n").trim().to_string();
98+
i = next_sep + 1;
99+
raw_text
100+
} else {
101+
String::new()
102+
};
90103

91-
let expected_start = i;
92-
while i < lines.len() {
93-
if is_header_rule(lines[i])
94-
&& i + 2 < lines.len()
95-
&& !lines[i + 1].trim().is_empty()
96-
&& is_header_rule(lines[i + 2])
97-
{
98-
break;
104+
let expected_start = i;
105+
while i < lines.len() {
106+
if is_next_case_header(&lines, i) {
107+
break;
108+
}
109+
i += 1;
99110
}
100-
i += 1;
111+
expected = lines[expected_start..i].join("\n").trim().to_string();
101112
}
102-
let expected = lines[expected_start..i].join("\n").trim().to_string();
103113

104114
cases.push(CorpusCase {
105115
name,

0 commit comments

Comments
 (0)