From 2520eb6f550a717d2fada18581b75b21723428b6 Mon Sep 17 00:00:00 2001 From: LoukasPap Date: Wed, 22 Apr 2026 23:44:00 +0300 Subject: [PATCH] Fix \?, \+ and \| to be treated as ERE in BRE mode --- src/sed/compiler.rs | 16 ++++++++++++++-- tests/by-util/test_sed.rs | 9 +++++++++ .../sed/output/subst_alternation_operator | 14 ++++++++++++++ .../sed/output/subst_quantifier_one_or_more | 14 ++++++++++++++ .../sed/output/subst_quantifier_zero_or_one | 14 ++++++++++++++ 5 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 tests/fixtures/sed/output/subst_alternation_operator create mode 100644 tests/fixtures/sed/output/subst_quantifier_one_or_more create mode 100644 tests/fixtures/sed/output/subst_quantifier_zero_or_one diff --git a/src/sed/compiler.rs b/src/sed/compiler.rs index 82319761..01c3a6de 100644 --- a/src/sed/compiler.rs +++ b/src/sed/compiler.rs @@ -533,7 +533,7 @@ fn parse_command_ending( } /// Convert a primitive BRE pattern to a safe ERE-compatible pattern string. -/// - Replaces `\(` and `\)` with `(` and `)`. +/// - Replaces `\(`, `\)`, `\?`, `\+` and `\|` with `(`, `)`, `?`, `+` and `|`. /// - Puts single-digit back-references in non-capturing groups.. /// - Escapes ERE-only metacharacters: `+ ? { } | ( )`. /// - Leaves all other characters as-is. @@ -554,6 +554,18 @@ fn bre_to_ere(pattern: &str) -> String { chars.next(); result.push(')'); // Group end } + Some('?') => { + chars.next(); + result.push('?'); // Quantifier 0 or 1 + } + Some('+') => { + chars.next(); + result.push('+'); // Quantifier 1 or more + } + Some('|') => { + chars.next(); + result.push('|'); // Alternation operator + } Some(v) if v.is_ascii_digit() => { // Back-reference. In sed BREs these are single-digit // (\1-\9) whereas fancy_regex supports multi-digit @@ -2192,7 +2204,7 @@ mod tests { // bre_to_ere #[test] fn test_bre_group_translation() { - assert_eq!(bre_to_ere(r"\(abc\)"), "(abc)"); + assert_eq!(bre_to_ere(r"\(a\?b\+c\|\)"), "(a?b+c|)"); assert_eq!(bre_to_ere(r"a\(b\)c"), "a(b)c"); } diff --git a/tests/by-util/test_sed.rs b/tests/by-util/test_sed.rs index 6c73def9..13a65767 100644 --- a/tests/by-util/test_sed.rs +++ b/tests/by-util/test_sed.rs @@ -300,6 +300,15 @@ check_output!( LINES1 ] ); +check_output!( + subst_quantifier_zero_or_one, + ["-e", r"s/_[0-9]\([0-9]\)\?/_x\1/g", LINES1] +); +check_output!( + subst_quantifier_one_or_more, + ["-e", r"s/_[0-9]\+/_x/g", LINES1] +); +check_output!(subst_alternation_operator, ["-e", r"s/0\|1/x/g", LINES1]); check_output!(subst_multiline, ["-e", "s/_/u0\\\nu1\\\nu2/g", LINES1]); check_output!(subst_numbered_replacement, ["-e", r"s/./X/4", LINES1]); check_output!(subst_brace, ["-e", r"s/[123]/X/g", LINES1]); diff --git a/tests/fixtures/sed/output/subst_alternation_operator b/tests/fixtures/sed/output/subst_alternation_operator new file mode 100644 index 00000000..c51cab88 --- /dev/null +++ b/tests/fixtures/sed/output/subst_alternation_operator @@ -0,0 +1,14 @@ +lx_x +lx_2 +lx_3 +lx_4 +lx_5 +lx_6 +lx_7 +lx_8 +lx_9 +lx_xx +lx_xx +lx_x2 +lx_x3 +lx_x4 diff --git a/tests/fixtures/sed/output/subst_quantifier_one_or_more b/tests/fixtures/sed/output/subst_quantifier_one_or_more new file mode 100644 index 00000000..ccf98e2d --- /dev/null +++ b/tests/fixtures/sed/output/subst_quantifier_one_or_more @@ -0,0 +1,14 @@ +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x diff --git a/tests/fixtures/sed/output/subst_quantifier_zero_or_one b/tests/fixtures/sed/output/subst_quantifier_zero_or_one new file mode 100644 index 00000000..c3e50536 --- /dev/null +++ b/tests/fixtures/sed/output/subst_quantifier_zero_or_one @@ -0,0 +1,14 @@ +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x +l1_x0 +l1_x1 +l1_x2 +l1_x3 +l1_x4