apache · LucaCappelletti94 · May 23, 2026
diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs
@@ -177,11 +177,36 @@ fn parse_compound_chain(c: &mut Criterion) {
     group.finish();
 }
 
+/// Benchmark parsing pathological chains of `<ident>-NOT-<ident>.` segments
+/// that previously triggered 2^N speculative `parse_not` work. The input
+/// `SELECT x-not-b.x-not-b...` rejects at the trailing `.`. Each `-NOT-`
+/// segment used to recurse into `parse_not` which re-walks the rest of the
+/// chain, doubling the work at every level. Post-fix the cost is linear in
+/// the number of segments.
+fn parse_not_chain(c: &mut Criterion) {
+    let mut group = c.benchmark_group("parse_not_chain");
+    let dialect = GenericDialect {};
+
+    for &n in &[10usize, 20, 30] {
+        let body: String = std::iter::repeat_n("x-not-b.", n).collect();
+        let sql = format!("SELECT {body}");
+
+        group.bench_function(format!("chain_{n}"), |b| {
+            b.iter(|| {
+                let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
+            });
+        });
+    }
+
+    group.finish();
+}
+
 criterion_group!(
     benches,
     basic_queries,
     word_to_ident,
     parse_many_identifiers,
-    parse_compound_chain
+    parse_compound_chain,
+    parse_not_chain
 );
 criterion_main!(benches);
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
@@ -12,6 +12,8 @@
 
 //! SQL Parser
 
+#[cfg(not(feature = "std"))]
+use alloc::collections::BTreeSet;
 #[cfg(not(feature = "std"))]
 use alloc::{
     boxed::Box,
@@ -25,6 +27,8 @@ use core::{
     str::FromStr,
 };
 use helpers::attached_token::AttachedToken;
+#[cfg(feature = "std")]
+use std::collections::BTreeSet;
 
 use log::debug;
 
@@ -359,6 +363,14 @@ pub struct Parser<'a> {
     options: ParserOptions,
     /// Ensures the stack does not overflow by limiting recursion depth.
     recursion_counter: RecursionCounter,
+    /// Token indices where a speculative attempt to parse `NOT` as a unary
+    /// prefix operator has already failed during this parse. Re-entering
+    /// `parse_not` at the same position would repeat the same work and fail
+    /// again, so the second visit short-circuits to identifier interpretation.
+    /// Without this cache, inputs like `SELECT x-not-b.x-not-b...` (chains of
+    /// `<ident>-NOT-<ident>.` ending in a parse error) trigger 2^N exploration
+    /// because each `-NOT-` segment otherwise re-walks the rest of the chain.
+    failed_unary_not_positions: BTreeSet<usize>,
 }
 
 impl<'a> Parser<'a> {
@@ -385,6 +397,7 @@ impl<'a> Parser<'a> {
             dialect,
             recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
             options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
+            failed_unary_not_positions: BTreeSet::new(),
         }
     }
 
@@ -446,6 +459,7 @@ impl<'a> Parser<'a> {
     pub fn with_tokens_with_locations(mut self, tokens: Vec<TokenWithSpan>) -> Self {
         self.tokens = tokens;
         self.index = 0;
+        self.failed_unary_not_positions.clear();
         self
     }
 
@@ -1801,6 +1815,17 @@ impl<'a> Parser<'a> {
                 // We first try to parse the word and following tokens as a special expression, and if that fails,
                 // we rollback and try to parse it as an identifier.
                 let w = w.clone();
+                // If we already tried to parse `NOT` as a unary prefix operator
+                // at this exact position and it failed, skip the speculative
+                // path and fall back to identifier interpretation directly. The
+                // speculative `parse_not` re-walks the rest of the expression,
+                // so repeating it on every visit causes 2^N work on chains
+                // like `x-not-b.x-not-b...` that end in a parse error.
+                if w.keyword == Keyword::NOT
+                    && self.failed_unary_not_positions.contains(&self.index)
+                {
+                    return self.parse_expr_prefix_by_unreserved_word(&w, span);
+                }
                 match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) {
                     // This word indicated an expression prefix and parsing was successful
                     Ok(Some(expr)) => Ok(expr),
@@ -1815,6 +1840,9 @@ impl<'a> Parser<'a> {
                     // we rollback and return the parsing error we got from trying to parse a
                     // special expression (to maintain backwards compatibility of parsing errors).
                     Err(e) => {
+                        if w.keyword == Keyword::NOT {
+                            self.failed_unary_not_positions.insert(self.index);
+                        }
                         if !self.dialect.is_reserved_for_identifier(w.keyword) {
                             if let Ok(Some(expr)) = self.maybe_parse(|parser| {
                                 parser.parse_expr_prefix_by_unreserved_word(&w, span)

diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
@@ -19004,3 +19004,29 @@ fn parse_compound_chain_no_exponential_blowup() {
     rx.recv_timeout(Duration::from_secs(5))
         .expect("parser should reject this quickly, not loop exponentially");
 }
+
+/// Regression test for the 2^N parse-time blowup on chains of
+/// `<ident>-NOT-<ident>.` segments ending in a parse error. Each `-NOT-`
+/// segment used to trigger a speculative `parse_not` that re-walks the rest of
+/// the chain, so re-entering it at every `NOT` doubled the work. The fix
+/// caches positions where the speculative `parse_not` already failed and
+/// skips repeating it. Post-fix the parser rejects this in well under a
+/// millisecond, so the timeout is a hang guard, not a perf threshold.
+#[test]
+fn parse_not_chain_no_exponential_blowup() {
+    use std::sync::mpsc;
+    use std::thread;
+    use std::time::Duration;
+
+    let body: String = std::iter::repeat_n("x-not-b.", 30).collect();
+    let sql = format!("SELECT {body}");
+
+    let (tx, rx) = mpsc::channel();
+    thread::spawn(move || {
+        let _ = Parser::parse_sql(&GenericDialect {}, &sql);
+        let _ = tx.send(());
+    });
+
+    rx.recv_timeout(Duration::from_secs(5))
+        .expect("parser should reject this quickly, not loop exponentially");
+}