Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion sqlparser_bench/benches/sqlparser_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,36 @@ fn parse_compound_chain(c: &mut Criterion) {
group.finish();
}

/// Benchmark parsing pathological chains of `<ident>-NOT-<ident>.` segments
/// that previously triggered 2^N speculative `parse_not` work. The input
/// `SELECT x-not-b.x-not-b...` rejects at the trailing `.`. Each `-NOT-`
/// segment used to recurse into `parse_not` which re-walks the rest of the
/// chain, doubling the work at every level. Post-fix the cost is linear in
/// the number of segments.
fn parse_not_chain(c: &mut Criterion) {
let mut group = c.benchmark_group("parse_not_chain");
let dialect = GenericDialect {};

for &n in &[10usize, 20, 30] {
let body: String = std::iter::repeat_n("x-not-b.", n).collect();
let sql = format!("SELECT {body}");

group.bench_function(format!("chain_{n}"), |b| {
b.iter(|| {
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
});
});
}

group.finish();
}

criterion_group!(
benches,
basic_queries,
word_to_ident,
parse_many_identifiers,
parse_compound_chain
parse_compound_chain,
parse_not_chain
);
criterion_main!(benches);
28 changes: 28 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

//! SQL Parser

#[cfg(not(feature = "std"))]
use alloc::collections::BTreeSet;
#[cfg(not(feature = "std"))]
use alloc::{
boxed::Box,
Expand All @@ -25,6 +27,8 @@ use core::{
str::FromStr,
};
use helpers::attached_token::AttachedToken;
#[cfg(feature = "std")]
use std::collections::BTreeSet;

use log::debug;

Expand Down Expand Up @@ -359,6 +363,14 @@ pub struct Parser<'a> {
options: ParserOptions,
/// Ensures the stack does not overflow by limiting recursion depth.
recursion_counter: RecursionCounter,
/// Token indices where a speculative attempt to parse `NOT` as a unary
/// prefix operator has already failed during this parse. Re-entering
/// `parse_not` at the same position would repeat the same work and fail
/// again, so the second visit short-circuits to identifier interpretation.
/// Without this cache, inputs like `SELECT x-not-b.x-not-b...` (chains of
/// `<ident>-NOT-<ident>.` ending in a parse error) trigger 2^N exploration
/// because each `-NOT-` segment otherwise re-walks the rest of the chain.
failed_unary_not_positions: BTreeSet<usize>,
}

impl<'a> Parser<'a> {
Expand All @@ -385,6 +397,7 @@ impl<'a> Parser<'a> {
dialect,
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
failed_unary_not_positions: BTreeSet::new(),
}
}

Expand Down Expand Up @@ -446,6 +459,7 @@ impl<'a> Parser<'a> {
pub fn with_tokens_with_locations(mut self, tokens: Vec<TokenWithSpan>) -> Self {
self.tokens = tokens;
self.index = 0;
self.failed_unary_not_positions.clear();
self
}

Expand Down Expand Up @@ -1801,6 +1815,17 @@ impl<'a> Parser<'a> {
// We first try to parse the word and following tokens as a special expression, and if that fails,
// we rollback and try to parse it as an identifier.
let w = w.clone();
// If we already tried to parse `NOT` as a unary prefix operator
// at this exact position and it failed, skip the speculative
// path and fall back to identifier interpretation directly. The
// speculative `parse_not` re-walks the rest of the expression,
// so repeating it on every visit causes 2^N work on chains
// like `x-not-b.x-not-b...` that end in a parse error.
if w.keyword == Keyword::NOT
&& self.failed_unary_not_positions.contains(&self.index)
{
return self.parse_expr_prefix_by_unreserved_word(&w, span);
}
match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) {
// This word indicated an expression prefix and parsing was successful
Ok(Some(expr)) => Ok(expr),
Expand All @@ -1815,6 +1840,9 @@ impl<'a> Parser<'a> {
// we rollback and return the parsing error we got from trying to parse a
// special expression (to maintain backwards compatibility of parsing errors).
Err(e) => {
if w.keyword == Keyword::NOT {
self.failed_unary_not_positions.insert(self.index);
}
if !self.dialect.is_reserved_for_identifier(w.keyword) {
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
parser.parse_expr_prefix_by_unreserved_word(&w, span)
Expand Down
26 changes: 26 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19004,3 +19004,29 @@ fn parse_compound_chain_no_exponential_blowup() {
rx.recv_timeout(Duration::from_secs(5))
.expect("parser should reject this quickly, not loop exponentially");
}

/// Regression test for the 2^N parse-time blowup on chains of
/// `<ident>-NOT-<ident>.` segments ending in a parse error. Each `-NOT-`
/// segment used to trigger a speculative `parse_not` that re-walks the rest of
/// the chain, so re-entering it at every `NOT` doubled the work. The fix
/// caches positions where the speculative `parse_not` already failed and
/// skips repeating it. Post-fix the parser rejects this in well under a
/// millisecond, so the timeout is a hang guard, not a perf threshold.
#[test]
fn parse_not_chain_no_exponential_blowup() {
use std::sync::mpsc;
use std::thread;
use std::time::Duration;

let body: String = std::iter::repeat_n("x-not-b.", 30).collect();
let sql = format!("SELECT {body}");

let (tx, rx) = mpsc::channel();
thread::spawn(move || {
let _ = Parser::parse_sql(&GenericDialect {}, &sql);
let _ = tx.send(());
});

rx.recv_timeout(Duration::from_secs(5))
.expect("parser should reject this quickly, not loop exponentially");
}
Loading