Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 49 additions & 2 deletions sqlparser_bench/benches/sqlparser_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use criterion::{criterion_group, criterion_main, Criterion};
use sqlparser::dialect::GenericDialect;
use sqlparser::dialect::{GenericDialect, PostgreSqlDialect, SQLiteDialect};
use sqlparser::keywords::Keyword;
use sqlparser::parser::Parser;
use sqlparser::tokenizer::{Span, Word};
Expand Down Expand Up @@ -177,11 +177,58 @@ fn parse_compound_chain(c: &mut Criterion) {
group.finish();
}

/// Measures parse time for unterminated `if(current_time(current_time(...x`
/// inputs at several nesting depths.
///
/// Before the `parse_prefix` failure cache existed, every `current_time(`
/// level was attempted twice (speculative reserved-word arm, then the
/// unreserved-word fallback), so total work doubled with each extra level.
/// With the cache in place the cost should grow linearly with chain length.
fn parse_prefix_keyword_call_chain(c: &mut Criterion) {
    use std::hint::black_box;

    let dialect = PostgreSqlDialect {};
    let mut group = c.benchmark_group("parse_prefix_keyword_call_chain");

    for depth in [10usize, 20, 30] {
        // Build `if(` + `current_time(` * depth + `x`; the parentheses are
        // deliberately left unclosed.
        let mut sql = String::from("if(");
        sql.push_str(&"current_time(".repeat(depth));
        sql.push_str("x");

        let bench_name = format!("chain_{depth}");
        group.bench_function(bench_name, |bencher| {
            bencher.iter(|| {
                // Only the time taken matters; the parse outcome is discarded.
                let _ = Parser::parse_sql(&dialect, black_box(&sql));
            });
        });
    }

    group.finish();
}

/// Measures parse time for `case -case -case ... c` chains at several lengths.
///
/// Each `case` token used to start a speculative `parse_case_expr` that
/// descended the rest of the chain and failed. Because the unreserved-word
/// fallback then yields `Identifier(case)`, the overall `parse_prefix` call
/// succeeds and the whole-call failure cache is never populated, leaving 2^N
/// work. The per-arm cache is what short-circuits the repeated descent.
fn parse_prefix_case_chain(c: &mut Criterion) {
    use std::hint::black_box;

    let dialect = SQLiteDialect {};
    let mut group = c.benchmark_group("parse_prefix_case_chain");

    for links in [10usize, 20, 30] {
        // `case<TAB>-` repeated `links` times, terminated by the identifier `c`.
        let mut sql = "case\t-".repeat(links);
        sql.push_str("c");

        let bench_name = format!("chain_{links}");
        group.bench_function(bench_name, |bencher| {
            bencher.iter(|| {
                // Only the time taken matters; the parse outcome is discarded.
                let _ = Parser::parse_sql(&dialect, black_box(&sql));
            });
        });
    }

    group.finish();
}

// Registers every benchmark target in this file under the `benches` group.
// Each target is listed exactly once, separated by commas.
criterion_group!(
    benches,
    basic_queries,
    word_to_ident,
    parse_many_identifiers,
    parse_compound_chain,
    parse_prefix_keyword_call_chain,
    parse_prefix_case_chain
);
criterion_main!(benches);
49 changes: 48 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#[cfg(not(feature = "std"))]
use alloc::{
boxed::Box,
collections::BTreeMap,
format,
string::{String, ToString},
vec,
Expand All @@ -24,6 +25,9 @@ use core::{
fmt::{self, Display},
str::FromStr,
};
#[cfg(feature = "std")]
use std::collections::BTreeMap;

use helpers::attached_token::AttachedToken;

use log::debug;
Expand Down Expand Up @@ -359,6 +363,12 @@ pub struct Parser<'a> {
options: ParserOptions,
/// Ensures the stack does not overflow by limiting recursion depth.
recursion_counter: RecursionCounter,
/// Cached errors from `parse_prefix` calls that returned `Err`. See
/// [`Parser::parse_prefix`] for the 2^N patterns this guards against.
failed_prefix_positions: BTreeMap<usize, ParserError>,
/// Cached errors from the speculative reserved-word prefix arm. See
/// [`Parser::parse_prefix`] for the 2^N patterns this guards against.
failed_reserved_word_prefix_positions: BTreeMap<usize, ParserError>,
}

impl<'a> Parser<'a> {
Expand All @@ -385,6 +395,8 @@ impl<'a> Parser<'a> {
dialect,
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
failed_prefix_positions: BTreeMap::new(),
failed_reserved_word_prefix_positions: BTreeMap::new(),
}
}

Expand Down Expand Up @@ -446,6 +458,8 @@ impl<'a> Parser<'a> {
pub fn with_tokens_with_locations(mut self, tokens: Vec<TokenWithSpan>) -> Self {
    // Both failure caches are keyed by a position in the token stream, so
    // anything recorded against the previous tokens is stale once the stream
    // is replaced.
    self.failed_reserved_word_prefix_positions.clear();
    self.failed_prefix_positions.clear();

    // Rewind to the start and install the new stream.
    self.index = 0;
    self.tokens = tokens;

    self
}

Expand Down Expand Up @@ -1717,6 +1731,23 @@ impl<'a> Parser<'a> {
return prefix;
}

// Memoize parse_prefix failures to break 2^N speculation when both
// prefix arms fail at every level (e.g. `IF(current_time(...x`).
// The per-arm cache in `parse_prefix_inner` complements this for
// chains where the reserved arm fails but the unreserved fallback
// succeeds (e.g. `case-case-...c`).
let start_index = self.index;
if let Some(cached) = self.failed_prefix_positions.get(&start_index) {
return Err(cached.clone());
}
let result = self.parse_prefix_inner();
if let Err(ref e) = result {
self.failed_prefix_positions.insert(start_index, e.clone());
}
result
}

fn parse_prefix_inner(&mut self) -> Result<Expr, ParserError> {
// PostgreSQL allows any string literal to be preceded by a type name, indicating that the
// string literal represents a literal of that type. Some examples:
//
Expand Down Expand Up @@ -1801,7 +1832,21 @@ impl<'a> Parser<'a> {
// We first try to parse the word and following tokens as a special expression, and if that fails,
// we rollback and try to parse it as an identifier.
let w = w.clone();
match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) {
// Memoize failed speculative reserved-word parses. When
// the reserved arm (CASE, CURRENT_TIME, etc.) does
// exponential work but the unreserved fallback ultimately
// succeeds, the overall `parse_prefix` returns `Ok` and the
// outer cache never fires. Chains like `case-case-...c`
// need this per-arm cache to break the doubling.
let try_parse_result = if let Some(cached) = self
.failed_reserved_word_prefix_positions
.get(&next_token_index)
{
Err(cached.clone())
} else {
self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span))
};
match try_parse_result {
// This word indicated an expression prefix and parsing was successful
Ok(Some(expr)) => Ok(expr),

Expand All @@ -1815,6 +1860,8 @@ impl<'a> Parser<'a> {
// we rollback and return the parsing error we got from trying to parse a
// special expression (to maintain backwards compatibility of parsing errors).
Err(e) => {
self.failed_reserved_word_prefix_positions
.insert(next_token_index, e.clone());
if !self.dialect.is_reserved_for_identifier(w.keyword) {
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
parser.parse_expr_prefix_by_unreserved_word(&w, span)
Expand Down
46 changes: 46 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19004,3 +19004,49 @@ fn parse_compound_chain_no_exponential_blowup() {
rx.recv_timeout(Duration::from_secs(5))
.expect("parser should reject this quickly, not loop exponentially");
}

/// Guards against the 2^N parse-time blowup in `parse_prefix` for inputs shaped
/// like `IF(current_time(current_time(...x`.
///
/// Every nested `current_time(` used to be explored twice per level (the
/// speculative reserved-word arm and then the unreserved-word fallback). With
/// the position-keyed failure cache the parse must finish well inside the
/// timeout; the parse result itself is not inspected.
#[test]
fn parse_prefix_keyword_call_chain_no_exponential_blowup() {
    use std::sync::mpsc::channel;
    use std::thread::spawn;
    use std::time::Duration;

    // `if(` + 30 x `current_time(` + `x`, with no closing parentheses.
    let mut sql = String::from("if(");
    sql.push_str(&"current_time(".repeat(30));
    sql.push_str("x");

    // Parse on a worker thread so a runaway parse cannot hang the test run.
    let (done_tx, done_rx) = channel();
    spawn(move || {
        let _ = Parser::parse_sql(&PostgreSqlDialect {}, &sql);
        let _ = done_tx.send(());
    });

    let time_limit = Duration::from_secs(5);
    done_rx
        .recv_timeout(time_limit)
        .expect("parser should reject this quickly, not loop exponentially");
}

/// Guards against the 2^N parse-time blowup in `parse_prefix` for inputs shaped
/// like `case-case-case-...c`.
///
/// Each `case` token starts a speculative `parse_case_expr` that fails, yet the
/// unreserved-word fallback returns `Identifier(case)`, so the whole-call
/// failure cache never fires. The per-arm cache must cut the speculative
/// descent short so the parse finishes well inside the timeout; the parse
/// result itself is not inspected.
#[test]
fn parse_prefix_case_chain_no_exponential_blowup() {
    use std::sync::mpsc::channel;
    use std::thread::spawn;
    use std::time::Duration;

    // 30 x `case<TAB>-`, terminated by the identifier `c`.
    let mut sql = "case\t-".repeat(30);
    sql.push_str("c");

    // Parse on a worker thread so a runaway parse cannot hang the test run.
    let (done_tx, done_rx) = channel();
    spawn(move || {
        let _ = Parser::parse_sql(&SQLiteDialect {}, &sql);
        let _ = done_tx.send(());
    });

    let time_limit = Duration::from_secs(5);
    done_rx
        .recv_timeout(time_limit)
        .expect("parser should reject this quickly, not loop exponentially");
}
Loading