diff --git a/fluree-db-memory/src/store.rs b/fluree-db-memory/src/store.rs index 083ddc7ad..24fd5055b 100644 --- a/fluree-db-memory/src/store.rs +++ b/fluree-db-memory/src/store.rs @@ -211,7 +211,20 @@ impl MemoryStore { } debug!("Creating memory ledger"); - self.fluree.create_ledger(MEMORY_LEDGER).await?; + match self.fluree.create_ledger(MEMORY_LEDGER).await { + Ok(_) => {} + Err(fluree_db_api::ApiError::LedgerExists(_)) => { + // Concurrent tool calls race through `is_initialized() == + // false` together (initialize takes no lock — mutation paths + // already hold the mutation lock when they call it, so it + // must not re-acquire). Losing the create race means another + // task is initializing; let it transact the schema. + debug!("Memory ledger created concurrently — skipping init"); + self.ensure_file_structure()?; + return Ok(()); + } + Err(e) => return Err(e.into()), + } debug!("Transacting memory schema"); let schema = memory_schema_jsonld(); @@ -790,9 +803,13 @@ WHERE {{\n\ ) -> Result> { self.initialize().await?; + // Escape backslashes before quotes — the s-expression parser resolves + // `\\` and `\"`, and content with an unescaped `"` followed by `)` + // used to wedge the parser in an infinite loop (a permanently pinned + // tokio worker per occurrence). let bind_expr = format!( "(fulltext ?content \"{}\")", - query_text.replace('"', "\\\"") + query_text.replace('\\', "\\\\").replace('"', "\\\"") ); let query = json!({ @@ -1216,6 +1233,39 @@ mod tests { ); } + #[tokio::test] + async fn recall_fulltext_survives_quotes_and_parens_in_query() { + // Regression for the MCP hang: content with a quote-then-paren + // sequence, fed back as the related-memories recall query, used to + // wedge the s-expression parser in an infinite loop. + let fluree = FlureeBuilder::memory().build_memory(); + let store = MemoryStore::new(fluree, None); + store.initialize().await.expect("initialize"); + + let content = r#"emit_list_item stores index metadata ("not rdf-list bnodes") and silently prunes operator-kept rows. Applies to the scalar-Eq push.""#; + let input = crate::types::MemoryInput { + kind: MemoryKind::Fact, + content: content.to_string(), + tags: vec!["repro".to_string()], + scope: crate::types::Scope::Repo, + severity: None, + artifact_refs: vec![], + branch: None, + rationale: Some(r#"backslash \ and "quotes" too"#.to_string()), + alternatives: None, + }; + store.add(input).await.expect("add"); + + let hits = store + .recall_fulltext(content, 4) + .await + .expect("recall must parse, not hang"); + assert!( + !hits.is_empty(), + "the just-added memory should match its own content" + ); + } + #[tokio::test] async fn drop_and_reinit_drops_by_whole_ledger_name() { let fluree = FlureeBuilder::memory().build_memory(); diff --git a/fluree-db-query/src/parse/filter_sexpr.rs b/fluree-db-query/src/parse/filter_sexpr.rs index a1deeb1cd..9ef923164 100644 --- a/fluree-db-query/src/parse/filter_sexpr.rs +++ b/fluree-db-query/src/parse/filter_sexpr.rs @@ -20,8 +20,8 @@ //! //! # Limitations //! -//! - Quoted strings cannot contain whitespace, parentheses, or escape sequences -//! - For complex string comparisons, use data expression format instead +//! - Quoted strings may contain whitespace, parentheses, and backslash +//! escapes (`\"` and `\\`); any other `\x` sequence is preserved verbatim use super::ast::UnresolvedExpression; use super::error::{ParseError, Result}; @@ -36,12 +36,8 @@ use std::sync::Arc; /// - Atoms: `?var`, numbers, `true`/`false`, quoted strings `"text"` /// - Expressions: `(op arg1 arg2 ...)` /// - Nested: `(and (> ?x 10) (< ?y 100))` -/// -/// # Limitations -/// - Quoted strings cannot contain whitespace, parentheses, or escape sequences -/// (e.g., `"Smith Jr"` with a space will not parse correctly) -/// - For complex string comparisons, use the data expression format instead: -/// `["filter", ["=", "?name", "Smith Jr"]]` +/// - Quoted strings may contain whitespace, parentheses, and backslash +/// escapes (`\"`, `\\`) pub fn parse_s_expression(s: &str) -> Result { let s = s.trim(); @@ -108,6 +104,51 @@ pub fn parse_s_expression(s: &str) -> Result { } } +/// Byte index just past the closing quote of the string literal at the start +/// of `s` (which must begin with `"`), honoring `\"` / `\\` escapes. +/// Returns `None` if the literal is unterminated. +fn quoted_string_end(s: &str) -> Option { + debug_assert!(s.starts_with('"')); + let mut escaped = false; + for (i, c) in s.char_indices().skip(1) { + if escaped { + escaped = false; + continue; + } + match c { + '\\' => escaped = true, + '"' => return Some(i + 1), + _ => {} + } + } + None +} + +/// Resolve backslash escapes in a string literal's contents. Only `\"` and +/// `\\` are recognized; any other `\x` is preserved verbatim. +fn unescape_string(s: &str) -> String { + if !s.contains('\\') { + return s.to_string(); + } + let mut out = String::with_capacity(s.len()); + let mut chars = s.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + match chars.next() { + Some(next @ ('"' | '\\')) => out.push(next), + Some(next) => { + out.push('\\'); + out.push(next); + } + None => out.push('\\'), + } + } else { + out.push(c); + } + } + out +} + /// Parse an atom in an S-expression (variable, number, string, boolean) fn parse_s_expression_atom(s: &str) -> Result { let s = s.trim(); @@ -136,7 +177,7 @@ fn parse_s_expression_atom(s: &str) -> Result { // String (might be quoted) if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 { let unquoted = &s[1..s.len() - 1]; - return Ok(UnresolvedExpression::string(unquoted)); + return Ok(UnresolvedExpression::string(unescape_string(unquoted))); } // Plain string @@ -161,12 +202,11 @@ fn parse_s_expression_args(s: &str) -> Result> { args.push(parse_s_expression_list(expr_str)?); remaining = remaining[end + 1..].trim(); } else if remaining.starts_with('"') { - // Handle quoted string as a single token (may contain whitespace/parens) - let after_open = &remaining[1..]; - if let Some(close_pos) = after_open.find('"') { - let end = close_pos + 2; // include both quotes - let atom = &remaining[..end]; - args.push(parse_s_expression_atom(atom)?); + // Handle quoted string as a single token (may contain + // whitespace/parens and `\"` / `\\` escapes) + if let Some(end) = quoted_string_end(remaining) { + let inner = &remaining[1..end - 1]; + args.push(UnresolvedExpression::string(unescape_string(inner))); remaining = remaining[end..].trim(); } else { return Err(ParseError::InvalidFilter( @@ -178,10 +218,15 @@ fn parse_s_expression_args(s: &str) -> Result> { let end = remaining .find(|c: char| c.is_whitespace() || c == '(' || c == ')') .unwrap_or(remaining.len()); - if end > 0 { - let atom = &remaining[..end]; - args.push(parse_s_expression_atom(atom)?); + if end == 0 { + // A stray ')' consumes nothing; erroring (rather than + // continuing) is what guarantees this loop terminates. + return Err(ParseError::InvalidFilter(format!( + "unexpected ')' in expression arguments: {remaining}" + ))); } + let atom = &remaining[..end]; + args.push(parse_s_expression_atom(atom)?); remaining = remaining[end..].trim(); } } @@ -233,9 +278,24 @@ fn parse_s_expression_arg(s: &str) -> Result<(UnresolvedExpression, &str)> { let expr = parse_s_expression_list(expr_str)?; return Ok((expr, &s[end + 1..])); } + if s.starts_with('"') { + let end = quoted_string_end(s) + .ok_or_else(|| ParseError::InvalidFilter("unclosed string literal".to_string()))?; + let inner = &s[1..end - 1]; + return Ok(( + UnresolvedExpression::string(unescape_string(inner)), + &s[end..], + )); + } let end = s .find(|c: char| c.is_whitespace() || c == '(' || c == ')' || c == '[' || c == ']') .unwrap_or(s.len()); + if end == 0 { + return Err(ParseError::InvalidFilter(format!( + "unexpected '{}' in expression argument", + &s[..s.chars().next().map_or(0, char::len_utf8)] + ))); + } let atom = &s[..end]; let expr = parse_s_expression_atom(atom)?; Ok((expr, &s[end..])) @@ -479,6 +539,62 @@ mod tests { } } + #[test] + fn test_stray_close_paren_errors_instead_of_looping() { + // Regression: a stray ')' in the argument list used to make + // parse_s_expression_args consume nothing and spin forever, + // permanently pinning a tokio worker at 100% CPU. + let result = parse_s_expression("(f ?x ))"); + assert!(result.is_err(), "stray ')' must be a parse error"); + } + + #[test] + fn test_quoted_string_with_escapes_and_parens() { + // Escaped quotes inside a string literal must not terminate the + // token; parens inside the string are literal content. + let expr = parse_s_expression(r#"(fulltext ?content "prunes rows (see \"push.\") tail")"#) + .unwrap(); + match expr { + UnresolvedExpression::Call { func, args } => { + assert_eq!(func.as_ref(), "fulltext"); + assert_eq!(args.len(), 2); + match &args[1] { + UnresolvedExpression::Const(_) => {} + other => panic!("expected string constant, got {other:?}"), + } + } + _ => panic!("Expected Call"), + } + } + + #[test] + fn test_fulltext_bind_roundtrip_with_hostile_content() { + // The exact shape that wedged the memory MCP server: user content + // containing a quote-then-paren, interpolated into a fulltext bind. + let content = r#"emit_list_item stores index metadata ("not rdf-list bnodes") and prunes operator-kept rows (scalar-Eq push.")"#; + let bind_expr = format!( + "(fulltext ?content \"{}\")", + content.replace('\\', "\\\\").replace('"', "\\\"") + ); + let expr = parse_s_expression(&bind_expr).expect("must parse, not hang"); + match expr { + UnresolvedExpression::Call { func, args } => { + assert_eq!(func.as_ref(), "fulltext"); + assert_eq!(args.len(), 2); + } + _ => panic!("Expected Call"), + } + } + + #[test] + fn test_unescape_string_helper() { + assert_eq!(unescape_string(r#"a\"b"#), r#"a"b"#); + assert_eq!(unescape_string(r"a\\b"), r"a\b"); + // Unrecognized escapes are preserved verbatim + assert_eq!(unescape_string(r"a\nb"), r"a\nb"); + assert_eq!(unescape_string("plain"), "plain"); + } + #[test] fn test_parse_single_arg_arithmetic() { let expr = parse_s_expression("(+ ?x)").unwrap(); diff --git a/fluree-db-query/src/parse/sexpr_tokenize.rs b/fluree-db-query/src/parse/sexpr_tokenize.rs index 51cde5c78..c70360c96 100644 --- a/fluree-db-query/src/parse/sexpr_tokenize.rs +++ b/fluree-db-query/src/parse/sexpr_tokenize.rs @@ -168,7 +168,7 @@ fn tokenize_sexpr_inner( current.clear(); } chars.next(); // consume opening quote - // Read until closing quote + // Read until closing quote, honoring \" and \\ let mut string_val = String::new(); let mut closed = false; while let Some(&sc) = chars.peek() { @@ -177,6 +177,18 @@ fn tokenize_sexpr_inner( closed = true; break; } + if sc == '\\' { + chars.next(); // consume backslash + match chars.peek() { + Some(&esc @ ('"' | '\\')) => { + string_val.push(esc); + chars.next(); + } + // Unrecognized escape: keep the backslash verbatim + _ => string_val.push('\\'), + } + continue; + } string_val.push(sc); chars.next(); } @@ -257,9 +269,14 @@ pub fn split_first_token(s: &str) -> Result<(&str, &str)> { pub fn find_matching_paren(s: &str) -> Result { let mut depth = 0; let mut in_quote = false; - for (i, c) in s.chars().enumerate() { + let mut escaped = false; + for (i, c) in s.char_indices() { if in_quote { - if c == '"' { + if escaped { + escaped = false; + } else if c == '\\' { + escaped = true; + } else if c == '"' { in_quote = false; } continue; @@ -294,8 +311,21 @@ pub fn find_matching_paren(s: &str) -> Result { /// ``` pub fn find_matching_bracket(s: &str) -> Result { let mut depth = 0; - for (i, c) in s.chars().enumerate() { + let mut in_quote = false; + let mut escaped = false; + for (i, c) in s.char_indices() { + if in_quote { + if escaped { + escaped = false; + } else if c == '\\' { + escaped = true; + } else if c == '"' { + in_quote = false; + } + continue; + } match c { + '"' => in_quote = true, '[' => depth += 1, ']' => { depth -= 1; @@ -367,6 +397,28 @@ mod tests { assert!(matches!(&b_list[2], SexprToken::Atom(s) if s == "false")); } + #[test] + fn test_tokenize_escaped_quote_in_string() { + let tokens = tokenize_sexpr(r#"(concat ?x "he said \"hi\" (loudly)")"#).unwrap(); + let list = tokens[0].as_list().unwrap(); + assert_eq!(list.len(), 3); + assert!(matches!(&list[2], SexprToken::String(s) if s == r#"he said "hi" (loudly)"#)); + } + + #[test] + fn test_find_matching_paren_skips_escaped_quotes() { + let s = r#"(fulltext ?c "a \") b") tail"#; + let end = find_matching_paren(s).unwrap(); + assert_eq!(&s[..=end], r#"(fulltext ?c "a \") b")"#); + } + + #[test] + fn test_find_matching_bracket_skips_quoted_brackets() { + let s = r#"["a]b" "c" ] tail"#; + let end = find_matching_bracket(s).unwrap(); + assert_eq!(&s[..=end], r#"["a]b" "c" ]"#); + } + #[test] fn test_tokenize_unclosed_string() { let result = tokenize_sexpr(r#"(foo "unclosed)"#);