Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 52 additions & 2 deletions fluree-db-memory/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,20 @@ impl MemoryStore {
}

debug!("Creating memory ledger");
self.fluree.create_ledger(MEMORY_LEDGER).await?;
match self.fluree.create_ledger(MEMORY_LEDGER).await {
Ok(_) => {}
Err(fluree_db_api::ApiError::LedgerExists(_)) => {
// Concurrent tool calls race through `is_initialized() ==
// false` together (initialize takes no lock — mutation paths
// already hold the mutation lock when they call it, so it
// must not re-acquire). Losing the create race means another
// task is initializing; let it transact the schema.
debug!("Memory ledger created concurrently — skipping init");
self.ensure_file_structure()?;
return Ok(());
}
Err(e) => return Err(e.into()),
}

debug!("Transacting memory schema");
let schema = memory_schema_jsonld();
Expand Down Expand Up @@ -790,9 +803,13 @@ WHERE {{\n\
) -> Result<Vec<(String, f64)>> {
self.initialize().await?;

// Escape backslashes before quotes — the s-expression parser resolves
// `\\` and `\"`, and content with an unescaped `"` followed by `)`
// used to wedge the parser in an infinite loop (a permanently pinned
// tokio worker per occurrence).
let bind_expr = format!(
"(fulltext ?content \"{}\")",
query_text.replace('"', "\\\"")
query_text.replace('\\', "\\\\").replace('"', "\\\"")
);

let query = json!({
Expand Down Expand Up @@ -1216,6 +1233,39 @@ mod tests {
);
}

#[tokio::test]
async fn recall_fulltext_survives_quotes_and_parens_in_query() {
// Regression for the MCP hang: content with a quote-then-paren
// sequence, fed back as the related-memories recall query, used to
// wedge the s-expression parser in an infinite loop.
let fluree = FlureeBuilder::memory().build_memory();
let store = MemoryStore::new(fluree, None);
store.initialize().await.expect("initialize");

let content = r#"emit_list_item stores index metadata ("not rdf-list bnodes") and silently prunes operator-kept rows. Applies to the scalar-Eq push.""#;
let input = crate::types::MemoryInput {
kind: MemoryKind::Fact,
content: content.to_string(),
tags: vec!["repro".to_string()],
scope: crate::types::Scope::Repo,
severity: None,
artifact_refs: vec![],
branch: None,
rationale: Some(r#"backslash \ and "quotes" too"#.to_string()),
alternatives: None,
};
store.add(input).await.expect("add");

let hits = store
.recall_fulltext(content, 4)
.await
.expect("recall must parse, not hang");
assert!(
!hits.is_empty(),
"the just-added memory should match its own content"
);
}

#[tokio::test]
async fn drop_and_reinit_drops_by_whole_ledger_name() {
let fluree = FlureeBuilder::memory().build_memory();
Expand Down
152 changes: 134 additions & 18 deletions fluree-db-query/src/parse/filter_sexpr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
//!
//! # Limitations
//!
//! - Quoted strings cannot contain whitespace, parentheses, or escape sequences
//! - For complex string comparisons, use data expression format instead
//! - Quoted strings may contain whitespace, parentheses, and backslash
//! escapes (`\"` and `\\`); any other `\x` sequence is preserved verbatim

use super::ast::UnresolvedExpression;
use super::error::{ParseError, Result};
Expand All @@ -36,12 +36,8 @@ use std::sync::Arc;
/// - Atoms: `?var`, numbers, `true`/`false`, quoted strings `"text"`
/// - Expressions: `(op arg1 arg2 ...)`
/// - Nested: `(and (> ?x 10) (< ?y 100))`
///
/// # Limitations
/// - Quoted strings cannot contain whitespace, parentheses, or escape sequences
/// (e.g., `"Smith Jr"` with a space will not parse correctly)
/// - For complex string comparisons, use the data expression format instead:
/// `["filter", ["=", "?name", "Smith Jr"]]`
/// - Quoted strings may contain whitespace, parentheses, and backslash
/// escapes (`\"`, `\\`)
pub fn parse_s_expression(s: &str) -> Result<UnresolvedExpression> {
let s = s.trim();

Expand Down Expand Up @@ -108,6 +104,51 @@ pub fn parse_s_expression(s: &str) -> Result<UnresolvedExpression> {
}
}

/// Byte index just past the closing quote of the string literal at the start
/// of `s` (which must begin with `"`), honoring `\"` / `\\` escapes.
/// Returns `None` if the literal is unterminated.
fn quoted_string_end(s: &str) -> Option<usize> {
debug_assert!(s.starts_with('"'));
let mut escaped = false;
for (i, c) in s.char_indices().skip(1) {
if escaped {
escaped = false;
continue;
}
match c {
'\\' => escaped = true,
'"' => return Some(i + 1),
_ => {}
}
}
None
}

/// Resolve backslash escapes in a string literal's contents. Only `\"` and
/// `\\` are recognized; any other `\x` is preserved verbatim.
fn unescape_string(s: &str) -> String {
if !s.contains('\\') {
return s.to_string();
}
let mut out = String::with_capacity(s.len());
let mut chars = s.chars();
while let Some(c) = chars.next() {
if c == '\\' {
match chars.next() {
Some(next @ ('"' | '\\')) => out.push(next),
Some(next) => {
out.push('\\');
out.push(next);
}
None => out.push('\\'),
}
} else {
out.push(c);
}
}
out
}

/// Parse an atom in an S-expression (variable, number, string, boolean)
fn parse_s_expression_atom(s: &str) -> Result<UnresolvedExpression> {
let s = s.trim();
Expand Down Expand Up @@ -136,7 +177,7 @@ fn parse_s_expression_atom(s: &str) -> Result<UnresolvedExpression> {
// String (might be quoted)
if s.starts_with('"') && s.ends_with('"') && s.len() >= 2 {
let unquoted = &s[1..s.len() - 1];
return Ok(UnresolvedExpression::string(unquoted));
return Ok(UnresolvedExpression::string(unescape_string(unquoted)));
}

// Plain string
Expand All @@ -161,12 +202,11 @@ fn parse_s_expression_args(s: &str) -> Result<Vec<UnresolvedExpression>> {
args.push(parse_s_expression_list(expr_str)?);
remaining = remaining[end + 1..].trim();
} else if remaining.starts_with('"') {
// Handle quoted string as a single token (may contain whitespace/parens)
let after_open = &remaining[1..];
if let Some(close_pos) = after_open.find('"') {
let end = close_pos + 2; // include both quotes
let atom = &remaining[..end];
args.push(parse_s_expression_atom(atom)?);
// Handle quoted string as a single token (may contain
// whitespace/parens and `\"` / `\\` escapes)
if let Some(end) = quoted_string_end(remaining) {
let inner = &remaining[1..end - 1];
args.push(UnresolvedExpression::string(unescape_string(inner)));
remaining = remaining[end..].trim();
} else {
return Err(ParseError::InvalidFilter(
Expand All @@ -178,10 +218,15 @@ fn parse_s_expression_args(s: &str) -> Result<Vec<UnresolvedExpression>> {
let end = remaining
.find(|c: char| c.is_whitespace() || c == '(' || c == ')')
.unwrap_or(remaining.len());
if end > 0 {
let atom = &remaining[..end];
args.push(parse_s_expression_atom(atom)?);
if end == 0 {
// A stray ')' consumes nothing; erroring (rather than
// continuing) is what guarantees this loop terminates.
return Err(ParseError::InvalidFilter(format!(
"unexpected ')' in expression arguments: {remaining}"
)));
}
let atom = &remaining[..end];
args.push(parse_s_expression_atom(atom)?);
remaining = remaining[end..].trim();
}
}
Expand Down Expand Up @@ -233,9 +278,24 @@ fn parse_s_expression_arg(s: &str) -> Result<(UnresolvedExpression, &str)> {
let expr = parse_s_expression_list(expr_str)?;
return Ok((expr, &s[end + 1..]));
}
if s.starts_with('"') {
let end = quoted_string_end(s)
.ok_or_else(|| ParseError::InvalidFilter("unclosed string literal".to_string()))?;
let inner = &s[1..end - 1];
return Ok((
UnresolvedExpression::string(unescape_string(inner)),
&s[end..],
));
}
let end = s
.find(|c: char| c.is_whitespace() || c == '(' || c == ')' || c == '[' || c == ']')
.unwrap_or(s.len());
if end == 0 {
return Err(ParseError::InvalidFilter(format!(
"unexpected '{}' in expression argument",
&s[..s.chars().next().map_or(0, char::len_utf8)]
)));
}
let atom = &s[..end];
let expr = parse_s_expression_atom(atom)?;
Ok((expr, &s[end..]))
Expand Down Expand Up @@ -479,6 +539,62 @@ mod tests {
}
}

#[test]
fn test_stray_close_paren_errors_instead_of_looping() {
// Regression: a stray ')' in the argument list used to make
// parse_s_expression_args consume nothing and spin forever,
// permanently pinning a tokio worker at 100% CPU.
let result = parse_s_expression("(f ?x ))");
assert!(result.is_err(), "stray ')' must be a parse error");
}

#[test]
fn test_quoted_string_with_escapes_and_parens() {
// Escaped quotes inside a string literal must not terminate the
// token; parens inside the string are literal content.
let expr = parse_s_expression(r#"(fulltext ?content "prunes rows (see \"push.\") tail")"#)
.unwrap();
match expr {
UnresolvedExpression::Call { func, args } => {
assert_eq!(func.as_ref(), "fulltext");
assert_eq!(args.len(), 2);
match &args[1] {
UnresolvedExpression::Const(_) => {}
other => panic!("expected string constant, got {other:?}"),
}
}
_ => panic!("Expected Call"),
}
}

#[test]
fn test_fulltext_bind_roundtrip_with_hostile_content() {
// The exact shape that wedged the memory MCP server: user content
// containing a quote-then-paren, interpolated into a fulltext bind.
let content = r#"emit_list_item stores index metadata ("not rdf-list bnodes") and prunes operator-kept rows (scalar-Eq push.")"#;
let bind_expr = format!(
"(fulltext ?content \"{}\")",
content.replace('\\', "\\\\").replace('"', "\\\"")
);
let expr = parse_s_expression(&bind_expr).expect("must parse, not hang");
match expr {
UnresolvedExpression::Call { func, args } => {
assert_eq!(func.as_ref(), "fulltext");
assert_eq!(args.len(), 2);
}
_ => panic!("Expected Call"),
}
}

#[test]
fn test_unescape_string_helper() {
assert_eq!(unescape_string(r#"a\"b"#), r#"a"b"#);
assert_eq!(unescape_string(r"a\\b"), r"a\b");
// Unrecognized escapes are preserved verbatim
assert_eq!(unescape_string(r"a\nb"), r"a\nb");
assert_eq!(unescape_string("plain"), "plain");
}

#[test]
fn test_parse_single_arg_arithmetic() {
let expr = parse_s_expression("(+ ?x)").unwrap();
Expand Down
60 changes: 56 additions & 4 deletions fluree-db-query/src/parse/sexpr_tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ fn tokenize_sexpr_inner(
current.clear();
}
chars.next(); // consume opening quote
// Read until closing quote
// Read until closing quote, honoring \" and \\
let mut string_val = String::new();
let mut closed = false;
while let Some(&sc) = chars.peek() {
Expand All @@ -177,6 +177,18 @@ fn tokenize_sexpr_inner(
closed = true;
break;
}
if sc == '\\' {
chars.next(); // consume backslash
match chars.peek() {
Some(&esc @ ('"' | '\\')) => {
string_val.push(esc);
chars.next();
}
// Unrecognized escape: keep the backslash verbatim
_ => string_val.push('\\'),
}
continue;
}
string_val.push(sc);
chars.next();
}
Expand Down Expand Up @@ -257,9 +269,14 @@ pub fn split_first_token(s: &str) -> Result<(&str, &str)> {
pub fn find_matching_paren(s: &str) -> Result<usize> {
let mut depth = 0;
let mut in_quote = false;
for (i, c) in s.chars().enumerate() {
let mut escaped = false;
for (i, c) in s.char_indices() {
if in_quote {
if c == '"' {
if escaped {
escaped = false;
} else if c == '\\' {
escaped = true;
} else if c == '"' {
in_quote = false;
}
continue;
Expand Down Expand Up @@ -294,8 +311,21 @@ pub fn find_matching_paren(s: &str) -> Result<usize> {
/// ```
pub fn find_matching_bracket(s: &str) -> Result<usize> {
let mut depth = 0;
for (i, c) in s.chars().enumerate() {
let mut in_quote = false;
let mut escaped = false;
for (i, c) in s.char_indices() {
if in_quote {
if escaped {
escaped = false;
} else if c == '\\' {
escaped = true;
} else if c == '"' {
in_quote = false;
}
continue;
}
match c {
'"' => in_quote = true,
'[' => depth += 1,
']' => {
depth -= 1;
Expand Down Expand Up @@ -367,6 +397,28 @@ mod tests {
assert!(matches!(&b_list[2], SexprToken::Atom(s) if s == "false"));
}

#[test]
fn test_tokenize_escaped_quote_in_string() {
let tokens = tokenize_sexpr(r#"(concat ?x "he said \"hi\" (loudly)")"#).unwrap();
let list = tokens[0].as_list().unwrap();
assert_eq!(list.len(), 3);
assert!(matches!(&list[2], SexprToken::String(s) if s == r#"he said "hi" (loudly)"#));
}

#[test]
fn test_find_matching_paren_skips_escaped_quotes() {
let s = r#"(fulltext ?c "a \") b") tail"#;
let end = find_matching_paren(s).unwrap();
assert_eq!(&s[..=end], r#"(fulltext ?c "a \") b")"#);
}

#[test]
fn test_find_matching_bracket_skips_quoted_brackets() {
let s = r#"["a]b" "c" ] tail"#;
let end = find_matching_bracket(s).unwrap();
assert_eq!(&s[..=end], r#"["a]b" "c" ]"#);
}

#[test]
fn test_tokenize_unclosed_string() {
let result = tokenize_sexpr(r#"(foo "unclosed)"#);
Expand Down
Loading