From b03ecbe6c769acd262b3bccc0cc7303a8a5bd927 Mon Sep 17 00:00:00 2001
From: Krishnan Govindraj <krishnan@typedb.com>
Date: Fri, 27 Mar 2026 00:04:13 +0100
Subject: [PATCH 01/10] Implement unicode escaping

---
 rust/common/error/mod.rs |  2 ++
 rust/value.rs            | 60 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/rust/common/error/mod.rs b/rust/common/error/mod.rs
index 6f4eac01..52877f68 100644
--- a/rust/common/error/mod.rs
+++ b/rust/common/error/mod.rs
@@ -84,6 +84,8 @@ error_messages! { TypeQLError
         6: "Encountered invalid escape sequence {escape:?} while parsing {full_string:?}.",
     ReservedKeywordAsIdentifier { identifier: Identifier } =
         7: "A reserved keyword '{identifier}' was used as identifier.",
+    InvalidUnicodeEscapeInString { escape: String, full_string: String } =
+        8: "Encountered an invalid unicode escape sequence {escape:?} while parsing {full_string:?}.",
 /*
     MissingPatterns =
         5: "The query has not been provided with any patterns.",
diff --git a/rust/value.rs b/rust/value.rs
index 564d749d..461c54ce 100644
--- a/rust/value.rs
+++ b/rust/value.rs
@@ -342,6 +342,16 @@ impl fmt::Display for StructLiteral {
 }
 
 impl StringLiteral {
+    fn unescape_unicode<'a>(bytes: &'a [u8]) -> std::result::Result<char, &'a str> {
+        let as_hex = std::str::from_utf8(bytes).expect("Should still be utf8");
+        if bytes.len() == 4 {
+            let as_u32 = u32::from_str_radix(as_hex, 16).map_err(|_| as_hex)?;
+            char::from_u32(as_u32).ok_or(as_hex)
+        } else {
+            Err(as_hex)
+        }
+    }
+
     pub fn unescape(&self) -> Result<String> {
         self.process_unescape(|bytes, _buf, rest| match bytes[1] {
             BSP => Ok(('\x08', 2)),
@@ -350,7 +360,13 @@ impl StringLiteral {
             FF_ => Ok(('\x0c', 2)),
             CR_ => Ok(('\x0d', 2)),
             c @ (b'"' | b'\'' | b'\\') => Ok((c as char, 2)),
-            b'u' => todo!("Unicode escape handling"),
+            b'u' => Self::unescape_unicode(&bytes[2..std::cmp::min(6, bytes.len())]).map(|c| (c, 6)).map_err(|hex| {
+                TypeQLError::InvalidUnicodeEscapeInString {
+                    full_string: rest.to_owned(),
+                    escape: format!(r"\u{}", hex),
+                }
+                .into()
+            }),
             _ => Err(TypeQLError::InvalidStringEscape {
                 full_string: rest.to_owned(),
                 escape: format!(r"\{}", rest.chars().nth(1).unwrap()),
@@ -407,3 +423,45 @@ const TAB: u8 = b't';
 const LF_: u8 = b'n';
 const FF_: u8 = b'f';
 const CR_: u8 = b'r';
+
+#[cfg(test)]
+pub mod tests {
+    use crate::value::TypeQLError;
+    #[test]
+    fn test_unicode_unescape() {
+        {
+            // Works
+            let escaped = r#""... \u0ca0\u005f\u0ca0""#;
+            let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
+                panic!("Not parsed as string");
+            };
+            assert_eq!(parsed.unescape().unwrap().as_str(), "... ಠ_ಠ");
+        }
+
+        {
+            // Not enough bytes
+            let escaped = r#""... \u012""#;
+            let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
+                panic!("Not parsed as string");
+            };
+            let error = parsed.unescape().unwrap_err();
+            let TypeQLError::InvalidUnicodeEscapeInString { escape, .. } = &error.errors()[0] else {
+                panic!("Wrong error type. Was {error:?}")
+            };
+            assert_eq!(escape, r"\u012");
+        }
+
+        {
+            // Invalid hex
+            let escaped = r#""... \uwu/ ...""#;
+            let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
+                panic!("Not parsed as string");
+            };
+            let error = parsed.unescape().unwrap_err();
+            let TypeQLError::InvalidUnicodeEscapeInString { escape, .. } = &error.errors()[0] else {
+                panic!("Wrong error type. Was {error:?}")
+            };
+            assert_eq!(escape, r"\uwu/ ");
+        }
+    }
+}

From 6671c1fad981f43ddf354d64a880c2c743e6b15f Mon Sep 17 00:00:00 2001
From: Krishnan Govindraj <krishnan@typedb.com>
Date: Fri, 27 Mar 2026 00:07:44 +0100
Subject: [PATCH 02/10] Add test using capitals

---
 rust/value.rs | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/rust/value.rs b/rust/value.rs
index 461c54ce..dd93a87a 100644
--- a/rust/value.rs
+++ b/rust/value.rs
@@ -438,6 +438,15 @@ pub mod tests {
             assert_eq!(parsed.unescape().unwrap().as_str(), "... ಠ_ಠ");
         }
 
+        {
+            // Capital hex works too
+            let escaped = r#""... \u0CA0\u005F\u0CA0""#;
+            let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
+                panic!("Not parsed as string");
+            };
+            assert_eq!(parsed.unescape().unwrap().as_str(), "... ಠ_ಠ");
+        }
+
         {
             // Not enough bytes
             let escaped = r#""... \u012""#;

From 569070d8cae2c71555fa3670013e2ffa990b4870 Mon Sep 17 00:00:00 2001
From: Krishnan Govindraj <krishnan@typedb.com>
Date: Fri, 27 Mar 2026 13:01:59 +0100
Subject: [PATCH 03/10] Problem with rest. I might yeet

---
 rust/value.rs | 40 ++++++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/rust/value.rs b/rust/value.rs
index dd93a87a..d99beefc 100644
--- a/rust/value.rs
+++ b/rust/value.rs
@@ -360,13 +360,17 @@ impl StringLiteral {
             FF_ => Ok(('\x0c', 2)),
             CR_ => Ok(('\x0d', 2)),
             c @ (b'"' | b'\'' | b'\\') => Ok((c as char, 2)),
-            b'u' => Self::unescape_unicode(&bytes[2..std::cmp::min(6, bytes.len())]).map(|c| (c, 6)).map_err(|hex| {
-                TypeQLError::InvalidUnicodeEscapeInString {
-                    full_string: rest.to_owned(),
-                    escape: format!(r"\u{}", hex),
+            b'u' => {
+                compile_error!("Our 'escape' fields are wrong because \"rest\" isn't acutally rest here.");
+                let escape = &bytes[2..std::cmp::min(6, bytes.len())];
+                match decode_four_hex_bytes(escape) {
+                    Some(char) => Ok((char, 6)),
+                    None => Err(TypeQLError::InvalidUnicodeEscapeInString {
+                                full_string: rest.to_owned(),
+                                escape: format!(r"\u{}", &rest[2..6]),
+                            }.into())
                 }
-                .into()
-            }),
+            },
             _ => Err(TypeQLError::InvalidStringEscape {
                 full_string: rest.to_owned(),
                 escape: format!(r"\{}", rest.chars().nth(1).unwrap()),
@@ -424,6 +428,21 @@ const LF_: u8 = b'n';
 const FF_: u8 = b'f';
 const CR_: u8 = b'r';
 
+#[allow(arithmetic_overflow)]
+fn decode_four_hex_bytes(bytes: &[u8]) -> Option<char> {
+    if bytes.len() == 4 {
+        let u32_le: u32 = 0u32
+            | (bytes[0] as char).to_digit(16)? << 12
+            | (bytes[1] as char).to_digit(16)? <<  8
+            | (bytes[2] as char).to_digit(16)? <<  4
+            | (bytes[3] as char).to_digit(16)? <<  0 ;
+        debug_assert!(char::from_u32(u32_le).is_some());
+        char::from_u32(u32_le)
+    } else {
+        None
+    }
+}
+
 #[cfg(test)]
 pub mod tests {
     use crate::value::TypeQLError;
@@ -447,6 +466,15 @@ pub mod tests {
             assert_eq!(parsed.unescape().unwrap().as_str(), "... ಠ_ಠ");
         }
 
+        {
+            // Longer ones are just
+            let escaped = r#""... \u0CA01234""#;
+            let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
+                panic!("Not parsed as string");
+            };
+            assert_eq!(parsed.unescape().unwrap().as_str(), "... ಠ1234");
+        }
+
         {
             // Not enough bytes
             let escaped = r#""... \u012""#;

From b48cbb44aa0b909127a641544dd3371928e8cf76 Mon Sep 17 00:00:00 2001
From: Krishnan Govindraj <krishnan@typedb.com>
Date: Fri, 27 Mar 2026 14:28:32 +0100
Subject: [PATCH 04/10] Add lots of tests too

---
 rust/common/error/mod.rs |   2 -
 rust/value.rs            | 173 +++++++++++++++++++++------------------
 2 files changed, 94 insertions(+), 81 deletions(-)

diff --git a/rust/common/error/mod.rs b/rust/common/error/mod.rs
index 52877f68..6f4eac01 100644
--- a/rust/common/error/mod.rs
+++ b/rust/common/error/mod.rs
@@ -84,8 +84,6 @@ error_messages! { TypeQLError
         6: "Encountered invalid escape sequence {escape:?} while parsing {full_string:?}.",
     ReservedKeywordAsIdentifier { identifier: Identifier } =
         7: "A reserved keyword '{identifier}' was used as identifier.",
-    InvalidUnicodeEscapeInString { escape: String, full_string: String } =
-        8: "Encountered an invalid unicode escape sequence {escape:?} while parsing {full_string:?}.",
 /*
     MissingPatterns =
         5: "The query has not been provided with any patterns.",
diff --git a/rust/value.rs b/rust/value.rs
index d99beefc..90118f05 100644
--- a/rust/value.rs
+++ b/rust/value.rs
@@ -353,42 +353,39 @@ impl StringLiteral {
     }
 
     pub fn unescape(&self) -> Result<String> {
-        self.process_unescape(|bytes, _buf, rest| match bytes[1] {
-            BSP => Ok(('\x08', 2)),
-            TAB => Ok(('\x09', 2)),
-            LF_ => Ok(('\x0a', 2)),
-            FF_ => Ok(('\x0c', 2)),
-            CR_ => Ok(('\x0d', 2)),
-            c @ (b'"' | b'\'' | b'\\') => Ok((c as char, 2)),
-            b'u' => {
-                compile_error!("Our 'escape' fields are wrong because \"rest\" isn't acutally rest here.");
-                let escape = &bytes[2..std::cmp::min(6, bytes.len())];
-                match decode_four_hex_bytes(escape) {
-                    Some(char) => Ok((char, 6)),
-                    None => Err(TypeQLError::InvalidUnicodeEscapeInString {
-                                full_string: rest.to_owned(),
-                                escape: format!(r"\u{}", &rest[2..6]),
-                            }.into())
+        self.process_unescape(|bytes| {
+            if bytes.len() < 2 {
+                return Err(1);
+            }
+            match bytes[1] {
+                BSP => Ok(('\x08', 2)),
+                TAB => Ok(('\x09', 2)),
+                LF_ => Ok(('\x0a', 2)),
+                FF_ => Ok(('\x0c', 2)),
+                CR_ => Ok(('\x0d', 2)),
+                c @ (b'"' | b'\'' | b'\\') => Ok((c as char, 2)),
+                b'u' => {
+                    let escape = &bytes[2..std::cmp::min(6, bytes.len())];
+                    match decode_four_hex_bytes(escape) {
+                        Some(char) => Ok((char, 6)),
+                        None => Err(6),
+                    }
                 }
-            },
-            _ => Err(TypeQLError::InvalidStringEscape {
-                full_string: rest.to_owned(),
-                escape: format!(r"\{}", rest.chars().nth(1).unwrap()),
+                _ => Err(2),
             }
-            .into()),
         })
     }
 
     pub fn unescape_regex(&self) -> Result<String> {
-        self.process_unescape(|bytes, _, _| match bytes[1] {
-            c @ b'"' => Ok((c as char, 2)),
+        self.process_unescape(|bytes| match bytes.get(1) {
+            Some(b'"') => Ok(('"', 2)),
             _ => Ok(('\\', 1)),
         })
     }
 
     fn process_unescape<F>(&self, escape_handler: F) -> Result<String>
     where
-        F: Fn(&[u8], &mut String, &str) -> Result<(char, usize)>,
+        F: Fn(&[u8]) -> std::result::Result<(char, usize), usize>,
     {
         let bytes = self.value.as_bytes();
         assert_eq!(bytes[0], bytes[bytes.len() - 1]);
@@ -400,17 +397,13 @@ impl StringLiteral {
 
         while !rest.is_empty() {
             let (char, escaped_len) = if rest.as_bytes()[0] == b'\\' {
-                let bytes = rest.as_bytes();
-
-                if bytes.len() < 2 {
-                    return Err(TypeQLError::InvalidStringEscape {
+                escape_handler(rest.as_bytes()).map_err(|expected_escaped_len| {
+                    let safe_len = std::cmp::min(rest.len(), expected_escaped_len);
+                    Into::<crate::common::error::Error>::into(TypeQLError::InvalidStringEscape {
                         full_string: escaped_string.to_owned(),
-                        escape: String::from(r"\"),
-                    }
-                    .into());
-                }
-
-                escape_handler(bytes, &mut buf, escaped_string)?
+                        escape: rest[..safe_len].to_owned(),
+                    })
+                })?
             } else {
                 let char = rest.chars().next().expect("string is non-empty");
                 (char, char.len_utf8())
@@ -433,9 +426,9 @@ fn decode_four_hex_bytes(bytes: &[u8]) -> Option<char> {
     if bytes.len() == 4 {
         let u32_le: u32 = 0u32
             | (bytes[0] as char).to_digit(16)? << 12
-            | (bytes[1] as char).to_digit(16)? <<  8
-            | (bytes[2] as char).to_digit(16)? <<  4
-            | (bytes[3] as char).to_digit(16)? <<  0 ;
+            | (bytes[1] as char).to_digit(16)? << 8
+            | (bytes[2] as char).to_digit(16)? << 4
+            | (bytes[3] as char).to_digit(16)? << 0;
         debug_assert!(char::from_u32(u32_le).is_some());
         char::from_u32(u32_le)
     } else {
@@ -445,60 +438,82 @@ fn decode_four_hex_bytes(bytes: &[u8]) -> Option<char> {
 
 #[cfg(test)]
 pub mod tests {
-    use crate::value::TypeQLError;
+    use crate::{
+        value::{StringLiteral, TypeQLError},
+        Result,
+    };
+
+    fn parse_to_string_literal(escaped: &str) -> StringLiteral {
+        let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
+            panic!("Not parsed as string");
+        };
+        parsed
+    }
+
     #[test]
-    fn test_unicode_unescape() {
+    fn test_unescape_regex() {
         {
-            // Works
-            let escaped = r#""... \u0ca0\u005f\u0ca0""#;
-            let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
-                panic!("Not parsed as string");
-            };
-            assert_eq!(parsed.unescape().unwrap().as_str(), "... ಠ_ಠ");
+            let escaped = r#""a\"b\"c""#;
+            let unescaped = parse_to_string_literal(escaped).unescape_regex().unwrap();
+            assert_eq!(unescaped.as_str(), r#"a"b"c"#);
         }
-
         {
-            // Capital hex works too
-            let escaped = r#""... \u0CA0\u005F\u0CA0""#;
-            let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
-                panic!("Not parsed as string");
-            };
-            assert_eq!(parsed.unescape().unwrap().as_str(), "... ಠ_ಠ");
+            let escaped = r#""abc\123""#;
+            let unescaped = parse_to_string_literal(escaped).unescape_regex().unwrap();
+            assert_eq!(unescaped.as_str(), r#"abc\123"#);
         }
-
+        // Cases that fail at parsing
         {
-            // Longer ones are just
-            let escaped = r#""... \u0CA01234""#;
-            let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
-                panic!("Not parsed as string");
-            };
-            assert_eq!(parsed.unescape().unwrap().as_str(), "... ಠ1234");
+            let escaped = r#""abc\""#;
+            assert!(crate::parse_value(escaped).is_err()); // Parsing fails as incomplete string literal
+            let string_literal = StringLiteral { value: escaped.to_owned() };
+            let unescaped = string_literal.unescape_regex().unwrap();
+            assert_eq!(unescaped.as_str(), r#"abc\"#);
         }
+    }
 
-        {
-            // Not enough bytes
-            let escaped = r#""... \u012""#;
-            let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
-                panic!("Not parsed as string");
-            };
-            let error = parsed.unescape().unwrap_err();
-            let TypeQLError::InvalidUnicodeEscapeInString { escape, .. } = &error.errors()[0] else {
-                panic!("Wrong error type. Was {error:?}")
-            };
-            assert_eq!(escape, r"\u012");
-        }
+    fn assert_unescapes_to(escaped: &str, expected: &str) {
+        let unescaped = parse_to_string_literal(escaped).unescape().unwrap();
+        assert_eq!(unescaped, expected);
+    }
+
+    fn assert_unescape_errors(escaped: &str, expected_escape_sequence: &str) {
+        let error = parse_to_string_literal(escaped).unescape().unwrap_err();
+        let TypeQLError::InvalidStringEscape { escape, .. } = &error.errors()[0] else {
+            panic!("Wrong error type. Was {error:?}")
+        };
+        assert_eq!(escape, expected_escape_sequence);
+    }
 
+    #[test]
+    fn test_unescape() {
+        // Succeeds
+        assert_unescapes_to(r#""a\tb\tc""#, "a\tb\tc"); // works
+        assert_unescapes_to(r#""a\"b\"c""#, r#"a"b"c"#); // works
+        assert_unescapes_to(r#""a\'b\'c""#, r#"a'b'c"#); // works
+        assert_unescapes_to(r#""a\\b\\c""#, r#"a\b\c"#); // works
+                                                         //  - Unicode
+        assert_unescapes_to(r#""abc \u0ca0\u005f\u0ca0""#, "abc ಠ_ಠ"); // works
+        assert_unescapes_to(r#""abc \u0CA0\u005F\u0CA0""#, "abc ಠ_ಠ"); // caps
+        assert_unescapes_to(r#""abc \u0CA01234""#, "abc ಠ1234"); // consumes only 4
+
+        // Errors
+        assert_unescape_errors(r#""ab\c""#, r"\c"); // Invalid escape
+
+        //  - Unicode
+        assert_unescape_errors(r#""abc \u""#, r"\u"); // Not enough bytes
+        assert_unescape_errors(r#""abc \u012""#, r"\u012"); // Not enough bytes
+        assert_unescape_errors(r#""abc \uwu/ abc""#, r"\uwu/ "); // Invalid hex
+                                                                 // Cases that fail at parsing
         {
-            // Invalid hex
-            let escaped = r#""... \uwu/ ...""#;
-            let crate::ValueLiteral::String(parsed) = crate::parse_value(escaped).unwrap() else {
-                panic!("Not parsed as string");
-            };
-            let error = parsed.unescape().unwrap_err();
-            let TypeQLError::InvalidUnicodeEscapeInString { escape, .. } = &error.errors()[0] else {
+            let escaped = r#""abc\""#;
+            assert!(crate::parse_value(escaped).is_err()); // Parsing fails as incomplete string literal
+            let string_literal = StringLiteral { value: escaped.to_owned() };
+            let error = string_literal.unescape().unwrap_err();
+            let TypeQLError::InvalidStringEscape { escape, .. } = &error.errors()[0] else {
                 panic!("Wrong error type. Was {error:?}")
             };
-            assert_eq!(escape, r"\uwu/ ");
+            assert_eq!(escape, r#"\"#);
         }
     }
 }

From d454420f60ca4d4b39cf78307b33d6a5a6d79fa3 Mon Sep 17 00:00:00 2001
From: Krishnan Govindraj <krishnan@typedb.com>
Date: Fri, 27 Mar 2026 14:57:34 +0100
Subject: [PATCH 05/10] Fix ugly into + unsafe slicing

---
 rust/value.rs | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/rust/value.rs b/rust/value.rs
index 90118f05..b4046d8e 100644
--- a/rust/value.rs
+++ b/rust/value.rs
@@ -397,13 +397,15 @@ impl StringLiteral {
 
         while !rest.is_empty() {
             let (char, escaped_len) = if rest.as_bytes()[0] == b'\\' {
-                escape_handler(rest.as_bytes()).map_err(|expected_escaped_len| {
-                    let safe_len = std::cmp::min(rest.len(), expected_escaped_len);
-                    Into::<crate::common::error::Error>::into(TypeQLError::InvalidStringEscape {
-                        full_string: escaped_string.to_owned(),
-                        escape: rest[..safe_len].to_owned(),
-                    })
-                })?
+                match escape_handler(rest.as_bytes()) {
+                    Ok((char, escaped_len)) => (char, escaped_len),
+                    Err(considered_escape_byte_length) => {
+                        return Err(TypeQLError::InvalidStringEscape {
+                            full_string: escaped_string.to_owned(),
+                            escape: rest.chars().take(considered_escape_byte_length).collect(),
+                        }.into());
+                    }
+                }
             } else {
                 let char = rest.chars().next().expect("string is non-empty");
                 (char, char.len_utf8())

From 9f10deea8cc356ece1f49f290585d4f3455ca630 Mon Sep 17 00:00:00 2001
From: Krishnan Govindraj <krishnan@typedb.com>
Date: Fri, 27 Mar 2026 21:21:04 +0100
Subject: [PATCH 06/10] Add a test to measure performance, but I've broken the
 implementation

---
 rust/BUILD    |  1 +
 rust/value.rs | 94 ++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 72 insertions(+), 23 deletions(-)

diff --git a/rust/BUILD b/rust/BUILD
index afa26bb6..cb7ee4c5 100644
--- a/rust/BUILD
+++ b/rust/BUILD
@@ -40,6 +40,7 @@ rust_test(
     deps = [
         "@crates//:syn",
         "@crates//:proc-macro2",
+        "@crates//:rand",
     ],
 )
 
diff --git a/rust/value.rs b/rust/value.rs
index b4046d8e..b930edd8 100644
--- a/rust/value.rs
+++ b/rust/value.rs
@@ -342,16 +342,6 @@ impl fmt::Display for StructLiteral {
 }
 
 impl StringLiteral {
-    fn unescape_unicode<'a>(bytes: &'a [u8]) -> std::result::Result<char, &'a str> {
-        let as_hex = std::str::from_utf8(bytes).expect("Should still be utf8");
-        if bytes.len() == 4 {
-            let as_u32 = u32::from_str_radix(as_hex, 16).map_err(|_| as_hex)?;
-            char::from_u32(as_u32).ok_or(as_hex)
-        } else {
-            Err(as_hex)
-        }
-    }
-
     pub fn unescape(&self) -> Result<String> {
         self.process_unescape(|bytes| {
             if bytes.len() < 2 {
@@ -392,28 +382,31 @@ impl StringLiteral {
         assert!(matches!(bytes[0], b'\'' | b'"'));
 
         let escaped_string = &self.value[1..self.value.len() - 1];
-        let mut buf = String::with_capacity(escaped_string.len());
+        let mut buf = Vec::with_capacity(escaped_string.len());
         let mut rest = escaped_string;
-
         while !rest.is_empty() {
-            let (char, escaped_len) = if rest.as_bytes()[0] == b'\\' {
+            let escaped_len = if rest.as_bytes()[0] == b'\\' {
                 match escape_handler(rest.as_bytes()) {
-                    Ok((char, escaped_len)) => (char, escaped_len),
+                    Ok((char, escaped_len)) => {
+                        let start = buf.len();
+                        buf.resize(buf.len() + char.len_utf8(),0);
+                        char.encode_utf8(&mut buf[start..]);
+                        rest = &rest[escaped_len..];
+                    },
                     Err(considered_escape_byte_length) => {
+                        let considered_escape_sequence = rest.chars().take(considered_escape_byte_length).collect();
                         return Err(TypeQLError::InvalidStringEscape {
                             full_string: escaped_string.to_owned(),
-                            escape: rest.chars().take(considered_escape_byte_length).collect(),
+                            escape: considered_escape_sequence,
                         }.into());
                     }
                 }
             } else {
-                let char = rest.chars().next().expect("string is non-empty");
-                (char, char.len_utf8())
+                buf.push(rest.as_bytes()[0]);
+                rest = &rest[1..];
             };
-            buf.push(char);
-            rest = &rest[escaped_len..];
         }
-        Ok(buf)
+        Ok(String::from_utf8(buf).expect("Expected valid utf8").to_owned())
     }
 }
 
@@ -494,7 +487,7 @@ pub mod tests {
         assert_unescapes_to(r#""a\"b\"c""#, r#"a"b"c"#); // works
         assert_unescapes_to(r#""a\'b\'c""#, r#"a'b'c"#); // works
         assert_unescapes_to(r#""a\\b\\c""#, r#"a\b\c"#); // works
-                                                         //  - Unicode
+        //  - Unicode
         assert_unescapes_to(r#""abc \u0ca0\u005f\u0ca0""#, "abc ಠ_ಠ"); // works
         assert_unescapes_to(r#""abc \u0CA0\u005F\u0CA0""#, "abc ಠ_ಠ"); // caps
         assert_unescapes_to(r#""abc \u0CA01234""#, "abc ಠ1234"); // consumes only 4
@@ -506,7 +499,7 @@ pub mod tests {
         assert_unescape_errors(r#""abc \u""#, r"\u"); // Not enough bytes
         assert_unescape_errors(r#""abc \u012""#, r"\u012"); // Not enough bytes
         assert_unescape_errors(r#""abc \uwu/ abc""#, r"\uwu/ "); // Invalid hex
-                                                                 // Cases that fail at parsing
+        // Cases that fail at parsing
         {
             let escaped = r#""abc\""#;
             assert!(crate::parse_value(escaped).is_err()); // Parsing fails as incomplete string literal
@@ -518,4 +511,59 @@ pub mod tests {
             assert_eq!(escape, r#"\"#);
         }
     }
-}
+
+    #[test]
+    fn time_unescape_ascii() {
+        let text = generate_string(TIME_UNESCAPE_TEXT_LEN, |x| 32 + (x % 94));
+        time_unescape(text);
+    }
+
+    #[test]
+    fn time_unescape_unicode() {
+        // assert_eq!(None, (0..0x07ff).filter(|x| char::from_u32(*x).is_none()).next());
+        let text = generate_string(TIME_UNESCAPE_TEXT_LEN, move |x| x & 0x07ff);
+        time_unescape(text);
+    }
+
+    const TIME_UNESCAPE_TEXT_LEN: usize = 100000;
+    fn time_unescape(text: String) {
+        use std::time::Instant;
+        let iters = 10000;
+
+        let string_literal = StringLiteral { value: text };
+        let start = Instant::now();
+        for _ in 0..iters {
+            string_literal.unescape().unwrap();
+        }
+        let end = Instant::now();
+        println!("{iters} on string of length {} iters in {}", string_literal.value.as_str().len(), (end - start).as_secs_f64())
+    }
+
+    fn generate_string(length: usize, mapper: fn(u32) -> u32) -> String {
+        use rand::{thread_rng, Rng, RngCore};
+        let mut rng = thread_rng();
+        let capacity: i64 = (1.2 * length as f64).ceil() as i64;
+        let mut text = String::with_capacity(capacity as usize);
+        text.push('"');
+        let mut sanity: i64 = capacity;
+        while text.as_str().len() < length+1 && sanity >= 0 {
+            sanity -= 1;
+            match char::from_u32(mapper(rng.next_u32())) {
+                Some('\\')  => { text.push('\\'); text.push('\\'); }
+                Some('\'') => { text.push('\\'); text.push('\''); }
+                Some('\"') => { text.push('\\'); text.push('\"'); }
+                Some('\x08') => { text.push('\\'); text.push('b'); }
+                Some('\x09') => { text.push('\\'); text.push('t'); }
+                Some('\x0a') => { text.push('\\'); text.push('n'); }
+                Some('\x0c') => { text.push('\\'); text.push('f'); }
+                Some('\x0d') => { text.push('\\'); text.push('r'); }
+                Some(ch) => { text.push(ch) },
+                None => {}
+            }
+        }
+        text.push('"');
+        assert!(text.as_str().len() > length && text.as_str().len() < length + 10);
+        text
+    }
+
+}
\ No newline at end of file

From 783e0b281cc8185b0f55a3d844f611f88171f74e Mon Sep 17 00:00:00 2001
From: Krishnan Govindraj <krishnan@typedb.com>
Date: Fri, 27 Mar 2026 21:22:42 +0100
Subject: [PATCH 07/10] Revert the implementation

---
 rust/value.rs | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/rust/value.rs b/rust/value.rs
index b930edd8..2f4e2f91 100644
--- a/rust/value.rs
+++ b/rust/value.rs
@@ -341,7 +341,18 @@ impl fmt::Display for StructLiteral {
     }
 }
 
+
 impl StringLiteral {
+    fn unescape_unicode<'a>(bytes: &'a [u8]) -> std::result::Result<char, &'a str> {
+        let as_hex = std::str::from_utf8(bytes).expect("Should still be utf8");
+        if bytes.len() == 4 {
+            let as_u32 = u32::from_str_radix(as_hex, 16).map_err(|_| as_hex)?;
+            char::from_u32(as_u32).ok_or(as_hex)
+        } else {
+            Err(as_hex)
+        }
+    }
+
     pub fn unescape(&self) -> Result<String> {
         self.process_unescape(|bytes| {
             if bytes.len() < 2 {
@@ -382,31 +393,28 @@ impl StringLiteral {
         assert!(matches!(bytes[0], b'\'' | b'"'));
 
         let escaped_string = &self.value[1..self.value.len() - 1];
-        let mut buf = Vec::with_capacity(escaped_string.len());
+        let mut buf = String::with_capacity(escaped_string.len());
         let mut rest = escaped_string;
+
         while !rest.is_empty() {
-            let escaped_len = if rest.as_bytes()[0] == b'\\' {
+            let (char, escaped_len) = if rest.as_bytes()[0] == b'\\' {
                 match escape_handler(rest.as_bytes()) {
-                    Ok((char, escaped_len)) => {
-                        let start = buf.len();
-                        buf.resize(buf.len() + char.len_utf8(),0);
-                        char.encode_utf8(&mut buf[start..]);
-                        rest = &rest[escaped_len..];
-                    },
+                    Ok((char, escaped_len)) => (char, escaped_len),
                     Err(considered_escape_byte_length) => {
-                        let considered_escape_sequence = rest.chars().take(considered_escape_byte_length).collect();
                         return Err(TypeQLError::InvalidStringEscape {
                             full_string: escaped_string.to_owned(),
-                            escape: considered_escape_sequence,
+                            escape: rest.chars().take(considered_escape_byte_length).collect(),
                         }.into());
                     }
                 }
             } else {
-                buf.push(rest.as_bytes()[0]);
-                rest = &rest[1..];
+                let char = rest.chars().next().expect("string is non-empty");
+                (char, char.len_utf8())
             };
+            buf.push(char);
+            rest = &rest[escaped_len..];
         }
-        Ok(String::from_utf8(buf).expect("Expected valid utf8").to_owned())
+        Ok(buf)
     }
 }
 

From a730d2bd302c5ee373a842c95d70ae30754d6f9d Mon Sep 17 00:00:00 2001
From: Krishnan Govindraj <krishnan@typedb.com>
Date: Fri, 27 Mar 2026 21:24:03 +0100
Subject: [PATCH 08/10] And replay the broken stuff so I can fix it

---
 rust/value.rs | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/rust/value.rs b/rust/value.rs
index 2f4e2f91..b930edd8 100644
--- a/rust/value.rs
+++ b/rust/value.rs
@@ -341,18 +341,7 @@ impl fmt::Display for StructLiteral {
     }
 }
 
-
 impl StringLiteral {
-    fn unescape_unicode<'a>(bytes: &'a [u8]) -> std::result::Result<char, &'a str> {
-        let as_hex = std::str::from_utf8(bytes).expect("Should still be utf8");
-        if bytes.len() == 4 {
-            let as_u32 = u32::from_str_radix(as_hex, 16).map_err(|_| as_hex)?;
-            char::from_u32(as_u32).ok_or(as_hex)
-        } else {
-            Err(as_hex)
-        }
-    }
-
     pub fn unescape(&self) -> Result<String> {
         self.process_unescape(|bytes| {
             if bytes.len() < 2 {
@@ -393,28 +382,31 @@ impl StringLiteral {
         assert!(matches!(bytes[0], b'\'' | b'"'));
 
         let escaped_string = &self.value[1..self.value.len() - 1];
-        let mut buf = String::with_capacity(escaped_string.len());
+        let mut buf = Vec::with_capacity(escaped_string.len());
         let mut rest = escaped_string;
-
         while !rest.is_empty() {
-            let (char, escaped_len) = if rest.as_bytes()[0] == b'\\' {
+            let escaped_len = if rest.as_bytes()[0] == b'\\' {
                 match escape_handler(rest.as_bytes()) {
-                    Ok((char, escaped_len)) => (char, escaped_len),
+                    Ok((char, escaped_len)) => {
+                        let start = buf.len();
+                        buf.resize(buf.len() + char.len_utf8(),0);
+                        char.encode_utf8(&mut buf[start..]);
+                        rest = &rest[escaped_len..];
+                    },
                     Err(considered_escape_byte_length) => {
+                        let considered_escape_sequence = rest.chars().take(considered_escape_byte_length).collect();
                         return Err(TypeQLError::InvalidStringEscape {
                             full_string: escaped_string.to_owned(),
-                            escape: rest.chars().take(considered_escape_byte_length).collect(),
+                            escape: considered_escape_sequence,
                         }.into());
                     }
                 }
             } else {
-                let char = rest.chars().next().expect("string is non-empty");
-                (char, char.len_utf8())
+                buf.push(rest.as_bytes()[0]);
+                rest = &rest[1..];
             };
-            buf.push(char);
-            rest = &rest[escaped_len..];
         }
-        Ok(buf)
+        Ok(String::from_utf8(buf).expect("Expected valid utf8").to_owned())
     }
 }
 

From 2fdc3f0b3d33be362a536db7949b647db6942d95 Mon Sep 17 00:00:00 2001
From: Krishnan Govindraj <krishnan@typedb.com>
Date: Fri, 27 Mar 2026 22:53:44 +0100
Subject: [PATCH 09/10] Quick one with just bytes: 0.94s on ascii, 1.28s on
 unicode

---
 rust/value.rs | 79 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 56 insertions(+), 23 deletions(-)

diff --git a/rust/value.rs b/rust/value.rs
index b930edd8..c8e39ffc 100644
--- a/rust/value.rs
+++ b/rust/value.rs
@@ -383,26 +383,29 @@ impl StringLiteral {
 
         let escaped_string = &self.value[1..self.value.len() - 1];
         let mut buf = Vec::with_capacity(escaped_string.len());
-        let mut rest = escaped_string;
+        let mut rest: &[u8] = escaped_string.as_bytes();
         while !rest.is_empty() {
-            let escaped_len = if rest.as_bytes()[0] == b'\\' {
-                match escape_handler(rest.as_bytes()) {
+            let escaped_len = if rest[0] == b'\\' {
+                match escape_handler(rest) {
                     Ok((char, escaped_len)) => {
                         let start = buf.len();
-                        buf.resize(buf.len() + char.len_utf8(),0);
+                        buf.resize(buf.len() + char.len_utf8(), 0);
                         char.encode_utf8(&mut buf[start..]);
                         rest = &rest[escaped_len..];
-                    },
-                    Err(considered_escape_byte_length) => {
-                        let considered_escape_sequence = rest.chars().take(considered_escape_byte_length).collect();
+                    }
+                    Err(considered_escape_seq_length) => {
+                        let offset = escaped_string.len() - rest.len();
+                        let considered_escape_sequence =
+                            escaped_string[offset..].chars().take(considered_escape_seq_length).collect();
                         return Err(TypeQLError::InvalidStringEscape {
                             full_string: escaped_string.to_owned(),
                             escape: considered_escape_sequence,
-                        }.into());
+                        }
+                        .into());
                     }
                 }
             } else {
-                buf.push(rest.as_bytes()[0]);
+                buf.push(rest[0]);
                 rest = &rest[1..];
             };
         }
@@ -487,7 +490,7 @@ pub mod tests {
         assert_unescapes_to(r#""a\"b\"c""#, r#"a"b"c"#); // works
         assert_unescapes_to(r#""a\'b\'c""#, r#"a'b'c"#); // works
         assert_unescapes_to(r#""a\\b\\c""#, r#"a\b\c"#); // works
-        //  - Unicode
+                                                         //  - Unicode
         assert_unescapes_to(r#""abc \u0ca0\u005f\u0ca0""#, "abc ಠ_ಠ"); // works
         assert_unescapes_to(r#""abc \u0CA0\u005F\u0CA0""#, "abc ಠ_ಠ"); // caps
         assert_unescapes_to(r#""abc \u0CA01234""#, "abc ಠ1234"); // consumes only 4
@@ -499,6 +502,9 @@ pub mod tests {
         assert_unescape_errors(r#""abc \u""#, r"\u"); // Not enough bytes
         assert_unescape_errors(r#""abc \u012""#, r"\u012"); // Not enough bytes
         assert_unescape_errors(r#""abc \uwu/ abc""#, r"\uwu/ "); // Invalid hex
+        assert_unescape_errors(r#""abc \uΣ12Σ abc""#, r"\uΣ12Σ"); // Invalid hex, 4 chars more than 4 bytes
+        assert_unescape_errors(r#""abc \u123Σ abc""#, r"\u123Σ"); // Invalid hex, 4 chars more than 4 bytes
+
         // Cases that fail at parsing
         {
             let escaped = r#""abc\""#;
@@ -536,7 +542,11 @@ pub mod tests {
             string_literal.unescape().unwrap();
         }
         let end = Instant::now();
-        println!("{iters} on string of length {} iters in {}", string_literal.value.as_str().len(), (end - start).as_secs_f64())
+        println!(
+            "{iters} on string of length {} iters in {}",
+            string_literal.value.as_str().len(),
+            (end - start).as_secs_f64()
+        )
     }
 
     fn generate_string(length: usize, mapper: fn(u32) -> u32) -> String {
@@ -546,18 +556,42 @@ pub mod tests {
         let mut text = String::with_capacity(capacity as usize);
         text.push('"');
         let mut sanity: i64 = capacity;
-        while text.as_str().len() < length+1 && sanity >= 0 {
+        while text.as_str().len() < length + 1 && sanity >= 0 {
             sanity -= 1;
             match char::from_u32(mapper(rng.next_u32())) {
-                Some('\\')  => { text.push('\\'); text.push('\\'); }
-                Some('\'') => { text.push('\\'); text.push('\''); }
-                Some('\"') => { text.push('\\'); text.push('\"'); }
-                Some('\x08') => { text.push('\\'); text.push('b'); }
-                Some('\x09') => { text.push('\\'); text.push('t'); }
-                Some('\x0a') => { text.push('\\'); text.push('n'); }
-                Some('\x0c') => { text.push('\\'); text.push('f'); }
-                Some('\x0d') => { text.push('\\'); text.push('r'); }
-                Some(ch) => { text.push(ch) },
+                Some('\\') => {
+                    text.push('\\');
+                    text.push('\\');
+                }
+                Some('\'') => {
+                    text.push('\\');
+                    text.push('\'');
+                }
+                Some('\"') => {
+                    text.push('\\');
+                    text.push('\"');
+                }
+                Some('\x08') => {
+                    text.push('\\');
+                    text.push('b');
+                }
+                Some('\x09') => {
+                    text.push('\\');
+                    text.push('t');
+                }
+                Some('\x0a') => {
+                    text.push('\\');
+                    text.push('n');
+                }
+                Some('\x0c') => {
+                    text.push('\\');
+                    text.push('f');
+                }
+                Some('\x0d') => {
+                    text.push('\\');
+                    text.push('r');
+                }
+                Some(ch) => text.push(ch),
                 None => {}
             }
         }
@@ -565,5 +599,4 @@ pub mod tests {
         assert!(text.as_str().len() > length && text.as_str().len() < length + 10);
         text
     }
-
-}
\ No newline at end of file
+}

From ce73b3b9208227868263304f268c9b1de61ea711 Mon Sep 17 00:00:00 2001
From: Krishnan Govindraj <krishnan@typedb.com>
Date: Fri, 27 Mar 2026 23:04:41 +0100
Subject: [PATCH 10/10] Add ignore to the bench tests

---
 rust/value.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rust/value.rs b/rust/value.rs
index c8e39ffc..98783e20 100644
--- a/rust/value.rs
+++ b/rust/value.rs
@@ -518,12 +518,14 @@ pub mod tests {
         }
     }
 
+    #[ignore]
     #[test]
     fn time_unescape_ascii() {
         let text = generate_string(TIME_UNESCAPE_TEXT_LEN, |x| 32 + (x % 94));
         time_unescape(text);
     }
 
+    #[ignore]
     #[test]
     fn time_unescape_unicode() {
         // assert_eq!(None, (0..0x07ff).filter(|x| char::from_u32(*x).is_none()).next());