From 4da8cf20457614f24288391c16e6d29ec0601afe Mon Sep 17 00:00:00 2001
From: Shenglong Li <shenglol@microsoft.com>
Date: Mon, 29 Jun 2026 14:42:05 -0700
Subject: [PATCH 1/2] Fix escape sequence semantic highlighting

---
 .../Highlighting/SemanticTokenVisitorTests.cs | 26 ++++++
 .../Highlighting/SemanticTokenVisitor.cs      | 90 ++++++++++++++++++-
 2 files changed, 112 insertions(+), 4 deletions(-)
 create mode 100644 src/Bicep.Core.UnitTests/Highlighting/SemanticTokenVisitorTests.cs

diff --git a/src/Bicep.Core.UnitTests/Highlighting/SemanticTokenVisitorTests.cs b/src/Bicep.Core.UnitTests/Highlighting/SemanticTokenVisitorTests.cs
new file mode 100644
index 00000000000..084a2a11abd
--- /dev/null
+++ b/src/Bicep.Core.UnitTests/Highlighting/SemanticTokenVisitorTests.cs
@@ -0,0 +1,26 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+using Bicep.Core.Highlighting;
+using Bicep.Core.UnitTests.Utils;
+using FluentAssertions;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+namespace Bicep.Core.UnitTests.Highlighting;
+
+[TestClass]
+public class SemanticTokenVisitorTests
+{
+    [TestMethod]
+    public void Build_WithEscapeSequences_ExcludesEscapesFromStringTokens()
+    {
+        // Every escape form (\\, \', \${, \n, \r, \t, \u{...}) sits between plain characters, so each run of escapes must split the String tokens.
+        const string bicepText = @"var foo = 'a\\b\'c\${d\n\r\t\u{1F600}z'";
+        var semanticModel = CompilationHelper.Compile(bicepText).Compilation.GetEntrypointSemanticModel();
+        var stringTokenTexts = SemanticTokenVisitor.Build(semanticModel)
+            .Where(semanticToken => semanticToken.TokenType == SemanticTokenType.String)
+            .Select(stringToken => bicepText.Substring(stringToken.Positionable.Span.Position, stringToken.Positionable.Span.Length));
+
+        stringTokenTexts.Should().Equal("'a", "b", "c", "d", "z'");
+    }
+}
diff --git a/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs b/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs
index c0cd36b36a3..fb83367d454 100644
--- a/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs
+++ b/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs
@@ -219,7 +219,7 @@ public override void VisitFunctionDeclarationSyntax(FunctionDeclarationSyntax sy
         base.VisitFunctionDeclarationSyntax(syntax);
     }
 
-    private void AddStringToken(Token token, string? start, string? end)
+    private void AddStringToken(Token token, string? start, string? end, bool isSingleLineString)
     {
         var endInterp = (token.Type, end) switch
         {
@@ -248,7 +248,7 @@ private void AddStringToken(Token token, string? start, string? end)
             AddTokenType(token.GetSpanSlice(0, startOperatorLength), SemanticTokenType.Operator);
         }
 
-        AddTokenType(token.GetSpanSlice(startOperatorLength, token.Span.Length - startOperatorLength - endOperatorLength), SemanticTokenType.String);
+        this.AddStringContentTokens(token, startOperatorLength, token.Span.Length - startOperatorLength - endOperatorLength, isSingleLineString);
 
         if (hasEndOperator)
         {
@@ -256,6 +256,86 @@ private void AddStringToken(Token token, string? start, string? end)
         }
     }
 
+    private void AddStringContentTokens(Token token, int start, int length, bool excludeEscapeSequences)
+    {
+        if (length <= 0)
+        {
+            return;
+        }
+
+        if (!excludeEscapeSequences)
+        {
+            this.AddTokenType(token.GetSpanSlice(start, length), SemanticTokenType.String);
+            return;
+        }
+
+        var end = start + length;
+        var segmentStart = start;
+        for (var position = start; position < end; position++)
+        {
+            if (TryGetEscapeSequenceLength(token.Text, position, end) is not { } escapeSequenceLength)
+            {
+                continue;
+            }
+
+            this.AddStringSegmentToken(token, segmentStart, position);
+            position += escapeSequenceLength - 1;
+            segmentStart = position + 1;
+        }
+
+        this.AddStringSegmentToken(token, segmentStart, end);
+    }
+
+    private void AddStringSegmentToken(Token token, int start, int end)
+    {
+        if (end > start)
+        {
+            this.AddTokenType(token.GetSpanSlice(start, end - start), SemanticTokenType.String);
+        }
+    }
+
+    private static int? TryGetEscapeSequenceLength(string text, int position, int end)
+    {
+        if (text[position] != '\\' || position + 1 >= end)
+        {
+            return null;
+        }
+
+        return text[position + 1] switch
+        {
+            'n' or 'r' or 't' or '\\' or '\'' => 2,
+            '$' when position + 2 < end && text[position + 2] == '{' => 3,
+            'u' => TryGetUnicodeEscapeSequenceLength(text, position, end),
+            _ => null,
+        };
+    }
+
+    private static int? TryGetUnicodeEscapeSequenceLength(string text, int position, int end)
+    {
+        if (position + 3 >= end || text[position + 2] != '{')
+        {
+            return null;
+        }
+
+        var hexDigitCount = 0;
+        for (var current = position + 3; current < end; current++)
+        {
+            if (text[current] == '}')
+            {
+                return hexDigitCount > 0 ? current - position + 1 : null;
+            }
+
+            if (!Uri.IsHexDigit(text[current]))
+            {
+                return null;
+            }
+
+            hexDigitCount++;
+        }
+
+        return null;
+    }
+
     public override void VisitTernaryOperationSyntax(TernaryOperationSyntax syntax)
     {
         AddTokenType(syntax.Colon, SemanticTokenType.Operator);
@@ -266,10 +346,11 @@ public override void VisitTernaryOperationSyntax(TernaryOperationSyntax syntax)
     public override void VisitStringTypeLiteralSyntax(StringTypeLiteralSyntax syntax)
     {
         var startAndEndTokens = Lexer.TryGetStartAndEndTokens(syntax.StringTokens).ToImmutableArray();
+        var isSingleLineString = syntax.StringTokens.Length == 0 || !Lexer.GetStringTokenInfo(syntax.StringTokens[0]).isMultiLine;
         for (var i = 0; i < syntax.StringTokens.Length; i++)
         {
             var result = startAndEndTokens[i];
-            AddStringToken(syntax.StringTokens[i], result?.start, result?.end);
+            AddStringToken(syntax.StringTokens[i], result?.start, result?.end, isSingleLineString);
         }
         foreach (var expression in syntax.Expressions)
         {
@@ -280,10 +361,11 @@ public override void VisitStringTypeLiteralSyntax(StringTypeLiteralSyntax syntax
     public override void VisitStringSyntax(StringSyntax syntax)
     {
         var startAndEndTokens = Lexer.TryGetStartAndEndTokens(syntax.StringTokens).ToImmutableArray();
+        var isSingleLineString = syntax.StringTokens.Length == 0 || !Lexer.GetStringTokenInfo(syntax.StringTokens[0]).isMultiLine;
         for (var i = 0; i < syntax.StringTokens.Length; i++)
         {
             var result = startAndEndTokens[i];
-            AddStringToken(syntax.StringTokens[i], result?.start, result?.end);
+            AddStringToken(syntax.StringTokens[i], result?.start, result?.end, isSingleLineString);
         }
         foreach (var expression in syntax.Expressions)
         {

From 17d5c0417fd75a889f84c0aee0056b965baaecde Mon Sep 17 00:00:00 2001
From: Shenglong Li <shenglol@microsoft.com>
Date: Tue, 30 Jun 2026 10:49:28 -0700
Subject: [PATCH 2/2] Reuse lexer escape scanner for semantic tokens

---
 .../Parsing/LexerTests.cs                     |   1 +
 .../Highlighting/SemanticTokenVisitor.cs      |  44 +-----
 src/Bicep.Core/Parsing/Lexer.cs               | 134 +++++++++++-------
 src/Bicep.Core/Parsing/SlidingTextWindow.cs   |   2 +
 4 files changed, 89 insertions(+), 92 deletions(-)

diff --git a/src/Bicep.Core.UnitTests/Parsing/LexerTests.cs b/src/Bicep.Core.UnitTests/Parsing/LexerTests.cs
index be1aea66f9d..5bc6eb22385 100644
--- a/src/Bicep.Core.UnitTests/Parsing/LexerTests.cs
+++ b/src/Bicep.Core.UnitTests/Parsing/LexerTests.cs
@@ -19,6 +19,7 @@ public class LexerTests
         [DataRow(@"'test'", "test")]
         [DataRow(@"'hello there'", "hello there")]
         [DataRow(@"'\r\n\t\\\$\''", "\r\n\t\\$'")]
+        [DataRow(@"'\${foo}'", "${foo}")]
         [DataRow("'First line\\nSecond\\ttabbed\\tline'", "First line\nSecond\ttabbed\tline")]
         // escape ascii
         [DataRow(@"'\u{0}'", "\0")]
diff --git a/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs b/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs
index fb83367d454..7e00286e26e 100644
--- a/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs
+++ b/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs
@@ -273,7 +273,7 @@ private void AddStringContentTokens(Token token, int start, int length, bool exc
         var segmentStart = start;
         for (var position = start; position < end; position++)
         {
-            if (TryGetEscapeSequenceLength(token.Text, position, end) is not { } escapeSequenceLength)
+            if (!Lexer.TryScanStringEscapeSequence(token.Text.AsSpan(position, end - position), out var escapeSequenceLength))
             {
                 continue;
             }
@@ -294,48 +294,6 @@ private void AddStringSegmentToken(Token token, int start, int end)
         }
     }
 
-    private static int? TryGetEscapeSequenceLength(string text, int position, int end)
-    {
-        if (text[position] != '\\' || position + 1 >= end)
-        {
-            return null;
-        }
-
-        return text[position + 1] switch
-        {
-            'n' or 'r' or 't' or '\\' or '\'' => 2,
-            '$' when position + 2 < end && text[position + 2] == '{' => 3,
-            'u' => TryGetUnicodeEscapeSequenceLength(text, position, end),
-            _ => null,
-        };
-    }
-
-    private static int? TryGetUnicodeEscapeSequenceLength(string text, int position, int end)
-    {
-        if (position + 3 >= end || text[position + 2] != '{')
-        {
-            return null;
-        }
-
-        var hexDigitCount = 0;
-        for (var current = position + 3; current < end; current++)
-        {
-            if (text[current] == '}')
-            {
-                return hexDigitCount > 0 ? current - position + 1 : null;
-            }
-
-            if (!Uri.IsHexDigit(text[current]))
-            {
-                return null;
-            }
-
-            hexDigitCount++;
-        }
-
-        return null;
-    }
-
     public override void VisitTernaryOperationSyntax(TernaryOperationSyntax syntax)
     {
         AddTokenType(syntax.Colon, SemanticTokenType.Operator);
diff --git a/src/Bicep.Core/Parsing/Lexer.cs b/src/Bicep.Core/Parsing/Lexer.cs
index 5a96df03e62..e4247775607 100644
--- a/src/Bicep.Core/Parsing/Lexer.cs
+++ b/src/Bicep.Core/Parsing/Lexer.cs
@@ -213,15 +213,14 @@ private static (string start, string end)? TryGetStartAndEndTokens(Token stringT
             }
             var (start, end) = result;
 
-            var contents = stringToken.Text.Substring(start.Length, stringToken.Text.Length - start.Length - end.Length);
-            var window = new SlidingTextWindow(contents);
+            var contents = stringToken.Text.AsSpan(start.Length, stringToken.Text.Length - start.Length - end.Length);
 
             // the value of the string will be shorter because escapes are longer than the characters they represent
             var buffer = new StringBuilder(contents.Length);
 
-            while (!window.IsAtEnd())
+            for (var position = 0; position < contents.Length; position++)
             {
-                var nextChar = window.Next();
+                var nextChar = contents[position];
 
                 if (nextChar == '\'')
                 {
@@ -230,68 +229,99 @@ private static (string start, string end)? TryGetStartAndEndTokens(Token stringT
 
                 if (nextChar == '\\')
                 {
-                    // escape sequence begins
-                    if (window.IsAtEnd())
+                    if (!TryScanStringEscapeSequence(contents[position..], out var escapeSequenceLength, buffer))
                     {
                         return null;
                     }
 
-                    char escapeChar = window.Next();
+                    position += escapeSequenceLength - 1;
+                    continue;
+                }
 
-                    if (escapeChar == 'u')
-                    {
-                        // unicode escape
-                        char openCurly = window.Next();
-                        if (openCurly != '{')
-                        {
-                            return null;
-                        }
+                // regular string char - append to buffer
+                buffer.Append(nextChar);
+            }
 
-                        var codePointText = ScanHexNumber(window);
-                        if (!TryParseCodePoint(codePointText, out uint codePoint))
-                        {
-                            // invalid codepoint
-                            return null;
-                        }
+            return buffer.ToString();
+        }
 
-                        char closeCurly = window.Next();
-                        if (closeCurly != '}')
-                        {
-                            return null;
-                        }
+        internal static bool TryScanStringEscapeSequence(ReadOnlySpan<char> text, out int length, StringBuilder? buffer = null)
+        {
+            length = 0;
 
-                        char charOrHighSurrogate = CodepointToString(codePoint, out char lowSurrogate);
-                        buffer.Append(charOrHighSurrogate);
-                        if (lowSurrogate != SlidingTextWindow.InvalidCharacter)
-                        {
-                            // previous char was a high surrogate
-                            // also append the low surrogate
-                            buffer.Append(lowSurrogate);
-                        }
+            if (text.Length < 2 || text[0] != '\\')
+            {
+                return false;
+            }
 
-                        continue;
-                    }
+            var escapeChar = text[1];
+            if (escapeChar == 'u')
+            {
+                return TryScanUnicodeEscapeSequence(text, out length, buffer);
+            }
 
-                    if (SingleCharacterEscapes.TryGetValue(escapeChar, out char escapeCharValue) == false)
-                    {
-                        // invalid escape character
-                        return null;
-                    }
+            if (!SingleCharacterEscapes.TryGetValue(escapeChar, out var escapeCharValue))
+            {
+                return false;
+            }
 
-                    buffer.Append(escapeCharValue);
+            length = escapeChar == '$' && text.Length >= 3 && text[2] == '{' ? 3 : 2;
+            buffer?.Append(escapeCharValue);
+            if (length == 3)
+            {
+                buffer?.Append('{');
+            }
 
-                    // continue to next iteration
-                    continue;
-                }
+            return true;
+        }
 
-                // regular string char - append to buffer
-                buffer.Append(nextChar);
+        private static bool TryScanUnicodeEscapeSequence(ReadOnlySpan<char> text, out int length, StringBuilder? buffer)
+        {
+            length = 0;
+
+            if (text.Length < 4 || text[2] != '{')
+            {
+                return false;
             }
 
-            return buffer.ToString();
+            var current = 3;
+            while (current < text.Length && IsHexDigit(text[current]))
+            {
+                current++;
+            }
+
+            if (current == 3 || current >= text.Length || text[current] != '}')
+            {
+                return false;
+            }
+
+            if (!TryParseCodePoint(text[3..current], out uint codePoint))
+            {
+                return false;
+            }
+
+            length = current + 1;
+            if (buffer is not null)
+            {
+                AppendCodePoint(buffer, codePoint);
+            }
+
+            return true;
         }
 
-        private static bool TryParseCodePoint(string text, out uint codePoint) => uint.TryParse(text, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out codePoint) && codePoint <= 0x10FFFF;
+        private static bool TryParseCodePoint(string text, out uint codePoint) => TryParseCodePoint(text.AsSpan(), out codePoint);
+
+        private static bool TryParseCodePoint(ReadOnlySpan<char> text, out uint codePoint) => uint.TryParse(text, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out codePoint) && codePoint <= 0x10FFFF;
+
+        private static void AppendCodePoint(StringBuilder buffer, uint codePoint)
+        {
+            // The first UTF-16 unit is always appended; a low surrogate other than InvalidCharacter means a second unit follows it.
+            buffer.Append(CodepointToString(codePoint, out var lowSurrogate));
+            if (lowSurrogate != SlidingTextWindow.InvalidCharacter)
+            {
+                buffer.Append(lowSurrogate);
+            }
+        }
 
         /// <summary>
         /// Determines if the specified string is a valid identifier. To be considered a valid identifier, the string must start
@@ -772,6 +802,12 @@ private TokenType ScanStringSegment(bool isAtStartOfString)
                     return isAtStartOfString ? TokenType.StringComplete : TokenType.StringRightPiece;
                 }
 
+                if (TryScanStringEscapeSequence(textWindow.GetTextFromPosition(escapeBeginPosition), out var escapeSequenceLength))
+                {
+                    textWindow.Advance(escapeSequenceLength - 1);
+                    continue;
+                }
+
                 // the escape sequence has a char after the \
                 // consume it
                 nextChar = textWindow.Peek();
diff --git a/src/Bicep.Core/Parsing/SlidingTextWindow.cs b/src/Bicep.Core/Parsing/SlidingTextWindow.cs
index a10edab49fc..a3948ae6ec2 100644
--- a/src/Bicep.Core/Parsing/SlidingTextWindow.cs
+++ b/src/Bicep.Core/Parsing/SlidingTextWindow.cs
@@ -119,5 +119,7 @@ public string GetTextBetweenLineStartAndCurrentPosition()
             var positionAfterNewLine = indexOfPreviousNewLine + 1;
             return text[positionAfterNewLine..position];
         }
+
+        internal ReadOnlySpan<char> GetTextFromPosition(int absolutePosition) => text.AsSpan(absolutePosition);
     }
 }