diff --git a/src/Bicep.Core.UnitTests/Highlighting/SemanticTokenVisitorTests.cs b/src/Bicep.Core.UnitTests/Highlighting/SemanticTokenVisitorTests.cs new file mode 100644 index 00000000000..084a2a11abd --- /dev/null +++ b/src/Bicep.Core.UnitTests/Highlighting/SemanticTokenVisitorTests.cs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using Bicep.Core.Highlighting; +using Bicep.Core.UnitTests.Utils; +using FluentAssertions; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace Bicep.Core.UnitTests.Highlighting; + +[TestClass] +public class SemanticTokenVisitorTests +{ + [TestMethod] + public void Build_WithEscapeSequences_ExcludesEscapesFromStringTokens() + { + var bicepText = @"var foo = 'a\\b\'c\${d\n\r\t\u{1F600}z'"; + var result = CompilationHelper.Compile(bicepText); + + var stringTokenTexts = SemanticTokenVisitor.Build(result.Compilation.GetEntrypointSemanticModel()) + .Where(token => token.TokenType == SemanticTokenType.String) + .Select(token => bicepText.Substring(token.Positionable.Span.Position, token.Positionable.Span.Length)); + + stringTokenTexts.Should().Equal("'a", "b", "c", "d", "z'"); + } +} diff --git a/src/Bicep.Core.UnitTests/Parsing/LexerTests.cs b/src/Bicep.Core.UnitTests/Parsing/LexerTests.cs index be1aea66f9d..5bc6eb22385 100644 --- a/src/Bicep.Core.UnitTests/Parsing/LexerTests.cs +++ b/src/Bicep.Core.UnitTests/Parsing/LexerTests.cs @@ -19,6 +19,7 @@ public class LexerTests [DataRow(@"'test'", "test")] [DataRow(@"'hello there'", "hello there")] [DataRow(@"'\r\n\t\\\$\''", "\r\n\t\\$'")] + [DataRow(@"'\${foo}'", "${foo}")] [DataRow("'First line\\nSecond\\ttabbed\\tline'", "First line\nSecond\ttabbed\tline")] // escape ascii [DataRow(@"'\u{0}'", "\0")] diff --git a/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs b/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs index c0cd36b36a3..7e00286e26e 100644 --- a/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs +++ b/src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs @@ -219,7 +219,7 @@ public override void VisitFunctionDeclarationSyntax(FunctionDeclarationSyntax sy base.VisitFunctionDeclarationSyntax(syntax); } - private void AddStringToken(Token token, string? start, string? end) + private void AddStringToken(Token token, string? start, string? end, bool isSingleLineString) { var endInterp = (token.Type, end) switch { @@ -248,7 +248,7 @@ private void AddStringToken(Token token, string? start, string? end) AddTokenType(token.GetSpanSlice(0, startOperatorLength), SemanticTokenType.Operator); } - AddTokenType(token.GetSpanSlice(startOperatorLength, token.Span.Length - startOperatorLength - endOperatorLength), SemanticTokenType.String); + this.AddStringContentTokens(token, startOperatorLength, token.Span.Length - startOperatorLength - endOperatorLength, isSingleLineString); if (hasEndOperator) { @@ -256,6 +256,44 @@ private void AddStringToken(Token token, string? start, string? end) } } + private void AddStringContentTokens(Token token, int start, int length, bool excludeEscapeSequences) + { + if (length <= 0) + { + return; + } + + if (!excludeEscapeSequences) + { + this.AddTokenType(token.GetSpanSlice(start, length), SemanticTokenType.String); + return; + } + + var end = start + length; + var segmentStart = start; + for (var position = start; position < end; position++) + { + if (!Lexer.TryScanStringEscapeSequence(token.Text.AsSpan(position, end - position), out var escapeSequenceLength)) + { + continue; + } + + this.AddStringSegmentToken(token, segmentStart, position); + position += escapeSequenceLength - 1; + segmentStart = position + 1; + } + + this.AddStringSegmentToken(token, segmentStart, end); + } + + private void AddStringSegmentToken(Token token, int start, int end) + { + if (end > start) + { + this.AddTokenType(token.GetSpanSlice(start, end - start), SemanticTokenType.String); + } + } + public override void VisitTernaryOperationSyntax(TernaryOperationSyntax syntax) { AddTokenType(syntax.Colon, SemanticTokenType.Operator); @@ -266,10 +304,11 @@ public override void VisitTernaryOperationSyntax(TernaryOperationSyntax syntax) public override void VisitStringTypeLiteralSyntax(StringTypeLiteralSyntax syntax) { var startAndEndTokens = Lexer.TryGetStartAndEndTokens(syntax.StringTokens).ToImmutableArray(); + var isSingleLineString = syntax.StringTokens.Length == 0 || !Lexer.GetStringTokenInfo(syntax.StringTokens[0]).isMultiLine; for (var i = 0; i < syntax.StringTokens.Length; i++) { var result = startAndEndTokens[i]; - AddStringToken(syntax.StringTokens[i], result?.start, result?.end); + AddStringToken(syntax.StringTokens[i], result?.start, result?.end, isSingleLineString); } foreach (var expression in syntax.Expressions) { @@ -280,10 +319,11 @@ public override void VisitStringTypeLiteralSyntax(StringTypeLiteralSyntax syntax public override void VisitStringSyntax(StringSyntax syntax) { var startAndEndTokens = Lexer.TryGetStartAndEndTokens(syntax.StringTokens).ToImmutableArray(); + var isSingleLineString = syntax.StringTokens.Length == 0 || !Lexer.GetStringTokenInfo(syntax.StringTokens[0]).isMultiLine; for (var i = 0; i < syntax.StringTokens.Length; i++) { var result = startAndEndTokens[i]; - AddStringToken(syntax.StringTokens[i], result?.start, result?.end); + AddStringToken(syntax.StringTokens[i], result?.start, result?.end, isSingleLineString); } foreach (var expression in syntax.Expressions) { diff --git a/src/Bicep.Core/Parsing/Lexer.cs b/src/Bicep.Core/Parsing/Lexer.cs index 5a96df03e62..e4247775607 100644 --- a/src/Bicep.Core/Parsing/Lexer.cs +++ b/src/Bicep.Core/Parsing/Lexer.cs @@ -213,15 +213,14 @@ private static (string start, string end)? TryGetStartAndEndTokens(Token stringT } var (start, end) = result; - var contents = stringToken.Text.Substring(start.Length, stringToken.Text.Length - start.Length - end.Length); - var window = new SlidingTextWindow(contents); + var contents = stringToken.Text.AsSpan(start.Length, stringToken.Text.Length - start.Length - end.Length); // the value of the string will be shorter because escapes are longer than the characters they represent var buffer = new StringBuilder(contents.Length); - while (!window.IsAtEnd()) + for (var position = 0; position < contents.Length; position++) { - var nextChar = window.Next(); + var nextChar = contents[position]; if (nextChar == '\'') { @@ -230,68 +229,99 @@ private static (string start, string end)? TryGetStartAndEndTokens(Token stringT if (nextChar == '\\') { - // escape sequence begins - if (window.IsAtEnd()) + if (!TryScanStringEscapeSequence(contents[position..], out var escapeSequenceLength, buffer)) { return null; } - char escapeChar = window.Next(); + position += escapeSequenceLength - 1; + continue; + } - if (escapeChar == 'u') - { - // unicode escape - char openCurly = window.Next(); - if (openCurly != '{') - { - return null; - } + // regular string char - append to buffer + buffer.Append(nextChar); + } - var codePointText = ScanHexNumber(window); - if (!TryParseCodePoint(codePointText, out uint codePoint)) - { - // invalid codepoint - return null; - } + return buffer.ToString(); + } - char closeCurly = window.Next(); - if (closeCurly != '}') - { - return null; - } + internal static bool TryScanStringEscapeSequence(ReadOnlySpan text, out int length, StringBuilder? buffer = null) + { + length = 0; - char charOrHighSurrogate = CodepointToString(codePoint, out char lowSurrogate); - buffer.Append(charOrHighSurrogate); - if (lowSurrogate != SlidingTextWindow.InvalidCharacter) - { - // previous char was a high surrogate - // also append the low surrogate - buffer.Append(lowSurrogate); - } + if (text.Length < 2 || text[0] != '\\') + { + return false; + } - continue; - } + var escapeChar = text[1]; + if (escapeChar == 'u') + { + return TryScanUnicodeEscapeSequence(text, out length, buffer); + } - if (SingleCharacterEscapes.TryGetValue(escapeChar, out char escapeCharValue) == false) - { - // invalid escape character - return null; - } + if (!SingleCharacterEscapes.TryGetValue(escapeChar, out var escapeCharValue)) + { + return false; + } - buffer.Append(escapeCharValue); + length = escapeChar == '$' && text.Length >= 3 && text[2] == '{' ? 3 : 2; + buffer?.Append(escapeCharValue); + if (length == 3) + { + buffer?.Append('{'); + } - // continue to next iteration - continue; - } + return true; + } - // regular string char - append to buffer - buffer.Append(nextChar); + private static bool TryScanUnicodeEscapeSequence(ReadOnlySpan text, out int length, StringBuilder? buffer) + { + length = 0; + + if (text.Length < 4 || text[2] != '{') + { + return false; } - return buffer.ToString(); + var current = 3; + while (current < text.Length && IsHexDigit(text[current])) + { + current++; + } + + if (current == 3 || current >= text.Length || text[current] != '}') + { + return false; + } + + if (!TryParseCodePoint(text[3..current], out uint codePoint)) + { + return false; + } + + length = current + 1; + if (buffer is not null) + { + AppendCodePoint(buffer, codePoint); + } + + return true; } - private static bool TryParseCodePoint(string text, out uint codePoint) => uint.TryParse(text, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out codePoint) && codePoint <= 0x10FFFF; + private static bool TryParseCodePoint(string text, out uint codePoint) => TryParseCodePoint(text.AsSpan(), out codePoint); + + private static bool TryParseCodePoint(ReadOnlySpan text, out uint codePoint) => uint.TryParse(text, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out codePoint) && codePoint <= 0x10FFFF; + + private static void AppendCodePoint(StringBuilder buffer, uint codePoint) + { + char charOrHighSurrogate = CodepointToString(codePoint, out char lowSurrogate); + buffer.Append(charOrHighSurrogate); + if (lowSurrogate != SlidingTextWindow.InvalidCharacter) + { + buffer.Append(lowSurrogate); + } + } /// /// Determines if the specified string is a valid identifier. To be considered a valid identifier, the string must start @@ -772,6 +802,12 @@ private TokenType ScanStringSegment(bool isAtStartOfString) return isAtStartOfString ? TokenType.StringComplete : TokenType.StringRightPiece; } + if (TryScanStringEscapeSequence(textWindow.GetTextFromPosition(escapeBeginPosition), out var escapeSequenceLength)) + { + textWindow.Advance(escapeSequenceLength - 1); + continue; + } + // the escape sequence has a char after the \ // consume it nextChar = textWindow.Peek(); diff --git a/src/Bicep.Core/Parsing/SlidingTextWindow.cs b/src/Bicep.Core/Parsing/SlidingTextWindow.cs index a10edab49fc..a3948ae6ec2 100644 --- a/src/Bicep.Core/Parsing/SlidingTextWindow.cs +++ b/src/Bicep.Core/Parsing/SlidingTextWindow.cs @@ -119,5 +119,7 @@ public string GetTextBetweenLineStartAndCurrentPosition() var positionAfterNewLine = indexOfPreviousNewLine + 1; return text[positionAfterNewLine..position]; } + + internal ReadOnlySpan GetTextFromPosition(int absolutePosition) => text.AsSpan(absolutePosition); } }