Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/Bicep.Core.UnitTests/Highlighting/SemanticTokenVisitorTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

using Bicep.Core.Highlighting;
using Bicep.Core.UnitTests.Utils;
using FluentAssertions;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Bicep.Core.UnitTests.Highlighting;

[TestClass]
public class SemanticTokenVisitorTests
{
    /// <summary>
    /// Compiles a one-line variable declaration whose string value contains several escape sequences
    /// and verifies that the semantic tokens of type <see cref="SemanticTokenType.String"/> cover only
    /// the text between those escape sequences.
    /// </summary>
    [TestMethod]
    public void Build_WithEscapeSequences_ExcludesEscapesFromStringTokens()
    {
        // Verbatim literal: every backslash below is passed to the compiler unchanged.
        var bicepText = @"var foo = 'a\\b\'c\${d\n\r\t\u{1F600}z'";
        var semanticModel = CompilationHelper.Compile(bicepText).Compilation.GetEntrypointSemanticModel();

        var stringTokens = SemanticTokenVisitor.Build(semanticModel)
            .Where(token => token.TokenType == SemanticTokenType.String);

        // Map each string token back to the source text it covers.
        var stringTokenTexts = stringTokens.Select(token =>
        {
            var span = token.Positionable.Span;
            return bicepText.Substring(span.Position, span.Length);
        });

        stringTokenTexts.Should().Equal("'a", "b", "c", "d", "z'");
    }
}
1 change: 1 addition & 0 deletions src/Bicep.Core.UnitTests/Parsing/LexerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class LexerTests
[DataRow(@"'test'", "test")]
[DataRow(@"'hello there'", "hello there")]
[DataRow(@"'\r\n\t\\\$\''", "\r\n\t\\$'")]
[DataRow(@"'\${foo}'", "${foo}")]
[DataRow("'First line\\nSecond\\ttabbed\\tline'", "First line\nSecond\ttabbed\tline")]
// escape ascii
[DataRow(@"'\u{0}'", "\0")]
Expand Down
48 changes: 44 additions & 4 deletions src/Bicep.Core/Highlighting/SemanticTokenVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ public override void VisitFunctionDeclarationSyntax(FunctionDeclarationSyntax sy
base.VisitFunctionDeclarationSyntax(syntax);
}

private void AddStringToken(Token token, string? start, string? end)
private void AddStringToken(Token token, string? start, string? end, bool isSingleLineString)
{
var endInterp = (token.Type, end) switch
{
Expand Down Expand Up @@ -248,14 +248,52 @@ private void AddStringToken(Token token, string? start, string? end)
AddTokenType(token.GetSpanSlice(0, startOperatorLength), SemanticTokenType.Operator);
}

AddTokenType(token.GetSpanSlice(startOperatorLength, token.Span.Length - startOperatorLength - endOperatorLength), SemanticTokenType.String);
this.AddStringContentTokens(token, startOperatorLength, token.Span.Length - startOperatorLength - endOperatorLength, isSingleLineString);

if (hasEndOperator)
{
AddTokenType(token.GetSpanSlice(token.Span.Length - endOperatorLength, endOperatorLength), SemanticTokenType.Operator);
}
}

/// <summary>
/// Emits <see cref="SemanticTokenType.String"/> tokens for the content slice of a string token.
/// </summary>
/// <param name="token">The string token whose text is being classified.</param>
/// <param name="start">Offset of the content slice, relative to the start of the token.</param>
/// <param name="length">Length of the content slice; nothing is emitted when it is zero or negative.</param>
/// <param name="excludeEscapeSequences">
/// When <c>false</c>, the whole slice is emitted as a single string token. When <c>true</c>, every
/// escape sequence recognized by <see cref="Lexer.TryScanStringEscapeSequence"/> is left out and only
/// the text between escape sequences is emitted.
/// </param>
private void AddStringContentTokens(Token token, int start, int length, bool excludeEscapeSequences)
{
    if (length <= 0)
    {
        return;
    }

    if (!excludeEscapeSequences)
    {
        this.AddTokenType(token.GetSpanSlice(start, length), SemanticTokenType.String);
        return;
    }

    var contentEnd = start + length;
    var pendingSegmentStart = start;
    var cursor = start;

    while (cursor < contentEnd)
    {
        var remaining = token.Text.AsSpan(cursor, contentEnd - cursor);
        if (Lexer.TryScanStringEscapeSequence(remaining, out var escapeSequenceLength))
        {
            // Flush the plain text collected so far, then jump past the escape sequence.
            this.AddStringSegmentToken(token, pendingSegmentStart, cursor);
            cursor += escapeSequenceLength;
            pendingSegmentStart = cursor;
        }
        else
        {
            cursor++;
        }
    }

    // Trailing text after the last escape sequence (or the whole slice if none was found).
    this.AddStringSegmentToken(token, pendingSegmentStart, contentEnd);
}

/// <summary>
/// Emits a <see cref="SemanticTokenType.String"/> token for the half-open range
/// [<paramref name="start"/>, <paramref name="end"/>) of the token; empty or inverted ranges are ignored.
/// </summary>
private void AddStringSegmentToken(Token token, int start, int end)
{
    if (end <= start)
    {
        return;
    }

    var segment = token.GetSpanSlice(start, end - start);
    this.AddTokenType(segment, SemanticTokenType.String);
}

public override void VisitTernaryOperationSyntax(TernaryOperationSyntax syntax)
{
AddTokenType(syntax.Colon, SemanticTokenType.Operator);
Expand All @@ -266,10 +304,11 @@ public override void VisitTernaryOperationSyntax(TernaryOperationSyntax syntax)
public override void VisitStringTypeLiteralSyntax(StringTypeLiteralSyntax syntax)
{
var startAndEndTokens = Lexer.TryGetStartAndEndTokens(syntax.StringTokens).ToImmutableArray();
var isSingleLineString = syntax.StringTokens.Length == 0 || !Lexer.GetStringTokenInfo(syntax.StringTokens[0]).isMultiLine;
for (var i = 0; i < syntax.StringTokens.Length; i++)
{
var result = startAndEndTokens[i];
AddStringToken(syntax.StringTokens[i], result?.start, result?.end);
AddStringToken(syntax.StringTokens[i], result?.start, result?.end, isSingleLineString);
}
foreach (var expression in syntax.Expressions)
{
Expand All @@ -280,10 +319,11 @@ public override void VisitStringTypeLiteralSyntax(StringTypeLiteralSyntax syntax
public override void VisitStringSyntax(StringSyntax syntax)
{
var startAndEndTokens = Lexer.TryGetStartAndEndTokens(syntax.StringTokens).ToImmutableArray();
var isSingleLineString = syntax.StringTokens.Length == 0 || !Lexer.GetStringTokenInfo(syntax.StringTokens[0]).isMultiLine;
for (var i = 0; i < syntax.StringTokens.Length; i++)
{
var result = startAndEndTokens[i];
AddStringToken(syntax.StringTokens[i], result?.start, result?.end);
AddStringToken(syntax.StringTokens[i], result?.start, result?.end, isSingleLineString);
}
foreach (var expression in syntax.Expressions)
{
Expand Down
134 changes: 85 additions & 49 deletions src/Bicep.Core/Parsing/Lexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,14 @@ private static (string start, string end)? TryGetStartAndEndTokens(Token stringT
}
var (start, end) = result;

var contents = stringToken.Text.Substring(start.Length, stringToken.Text.Length - start.Length - end.Length);
var window = new SlidingTextWindow(contents);
var contents = stringToken.Text.AsSpan(start.Length, stringToken.Text.Length - start.Length - end.Length);

// the value of the string will be shorter because escapes are longer than the characters they represent
var buffer = new StringBuilder(contents.Length);

while (!window.IsAtEnd())
for (var position = 0; position < contents.Length; position++)
{
var nextChar = window.Next();
var nextChar = contents[position];

if (nextChar == '\'')
{
Expand All @@ -230,68 +229,99 @@ private static (string start, string end)? TryGetStartAndEndTokens(Token stringT

if (nextChar == '\\')
{
// escape sequence begins
if (window.IsAtEnd())
if (!TryScanStringEscapeSequence(contents[position..], out var escapeSequenceLength, buffer))
{
return null;
}

char escapeChar = window.Next();
position += escapeSequenceLength - 1;
continue;
}

if (escapeChar == 'u')
{
// unicode escape
char openCurly = window.Next();
if (openCurly != '{')
{
return null;
}
// regular string char - append to buffer
buffer.Append(nextChar);
}

var codePointText = ScanHexNumber(window);
if (!TryParseCodePoint(codePointText, out uint codePoint))
{
// invalid codepoint
return null;
}
return buffer.ToString();
}

char closeCurly = window.Next();
if (closeCurly != '}')
{
return null;
}
internal static bool TryScanStringEscapeSequence(ReadOnlySpan<char> text, out int length, StringBuilder? buffer = null)
{
length = 0;

char charOrHighSurrogate = CodepointToString(codePoint, out char lowSurrogate);
buffer.Append(charOrHighSurrogate);
if (lowSurrogate != SlidingTextWindow.InvalidCharacter)
{
// previous char was a high surrogate
// also append the low surrogate
buffer.Append(lowSurrogate);
}
if (text.Length < 2 || text[0] != '\\')
{
return false;
}

continue;
}
var escapeChar = text[1];
if (escapeChar == 'u')
{
return TryScanUnicodeEscapeSequence(text, out length, buffer);
}

if (SingleCharacterEscapes.TryGetValue(escapeChar, out char escapeCharValue) == false)
{
// invalid escape character
return null;
}
if (!SingleCharacterEscapes.TryGetValue(escapeChar, out var escapeCharValue))
{
return false;
}

buffer.Append(escapeCharValue);
length = escapeChar == '$' && text.Length >= 3 && text[2] == '{' ? 3 : 2;
buffer?.Append(escapeCharValue);
if (length == 3)
{
buffer?.Append('{');
}

// continue to next iteration
continue;
}
return true;
}

// regular string char - append to buffer
buffer.Append(nextChar);
private static bool TryScanUnicodeEscapeSequence(ReadOnlySpan<char> text, out int length, StringBuilder? buffer)
{
length = 0;

if (text.Length < 4 || text[2] != '{')
{
return false;
}

return buffer.ToString();
var current = 3;
while (current < text.Length && IsHexDigit(text[current]))
{
current++;
}

if (current == 3 || current >= text.Length || text[current] != '}')
{
return false;
}

if (!TryParseCodePoint(text[3..current], out uint codePoint))
{
return false;
}

length = current + 1;
if (buffer is not null)
{
AppendCodePoint(buffer, codePoint);
}

return true;
}

private static bool TryParseCodePoint(string text, out uint codePoint) => uint.TryParse(text, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out codePoint) && codePoint <= 0x10FFFF;
/// <summary>
/// Parses <paramref name="text"/> as a hexadecimal code point; see the span overload for the rules.
/// </summary>
private static bool TryParseCodePoint(string text, out uint codePoint)
{
    return TryParseCodePoint(text.AsSpan(), out codePoint);
}

/// <summary>
/// Parses <paramref name="text"/> as a hexadecimal number using the invariant culture.
/// </summary>
/// <returns><c>true</c> only when parsing succeeds and the value does not exceed 0x10FFFF.</returns>
private static bool TryParseCodePoint(ReadOnlySpan<char> text, out uint codePoint)
{
    if (!uint.TryParse(text, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out codePoint))
    {
        return false;
    }

    return codePoint <= 0x10FFFF;
}

/// <summary>
/// Appends the character(s) produced by <c>CodepointToString</c> for <paramref name="codePoint"/>
/// to <paramref name="buffer"/>.
/// </summary>
private static void AppendCodePoint(StringBuilder buffer, uint codePoint)
{
    char first = CodepointToString(codePoint, out char second);
    buffer.Append(first);

    // The second char is only meaningful when it is not the invalid-character sentinel.
    if (second == SlidingTextWindow.InvalidCharacter)
    {
        return;
    }

    buffer.Append(second);
}

/// <summary>
/// Determines if the specified string is a valid identifier. To be considered a valid identifier, the string must start
Expand Down Expand Up @@ -772,6 +802,12 @@ private TokenType ScanStringSegment(bool isAtStartOfString)
return isAtStartOfString ? TokenType.StringComplete : TokenType.StringRightPiece;
}

if (TryScanStringEscapeSequence(textWindow.GetTextFromPosition(escapeBeginPosition), out var escapeSequenceLength))
{
textWindow.Advance(escapeSequenceLength - 1);
continue;
}

// the escape sequence has a char after the \
// consume it
nextChar = textWindow.Peek();
Expand Down
2 changes: 2 additions & 0 deletions src/Bicep.Core/Parsing/SlidingTextWindow.cs
Original file line number Diff line number Diff line change
Expand Up @@ -119,5 +119,7 @@ public string GetTextBetweenLineStartAndCurrentPosition()
var positionAfterNewLine = indexOfPreviousNewLine + 1;
return text[positionAfterNewLine..position];
}

/// <summary>
/// Returns a span over the underlying text that starts at index <paramref name="absolutePosition"/>.
/// </summary>
internal ReadOnlySpan<char> GetTextFromPosition(int absolutePosition)
{
    return text.AsSpan(absolutePosition);
}
}
}
Loading