Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ClosedXML.Parser.Ast/AstNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ public abstract record AstNode
/// <summary>
/// Two nodes are considered equal when the other node is not <c>null</c> and both nodes have
/// pairwise equal children in the same order.
/// </summary>
public virtual bool Equals(AstNode? other)
{
    if (other is null)
        return false;

    return Children.SequenceEqual(other.Children);
}

/// <summary>
/// Hash code computed from the hash codes of the children, taken in order. Nodes that have
/// pairwise equal children in the same order (the condition checked by <c>Equals</c>) get the
/// same hash code.
/// </summary>
public override int GetHashCode()
{
    // Enumerable.Sum over int is overflow-checked and throws OverflowException once the child
    // hash codes add up past int.MaxValue. Hash codes use the whole int range, so combine them
    // with wrapping arithmetic instead.
    unchecked
    {
        var hash = 17;
        foreach (var child in Children)
            hash = hash * 31 + child.GetHashCode();

        return hash;
    }
}
}
}
4 changes: 2 additions & 2 deletions src/ClosedXML.Parser.Ast/BinaryNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ public record BinaryNode(BinaryOperation Operation) : AstNode
/// <summary>
/// Create a binary operation node whose children are the <paramref name="left"/> and
/// <paramref name="right"/> operands, in that order.
/// </summary>
public BinaryNode(BinaryOperation operation, AstNode left, AstNode right)
: this(operation)
{
Children = new[] { left, right };
Children = [left, right];
}

/// <summary>
/// Get the display text of the node's operation from <c>OpNames</c>. The
/// <paramref name="style"/> is not used.
/// </summary>
public override string GetDisplayString(ReferenceStyle style)
{
    return OpNames[Operation];
}
};
}
4 changes: 2 additions & 2 deletions src/ClosedXML.Parser.Ast/ValueNode.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace ClosedXML.Parser;
namespace ClosedXML.Parser;

public record ValueNode(string Type, object Value) : AstNode
{
Expand All @@ -11,4 +11,4 @@ public override string GetDisplayString(ReferenceStyle style)
{
return Value?.ToString() ?? "BLANK";
}
};
}
63 changes: 63 additions & 0 deletions src/ClosedXML.Parser.Tests/Lexers/PrattParserPrecedenceTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
using System.Diagnostics;
using ClosedXML.Parser.Pratt;

namespace ClosedXML.Parser.Tests.Lexers;

/// <summary>
/// Tests of how the Pratt parser groups binary operators. Each formula is parsed and the
/// resulting tree is rendered with explicit parentheses around every binary operation, so the
/// grouping chosen by the parser can be compared with the expected text.
/// </summary>
public class PrattParserPrecedenceTests
{
    [Theory]
    [InlineData("1+2+3+4", "(((1+2)+3)+4)")]
    [InlineData("1-2-3-4", "(((1-2)-3)-4)")]
    [InlineData("1-2+3-4+5", "((((1-2)+3)-4)+5)")]
    [InlineData("1*2*3*4", "(((1*2)*3)*4)")]
    [InlineData("1/2/3/4", "(((1/2)/3)/4)")]
    [InlineData("1*2/3*4/5", "((((1*2)/3)*4)/5)")]
    [InlineData("2^3^4^5", "(((2^3)^4)^5)")] // Excel groups chained exponentiation left-to-right, unlike the usual mathematical convention
    public void Operations_with_same_precedence_are_left_associative(string formula, string normalizedForm)
        => AssertSameFormulas(formula, normalizedForm);

    [Theory]
    [InlineData("1+(2+3+4)+((5+6)+7)", "((1+((2+3)+4))+((5+6)+7))")]
    [InlineData("1-(2-3-4)-((5-6)-7)", "((1-((2-3)-4))-((5-6)-7))")]
    [InlineData("1-(2+3-4)+((5-6)+7)", "((1-((2+3)-4))+((5-6)+7))")]
    [InlineData("1*(2*3*4)*((5*6)*7)", "((1*((2*3)*4))*((5*6)*7))")]
    [InlineData("1/(2/3/4)/((5/6)/7)", "((1/((2/3)/4))/((5/6)/7))")]
    [InlineData("1/(2*3/4)*((5/6)*7)", "((1/((2*3)/4))*((5/6)*7))")]
    [InlineData("2^(3^4)^5", "((2^(3^4))^5)")]
    public void Groups_override_precedence(string formula, string normalizedForm)
        => AssertSameFormulas(formula, normalizedForm);

    [Theory]
    [InlineData("1+2*3+4/5*6^7-8", "(((1+(2*3))+((4/5)*(6^7)))-8)")]
    [InlineData("1+2-3*4+5/6^7-8*9", "((((1+2)-(3*4))+(5/(6^7)))-(8*9))")]
    public void Operations_are_grouped_by_precedence(string formula, string normalizedForm)
        => AssertSameFormulas(formula, normalizedForm);

    /// <summary>
    /// Parse the <paramref name="formula"/> and check that its fully parenthesized rendering
    /// is the <paramref name="normalizedForm"/>.
    /// </summary>
    private static void AssertSameFormulas(string formula, string normalizedForm)
    {
        var ast = ParserFactory.Create(new F()).ParseFormula(formula, new Ctx());
        var actual = GetNormalizedForm(ast);

        Assert.Equal(normalizedForm, actual);
    }

    /// <summary>
    /// Render a tree as text: a value node as its display string, a binary node as
    /// <c>(left op right)</c> without spaces. Any other node is not expected here.
    /// </summary>
    private static string GetNormalizedForm(AstNode node)
    {
        if (node is ValueNode leaf)
            return leaf.GetDisplayString(A1);

        if (node is BinaryNode operation)
        {
            var lhs = GetNormalizedForm(operation.Children[0]);
            var symbol = operation.GetDisplayString(A1);
            var rhs = GetNormalizedForm(operation.Children[1]);
            return $"({lhs}{symbol}{rhs})";
        }

        throw new UnreachableException();
    }
}
1 change: 1 addition & 0 deletions src/ClosedXML.Parser.sln.DotSettings
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<s:Boolean x:Key="/Default/UserDictionary/Words/=Intra/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=isnt/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=MULT/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=parselet/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=rowspan/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=unescape/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=unparsable/@EntryIndexedValue">True</s:Boolean>
Expand Down
15 changes: 15 additions & 0 deletions src/ClosedXML.Parser/Pratt/BindingPower.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
namespace ClosedXML.Parser.Pratt;

/// <summary>
/// Binding powers of operators in an expression. An operator with a larger value binds more
/// tightly than an operator with a smaller one. Precedence of operators is specified by
/// ISO-29500:18.17.2.2. Operators that share a precedence level have the same value and
/// associate left-to-right.
/// </summary>
internal static class BindingPower
{
    // Additive operators share one level.
    internal const int Addition = 3;
    internal const int Subtraction = Addition;

    // Multiplicative operators share one level.
    internal const int Multiplication = 4;
    internal const int Division = Multiplication;

    internal const int Exponentiation = 5;
}
8 changes: 8 additions & 0 deletions src/ClosedXML.Parser/Pratt/IParselet.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace ClosedXML.Parser.Pratt;

/// <summary>
/// A parselet for a token that follows an already parsed node, i.e. an operator. The parser
/// looks the parselet up by the type of the upcoming token.
/// </summary>
/// <typeparam name="T">Type of the value held by parsed nodes.</typeparam>
/// <typeparam name="TContext">Type of the context object passed through the parsing.</typeparam>
internal interface IParselet<T, in TContext>
{
/// <summary>
/// Create a node from the already parsed <paramref name="left"/> node and the operator token
/// <paramref name="op"/>. The parser has already consumed <paramref name="op"/> when it
/// calls this method.
/// </summary>
Node<T> Parse(TContext ctx, Node<T> left, Token op);

/// <summary>
/// Binding power of the operator. The parser continues with this parselet only when the
/// value is greater than the minimum binding power of the expression being parsed.
/// </summary>
int GetBindingPower();
}
6 changes: 6 additions & 0 deletions src/ClosedXML.Parser/Pratt/IPrefixParselet.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace ClosedXML.Parser.Pratt;

/// <summary>
/// A parselet for a token that starts an expression. The parser looks the parselet up by the
/// type of the first token of an expression.
/// </summary>
/// <typeparam name="T">Type of the value held by parsed nodes.</typeparam>
/// <typeparam name="TContext">Type of the context object passed through the parsing.</typeparam>
internal interface IPrefixParselet<T, in TContext>
{
/// <summary>
/// Create a node for an expression that starts with the <paramref name="token"/>. The parser
/// has already consumed the <paramref name="token"/> when it calls this method.
/// </summary>
Node<T> Parse(TContext ctx, Token token);
}
27 changes: 19 additions & 8 deletions src/ClosedXML.Parser/Pratt/Lexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ internal class Lexer
private static readonly bool[] IsOp;

private readonly Queue<Token> _queue = new(4);
private readonly string _input;

private string _input = string.Empty; // Currently tokenized formula
private int _start; // The start index of currently parsed token in Next()
private int _i; // Index of current code point _c in _input
private int _c; // A current code point (including astral planes) or -1 if at the EOF
Expand All @@ -30,18 +29,30 @@ static Lexer()
IsOp[op] = true;
}

/// <summary>
/// Create a new instance of a lexer with an empty input.
/// </summary>
public Lexer()
: this(string.Empty)
{
}

/// <summary>
/// Create a new instance of a lexer that tokenizes the <paramref name="input"/>.
/// </summary>
/// <param name="input">Formula to tokenize.</param>
/// <exception cref="ArgumentNullException">When <paramref name="input"/> is <c>null</c>.</exception>
public Lexer(string input)
{
_input = input ?? throw new ArgumentNullException();
_i = -1;
Reset(input);
}

/// <summary>
/// True when the current code point <c>_c</c> is the <c>EOF</c> marker.
/// </summary>
private bool IsEof
{
    get { return _c == EOF; }
}

/// <summary>
/// Prepare lexer to start tokenization of the <paramref name="formula"/>. State left over
/// from a previously tokenized formula, including tokens that are still buffered, is
/// discarded.
/// </summary>
/// <param name="formula">Formula to tokenize.</param>
/// <exception cref="ArgumentNullException">When <paramref name="formula"/> is <c>null</c>.</exception>
public void Reset(string formula)
{
    _input = formula ?? throw new ArgumentNullException(nameof(formula));

    // Consume() checks _queue before anything else, so tokens that were buffered for the
    // previous formula (e.g. a peeked end-of-input token) would otherwise presumably be
    // handed out for the new formula.
    _queue.Clear();

    _start = -1;
    _i = -1;
    _c = 0;
}

public Token Consume()
{
if (_queue.Count == 0)
Expand Down
44 changes: 44 additions & 0 deletions src/ClosedXML.Parser/Pratt/Node.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
namespace ClosedXML.Parser.Pratt;

/// <summary>
/// Information about a node that is used during parsing: the parsed value together with the
/// range of symbols the value was created from.
/// </summary>
/// <typeparam name="T">The <c>TNode</c> type of a node from <see cref="IAstFactory{TScalarValue,TNode,TContext}"/>.</typeparam>
internal readonly struct Node<T>
{
    /// <summary>
    /// Create a node for a <paramref name="value"/> that covers the <paramref name="range"/>.
    /// </summary>
    public Node(T value, SymbolRange range)
    {
        Range = range;
        Value = value;
    }

    /// <summary>
    /// Create a node for a <paramref name="value"/> that covers a range built from
    /// <paramref name="start"/> and <paramref name="end"/>.
    /// </summary>
    public Node(T value, int start, int end)
        : this(value, new SymbolRange(start, end))
    {
    }

    /// <summary>
    /// Parsed value of a node, created by the <see cref="IAstFactory{TScalarValue,TNode,TContext}"/>.
    /// </summary>
    public T Value { get; }

    /// <summary>
    /// A range that was used to create the node.
    /// </summary>
    public SymbolRange Range { get; }

    /// <summary>
    /// Unwrap the <see cref="Value"/> of a node.
    /// </summary>
    public static implicit operator T(Node<T> node) => node.Value;

    /// <summary>
    /// Get a node with the same value whose range is the range of the
    /// <paramref name="token"/> extended to the right by the range of this node.
    /// </summary>
    internal Node<T> ExtendLeft(Token token)
    {
        var widened = token.Range.ExtendRight(Range);
        return new Node<T>(Value, widened);
    }

    /// <summary>
    /// Get a node with the same value whose range is the range of this node extended to the
    /// right by the range of the <paramref name="token"/>.
    /// </summary>
    internal Node<T> ExtendRight(Token token)
    {
        var widened = Range.ExtendRight(token.Range);
        return new Node<T>(Value, widened);
    }
}
34 changes: 34 additions & 0 deletions src/ClosedXML.Parser/Pratt/Parselets/BinaryOpParselet.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
namespace ClosedXML.Parser.Pratt.Parselets;

/// <summary>
/// A parselet for a binary operator. It parses the right operand and asks the factory to
/// create a binary node from the left operand, the operation and the right operand.
/// </summary>
internal class BinaryOpParselet<TScalar, T, TContext> : IParselet<T, TContext>
{
    private readonly IAstFactory<TScalar, T, TContext> _factory;
    private readonly Parser<T, TContext> _parser;
    private readonly BinaryOperation _op;
    private readonly int _bp;

    /// <param name="factory">Factory that creates the binary node.</param>
    /// <param name="parser">Parser used to parse the right operand.</param>
    /// <param name="op">Operation of the created nodes.</param>
    /// <param name="bp">Binding power of the operator.</param>
    public BinaryOpParselet(IAstFactory<TScalar, T, TContext> factory, Parser<T, TContext> parser, BinaryOperation op, int bp)
    {
        (_factory, _parser, _op, _bp) = (factory, parser, op, bp);
    }

    /// <inheritdoc />
    public int GetBindingPower() => _bp;

    /// <inheritdoc />
    public Node<T> Parse(TContext ctx, Node<T> left, Token op)
    {
        // The operator's own binding power is the minimum for the right operand.
        var rightOperand = _parser.ParseExpression(ctx, _bp);

        // The node spans from the left operand, over the operator, to the right operand.
        var leftWithOp = left.Range.ExtendRight(op.Range);
        var wholeRange = leftWithOp.ExtendRight(rightOperand.Range);

        var binaryNode = _factory.BinaryNode(ctx, wholeRange, _op, left, rightOperand);
        return new Node<T>(binaryNode, wholeRange);
    }
}

18 changes: 18 additions & 0 deletions src/ClosedXML.Parser/Pratt/Parselets/GroupParselet.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
namespace ClosedXML.Parser.Pratt.Parselets;

/// <summary>
/// A parselet for an expression enclosed in parentheses. No extra node is created for the
/// group; the node of the inner expression is returned with a range widened to cover both
/// parentheses.
/// </summary>
internal class GroupParselet<T, TContext> : IPrefixParselet<T, TContext>
{
    private readonly Parser<T, TContext> _parser;

    /// <param name="parser">Parser used to parse the content of the group.</param>
    public GroupParselet(Parser<T, TContext> parser) => _parser = parser;

    /// <summary>
    /// Parse the content after the already consumed <paramref name="leftParen"/> and require
    /// a right parenthesis after it.
    /// </summary>
    public Node<T> Parse(TContext ctx, Token leftParen)
    {
        // Inside a group the expression starts again from the minimum binding power 0.
        var inner = _parser.ParseExpression(ctx, 0);
        var closingParen = _parser.Consume(TokenType.RightParen);

        var withOpening = inner.ExtendLeft(leftParen);
        return withOpening.ExtendRight(closingParen);
    }
}
34 changes: 34 additions & 0 deletions src/ClosedXML.Parser/Pratt/Parselets/NumberParselet.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
using System.Globalization;

namespace ClosedXML.Parser.Pratt.Parselets;

/// <summary>
/// Create a number node from a <see cref="TokenType.Number"/> token.
/// </summary>
/// <remarks>
/// <c>double.Parse</c> parses even <c>NaN</c> or <c>∞</c>, but we can never receive such text
/// from the lexer.
/// </remarks>
internal class NumberParselet<TScalar, T, TContext> : IPrefixParselet<T, TContext>
{
    // Only a decimal point and an exponent are accepted in the text of a number.
    private const NumberStyles Styles = NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent;

    private readonly IAstFactory<TScalar, T, TContext> _factory;
    private readonly Parser<T, TContext> _parser;

    /// <param name="factory">Factory that creates the number node.</param>
    /// <param name="parser">Parser that holds the text of the parsed formula.</param>
    public NumberParselet(IAstFactory<TScalar, T, TContext> factory, Parser<T, TContext> parser)
    {
        (_factory, _parser) = (factory, parser);
    }

    /// <summary>
    /// Convert the text of the <paramref name="token"/> to a number using the invariant
    /// culture and wrap the node created by the factory with the range of the token.
    /// </summary>
    public Node<T> Parse(TContext ctx, Token token)
    {
#if NETSTANDARD2_1
        var value = double.Parse(token.GetText(_parser.Input), Styles, CultureInfo.InvariantCulture);
#else
        // Other targets (the original note names NetFx) go through a string, which is slower
        // and leaves extra memory for the GC.
        var value = double.Parse(token.GetText(_parser.Input).ToString(), Styles, CultureInfo.InvariantCulture);
#endif
        var range = token.Range;
        return new Node<T>(_factory.NumberNode(ctx, range, value), range);
    }
}
77 changes: 77 additions & 0 deletions src/ClosedXML.Parser/Pratt/Parser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
using System;
using System.Collections.Generic;

namespace ClosedXML.Parser.Pratt;

/// <summary>
/// Pratt parser. The parser itself only drives the parsing; how tokens are turned into nodes
/// is decided by the parselets registered for token types through <c>Register</c>.
/// </summary>
/// <typeparam name="T">Type of the value held by parsed nodes.</typeparam>
/// <typeparam name="TContext">Type of the context object passed to parselets.</typeparam>
internal class Parser<T, TContext>
{
    private readonly Lexer _lexer = new();
    private readonly Dictionary<TokenType, IPrefixParselet<T, TContext>> _prefixParselets = new();
    private readonly Dictionary<TokenType, IParselet<T, TContext>> _parselets = new();

    /// <summary>
    /// Text of the formula passed to the last call of <see cref="ParseFormula"/>.
    /// </summary>
    internal string Input { get; private set; } = string.Empty;

    /// <summary>
    /// Parse a whole <paramref name="formula"/> and return the value of its root node.
    /// </summary>
    /// <param name="formula">Formula to parse.</param>
    /// <param name="ctx">Context passed to the parselets.</param>
    /// <exception cref="InvalidOperationException">
    /// A token has no prefix parselet, an expected token is missing, or the formula continues
    /// after the end of the parsed expression.
    /// </exception>
    public T ParseFormula(string formula, TContext ctx)
    {
        Input = formula;
        _lexer.Reset(formula);
        var root = ParseExpression(ctx, 0);

        // ParseExpression stops at the first token that can't continue the expression. At
        // the top level the only acceptable one is the end of input; anything else (e.g. an
        // unmatched right parenthesis) would otherwise be silently ignored.
        Consume(TokenType.Eof);

        return root.Value;
    }

    /// <summary>
    /// Parse an expression that consists of a prefix and any number of following operators
    /// with a binding power greater than <paramref name="minBp"/>.
    /// </summary>
    /// <param name="ctx">Context passed to the parselets.</param>
    /// <param name="minBp">
    /// Binding power an operator must exceed to become a part of the expression. Operators
    /// with the same binding power are not taken, which makes them left-associative.
    /// </param>
    internal Node<T> ParseExpression(TContext ctx, int minBp)
    {
        var node = Prefix(ctx);

        while (true)
        {
            var maybeOp = _lexer.Peek();
            if (maybeOp.Type == TokenType.Eof)
                break;

            // A token without a parselet isn't an operator; leave it to the caller.
            if (!_parselets.TryGetValue(maybeOp.Type, out var parselet))
                break;

            // The operator binds too weakly; it belongs to an enclosing expression.
            if (parselet.GetBindingPower() <= minBp)
                break;

            var op = _lexer.Consume();
            node = parselet.Parse(ctx, node, op);
        }

        return node;
    }

    /// <summary>
    /// Consume a token and parse it with the prefix parselet registered for its type.
    /// </summary>
    private Node<T> Prefix(TContext ctx)
    {
        var token = _lexer.Consume();

        if (!_prefixParselets.TryGetValue(token.Type, out var parselet))
            throw new InvalidOperationException($"No parselet found for {token.Type}.");

        return parselet.Parse(ctx, token);
    }

    /// <summary>
    /// Consume a token and check that it is of the <paramref name="expectedType"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The consumed token has a different type.</exception>
    internal Token Consume(TokenType expectedType)
    {
        var token = _lexer.Consume();
        if (token.Type != expectedType)
            throw new InvalidOperationException($"Expected token of type {expectedType}, but received {token.Type}.");

        return token;
    }

    /// <summary>
    /// Register a parselet for tokens of the <paramref name="type"/> that start an expression.
    /// </summary>
    internal void Register(TokenType type, IPrefixParselet<T, TContext> parselet)
    {
        _prefixParselets.Add(type, parselet);
    }

    /// <summary>
    /// Register a parselet for tokens of the <paramref name="type"/> that follow an already
    /// parsed node.
    /// </summary>
    internal void Register(TokenType type, IParselet<T, TContext> parselet)
    {
        _parselets.Add(type, parselet);
    }
}
Loading
Loading