diff --git a/src/ClosedXML.Parser.Ast/AstNode.cs b/src/ClosedXML.Parser.Ast/AstNode.cs
index e7ea1bc..7aad746 100644
--- a/src/ClosedXML.Parser.Ast/AstNode.cs
+++ b/src/ClosedXML.Parser.Ast/AstNode.cs
@@ -14,4 +14,4 @@ public abstract record AstNode
public virtual bool Equals(AstNode? other) => other is not null && Children.SequenceEqual(other.Children);
public override int GetHashCode() => Children.Sum(child => child.GetHashCode());
-}
\ No newline at end of file
+}
diff --git a/src/ClosedXML.Parser.Ast/BinaryNode.cs b/src/ClosedXML.Parser.Ast/BinaryNode.cs
index a8ad586..93c8834 100644
--- a/src/ClosedXML.Parser.Ast/BinaryNode.cs
+++ b/src/ClosedXML.Parser.Ast/BinaryNode.cs
@@ -24,8 +24,8 @@ public record BinaryNode(BinaryOperation Operation) : AstNode
public BinaryNode(BinaryOperation operation, AstNode left, AstNode right)
: this(operation)
{
- Children = new[] { left, right };
+ Children = [left, right];
}
public override string GetDisplayString(ReferenceStyle style) => OpNames[Operation];
-};
\ No newline at end of file
+}
diff --git a/src/ClosedXML.Parser.Ast/ValueNode.cs b/src/ClosedXML.Parser.Ast/ValueNode.cs
index 0a52355..026820c 100644
--- a/src/ClosedXML.Parser.Ast/ValueNode.cs
+++ b/src/ClosedXML.Parser.Ast/ValueNode.cs
@@ -1,4 +1,4 @@
-namespace ClosedXML.Parser;
+namespace ClosedXML.Parser;
public record ValueNode(string Type, object Value) : AstNode
{
@@ -11,4 +11,4 @@ public override string GetDisplayString(ReferenceStyle style)
{
return Value?.ToString() ?? "BLANK";
}
-};
\ No newline at end of file
+}
diff --git a/src/ClosedXML.Parser.Tests/Lexers/PrattParserPrecedenceTests.cs b/src/ClosedXML.Parser.Tests/Lexers/PrattParserPrecedenceTests.cs
new file mode 100644
index 0000000..750d1d4
--- /dev/null
+++ b/src/ClosedXML.Parser.Tests/Lexers/PrattParserPrecedenceTests.cs
@@ -0,0 +1,63 @@
+using System.Diagnostics;
+using ClosedXML.Parser.Pratt;
+
+namespace ClosedXML.Parser.Tests.Lexers;
+
+/// <summary>
+/// Checks how the Pratt parser groups operators by rendering the parsed tree as a fully parenthesized formula.
+/// </summary>
+public class PrattParserPrecedenceTests
+{
+    [Theory]
+    [InlineData("1+2+3+4", "(((1+2)+3)+4)")]
+    [InlineData("1-2-3-4", "(((1-2)-3)-4)")]
+    [InlineData("1-2+3-4+5", "((((1-2)+3)-4)+5)")]
+    [InlineData("1*2*3*4", "(((1*2)*3)*4)")]
+    [InlineData("1/2/3/4", "(((1/2)/3)/4)")]
+    [InlineData("1*2/3*4/5", "((((1*2)/3)*4)/5)")]
+    [InlineData("2^3^4^5", "(((2^3)^4)^5)")] // Even exponential is left-associative in Excel, contrary to standard convention
+    public void Operations_with_same_precedence_are_left_associative(string formula, string normalizedForm)
+        => AssertSameFormulas(formula, normalizedForm);
+
+    [Theory]
+    [InlineData("1+(2+3+4)+((5+6)+7)", "((1+((2+3)+4))+((5+6)+7))")]
+    [InlineData("1-(2-3-4)-((5-6)-7)", "((1-((2-3)-4))-((5-6)-7))")]
+    [InlineData("1-(2+3-4)+((5-6)+7)", "((1-((2+3)-4))+((5-6)+7))")]
+    [InlineData("1*(2*3*4)*((5*6)*7)", "((1*((2*3)*4))*((5*6)*7))")]
+    [InlineData("1/(2/3/4)/((5/6)/7)", "((1/((2/3)/4))/((5/6)/7))")]
+    [InlineData("1/(2*3/4)*((5/6)*7)", "((1/((2*3)/4))*((5/6)*7))")]
+    [InlineData("2^(3^4)^5", "((2^(3^4))^5)")]
+    public void Groups_override_precedence(string formula, string normalizedForm)
+        => AssertSameFormulas(formula, normalizedForm);
+
+    [Theory]
+    [InlineData("1+2*3+4/5*6^7-8", "(((1+(2*3))+((4/5)*(6^7)))-8)")]
+    [InlineData("1+2-3*4+5/6^7-8*9", "((((1+2)-(3*4))+(5/(6^7)))-(8*9))")]
+    public void Operations_are_grouped_by_precedence(string formula, string normalizedForm)
+        => AssertSameFormulas(formula, normalizedForm);
+
+    // Parses the formula with a freshly created parser and compares its parenthesized rendering with the expectation.
+    private static void AssertSameFormulas(string formula, string normalizedForm)
+    {
+        var ast = ParserFactory.Create(new F()).ParseFormula(formula, new Ctx());
+        var actual = GetNormalizedForm(ast);
+        Assert.Equal(normalizedForm, actual);
+    }
+
+    // Renders a value node as its display text and a binary node as "(" + left + operator + right + ")".
+    private static string GetNormalizedForm(AstNode node)
+    {
+        switch (node)
+        {
+            case ValueNode leaf:
+                return leaf.GetDisplayString(A1);
+            case BinaryNode operation:
+                var lhs = GetNormalizedForm(operation.Children[0]);
+                var symbol = operation.GetDisplayString(A1);
+                var rhs = GetNormalizedForm(operation.Children[1]);
+                return $"({lhs}{symbol}{rhs})";
+            default:
+                throw new UnreachableException();
+        }
+    }
+}
diff --git a/src/ClosedXML.Parser.sln.DotSettings b/src/ClosedXML.Parser.sln.DotSettings
index c24fb3c..c0b03b0 100644
--- a/src/ClosedXML.Parser.sln.DotSettings
+++ b/src/ClosedXML.Parser.sln.DotSettings
@@ -5,6 +5,7 @@
True
True
True
+ True
True
True
True
diff --git a/src/ClosedXML.Parser/Pratt/BindingPower.cs b/src/ClosedXML.Parser/Pratt/BindingPower.cs
new file mode 100644
index 0000000..a4a2765
--- /dev/null
+++ b/src/ClosedXML.Parser/Pratt/BindingPower.cs
@@ -0,0 +1,15 @@
+namespace ClosedXML.Parser.Pratt;
+
+/// <summary>
+/// Values of binding power for operators in an expression. Higher number = higher binding power.
+/// Precedence of operators is specified by ISO-29500:18.17.2.2. Operators that have the same
+/// precedence associate left-to-right.
+/// </summary>
+internal static class BindingPower
+{
+ internal const int Addition = 3; // Lowest level defined here, shared with Subtraction
+ internal const int Subtraction = 3; // Same level as Addition
+ internal const int Multiplication = 4; // Binds tighter than Addition/Subtraction
+ internal const int Division = 4; // Same level as Multiplication
+ internal const int Exponentiation = 5; // Highest level defined here
+}
diff --git a/src/ClosedXML.Parser/Pratt/IParselet.cs b/src/ClosedXML.Parser/Pratt/IParselet.cs
new file mode 100644
index 0000000..74af3c5
--- /dev/null
+++ b/src/ClosedXML.Parser/Pratt/IParselet.cs
@@ -0,0 +1,8 @@
+namespace ClosedXML.Parser.Pratt;
+
+internal interface IParselet // Parses a construct that continues an already parsed left operand
+{
+ Node Parse(TContext ctx, Node left, Token op); // left = node parsed so far, op = the consumed token this parselet was looked up by
+
+ int GetBindingPower(); // The parser compares this with its minimum binding power before it calls Parse
+}
diff --git a/src/ClosedXML.Parser/Pratt/IPrefixParselet.cs b/src/ClosedXML.Parser/Pratt/IPrefixParselet.cs
new file mode 100644
index 0000000..95c0f3a
--- /dev/null
+++ b/src/ClosedXML.Parser/Pratt/IPrefixParselet.cs
@@ -0,0 +1,6 @@
+namespace ClosedXML.Parser.Pratt;
+
+internal interface IPrefixParselet // Parses a construct that begins with the given token
+{
+ Node Parse(TContext ctx, Token token); // token = the already consumed token this parselet was looked up by
+}
diff --git a/src/ClosedXML.Parser/Pratt/Lexer.cs b/src/ClosedXML.Parser/Pratt/Lexer.cs
index 2122088..3ca760a 100644
--- a/src/ClosedXML.Parser/Pratt/Lexer.cs
+++ b/src/ClosedXML.Parser/Pratt/Lexer.cs
@@ -16,8 +16,7 @@ internal class Lexer
private static readonly bool[] IsOp;
private readonly Queue _queue = new(4);
- private readonly string _input;
-
+ private string _input = string.Empty; // Currently tokenized formula
private int _start; // The start index of currently parsed token in Next()
private int _i; // Index of current code point _c in _input
private int _c; // A current code point (including astral planes) or -1 if at the EOF
@@ -30,18 +29,30 @@ static Lexer()
IsOp[op] = true;
}
- ///
- /// Create a new instance of a lexer.
- ///
- /// Formula to tokenize.
+ public Lexer() // Starts with an empty input; Reset supplies the formula to tokenize
+ : this(string.Empty)
+ {
+ }
+
public Lexer(string input)
{
- _input = input ?? throw new ArgumentNullException();
- _i = -1;
+ Reset(input);
}
private bool IsEof => _c == EOF;
+    /// <summary>
+    /// Prepare lexer to start tokenization of the <paramref name="formula"/>.
+    /// </summary>
+    /// <param name="formula">Formula to tokenize; must not be <c>null</c>.</param>
+    public void Reset(string formula)
+    {
+        _input = formula ?? throw new ArgumentNullException(nameof(formula));
+        _queue.Clear(); // Tokens buffered for the previous formula must not leak into this one
+        _start = _i = -1;
+        _c = 0;
+    }
+
public Token Consume()
{
if (_queue.Count == 0)
diff --git a/src/ClosedXML.Parser/Pratt/Node.cs b/src/ClosedXML.Parser/Pratt/Node.cs
new file mode 100644
index 0000000..7ae92be
--- /dev/null
+++ b/src/ClosedXML.Parser/Pratt/Node.cs
@@ -0,0 +1,44 @@
+namespace ClosedXML.Parser.Pratt;
+
+/// <summary>
+/// Information about a node used during parsing: the parsed value together with its range.
+/// </summary>
+/// <typeparam name="T">The TNode type of a node from the AST factory.</typeparam>
+internal readonly struct Node
+{
+ public Node(T value, int start, int end) // Convenience overload that builds the range from start and end
+ : this(value, new SymbolRange(start, end))
+ {
+ }
+
+ public Node(T value, SymbolRange range)
+ {
+ Value = value;
+ Range = range;
+ }
+
+ /// <summary>
+ /// Parsed value of a node, created by the AST factory.
+ /// </summary>
+ public T Value { get; }
+
+ /// <summary>
+ /// A range that was used to create the node.
+ /// </summary>
+ public SymbolRange Range { get; }
+
+ public static implicit operator T(Node node) // Lets a node be passed where its value is expected
+ {
+ return node.Value;
+ }
+
+ internal Node ExtendLeft(Token token) // Same value; the range now starts at the token to the left
+ {
+ return new Node(Value, token.Range.ExtendRight(Range));
+ }
+
+ internal Node ExtendRight(Token token) // Same value; the range now ends at the token to the right
+ {
+ return new Node(Value, Range.ExtendRight(token.Range));
+ }
+}
diff --git a/src/ClosedXML.Parser/Pratt/Parselets/BinaryOpParselet.cs b/src/ClosedXML.Parser/Pratt/Parselets/BinaryOpParselet.cs
new file mode 100644
index 0000000..6305502
--- /dev/null
+++ b/src/ClosedXML.Parser/Pratt/Parselets/BinaryOpParselet.cs
@@ -0,0 +1,34 @@
+namespace ClosedXML.Parser.Pratt.Parselets;
+
+/// <summary>
+/// Parselet of a binary operator. It joins the left operand with the expression parsed to the right of the operator.
+/// </summary>
+internal class BinaryOpParselet : IParselet
+{
+    private readonly IAstFactory _factory;
+    private readonly Parser _parser;
+    private readonly BinaryOperation _op;
+    private readonly int _bp;
+
+    public BinaryOpParselet(IAstFactory factory, Parser parser, BinaryOperation op, int bp)
+    {
+        _bp = bp;
+        _op = op;
+        _parser = parser;
+        _factory = factory;
+    }
+
+    public Node Parse(TContext ctx, Node left, Token op)
+    {
+        // The right operand is parsed with this operator's own binding power as the minimum.
+        var rightOperand = _parser.ParseExpression(ctx, _bp);
+        var wholeRange = left.Range.ExtendRight(op.Range).ExtendRight(rightOperand.Range);
+        var created = _factory.BinaryNode(ctx, wholeRange, _op, left, rightOperand);
+
+        return new Node(created, wholeRange);
+    }
+
+    /// <summary>Binding power received in the constructor.</summary>
+    public int GetBindingPower() => _bp;
+}
+
diff --git a/src/ClosedXML.Parser/Pratt/Parselets/GroupParselet.cs b/src/ClosedXML.Parser/Pratt/Parselets/GroupParselet.cs
new file mode 100644
index 0000000..f3c8013
--- /dev/null
+++ b/src/ClosedXML.Parser/Pratt/Parselets/GroupParselet.cs
@@ -0,0 +1,18 @@
+namespace ClosedXML.Parser.Pratt.Parselets;
+
+/// <summary>Prefix parselet for an expression enclosed in parentheses.</summary>
+internal class GroupParselet : IPrefixParselet
+{
+    private readonly Parser _parser;
+
+    public GroupParselet(Parser parser) => _parser = parser;
+
+    public Node Parse(TContext ctx, Token leftParen)
+    {
+        // Minimum binding power 0: no operator is cut off before the closing parenthesis is required.
+        var inner = _parser.ParseExpression(ctx, 0);
+        var closing = _parser.Consume(TokenType.RightParen);
+        var withOpening = inner.ExtendLeft(leftParen);
+        return withOpening.ExtendRight(closing);
+    }
+}
diff --git a/src/ClosedXML.Parser/Pratt/Parselets/NumberParselet.cs b/src/ClosedXML.Parser/Pratt/Parselets/NumberParselet.cs
new file mode 100644
index 0000000..28a95eb
--- /dev/null
+++ b/src/ClosedXML.Parser/Pratt/Parselets/NumberParselet.cs
@@ -0,0 +1,34 @@
+using System.Globalization;
+
+namespace ClosedXML.Parser.Pratt.Parselets;
+
+/// <summary>
+/// Get a number node from a token.
+/// </summary>
+/// <remarks>
+/// double.Parse parses even NaN or ∞, but we can never receive such text
+/// from the lexer.
+/// </remarks>
+internal class NumberParselet : IPrefixParselet
+{
+ private readonly IAstFactory _factory;
+ private readonly Parser _parser;
+
+ public NumberParselet(IAstFactory factory, Parser parser)
+ {
+ _factory = factory;
+ _parser = parser;
+ }
+
+ public Node Parse(TContext ctx, Token token)
+ {
+#if NETSTANDARD2_1
+ var text = token.GetText(_parser.Input);
+#else
+ var text = token.GetText(_parser.Input).ToString(); // Other targets copy the token text to a string first: slower and leaves extra memory for the GC
+#endif
+ var number = double.Parse(text, NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
+ var node = _factory.NumberNode(ctx, token.Range, number);
+ return new Node(node, token.Range);
+ }
+}
diff --git a/src/ClosedXML.Parser/Pratt/Parser.cs b/src/ClosedXML.Parser/Pratt/Parser.cs
new file mode 100644
index 0000000..254c270
--- /dev/null
+++ b/src/ClosedXML.Parser/Pratt/Parser.cs
@@ -0,0 +1,77 @@
+using System;
+using System.Collections.Generic;
+
+namespace ClosedXML.Parser.Pratt;
+
+/// <summary>
+/// Pratt parser. Parselets registered per token type build the nodes; their binding power decides how operators group.
+/// </summary>
+internal class Parser
+{
+    private readonly Lexer _lexer = new();
+    private readonly Dictionary> _prefixParselets = new();
+    private readonly Dictionary> _parselets = new();
+
+    /// <summary>Text of the formula passed to the most recent <see cref="ParseFormula"/> call.</summary>
+    internal string Input { get; private set; } = string.Empty;
+
+    /// <summary>Parse the whole <paramref name="formula"/> and return the value of its root node.</summary>
+    /// <exception cref="ArgumentNullException">The formula is <c>null</c>.</exception>
+    /// <exception cref="InvalidOperationException">A token has no parselet or is left over after the expression.</exception>
+    public T ParseFormula(string formula, TContext ctx)
+    {
+        _lexer.Reset(formula); // Rejects null before Input is replaced
+        Input = formula;
+        var root = ParseExpression(ctx, 0);
+        // The expression stops at the first non-operator token; anything but the end of input there would be silently ignored.
+        Consume(TokenType.Eof);
+        return root.Value;
+    }
+
+    /// <summary>Parse an expression that extends over operators binding more tightly than <paramref name="minBp"/>.</summary>
+    internal Node ParseExpression(TContext ctx, int minBp)
+    {
+        var node = Prefix(ctx);
+
+        while (true)
+        {
+            var maybeOp = _lexer.Peek();
+            if (maybeOp.Type == TokenType.Eof)
+                break;
+            if (!_parselets.TryGetValue(maybeOp.Type, out var parselet))
+                break;
+            if (parselet.GetBindingPower() <= minBp) // Equal power stops as well, so the operator is left to the caller
+                break;
+
+            var op = _lexer.Consume();
+            node = parselet.Parse(ctx, node, op);
+        }
+
+        return node;
+    }
+
+    /// <summary>Consume a token and parse it with the prefix parselet registered for its type.</summary>
+    private Node Prefix(TContext ctx)
+    {
+        var token = _lexer.Consume();
+        if (!_prefixParselets.TryGetValue(token.Type, out var parselet))
+            throw new InvalidOperationException($"No parselet found for {token.Type}.");
+
+        return parselet.Parse(ctx, token);
+    }
+
+    /// <summary>Consume the next token and fail unless it has the <paramref name="expectedType"/>.</summary>
+    internal Token Consume(TokenType expectedType)
+    {
+        var token = _lexer.Consume();
+        if (token.Type != expectedType)
+            throw new InvalidOperationException($"Expected token of type {expectedType}, but received {token.Type}.");
+        return token;
+    }
+
+    /// <summary>Register the parselet used for tokens of the given type at the start of an expression.</summary>
+    internal void Register(TokenType type, IPrefixParselet parselet) => _prefixParselets.Add(type, parselet);
+
+    /// <summary>Register the parselet used for tokens of the given type that follow a left operand.</summary>
+    internal void Register(TokenType type, IParselet parselet) => _parselets.Add(type, parselet);
+}
diff --git a/src/ClosedXML.Parser/Pratt/ParserFactory.cs b/src/ClosedXML.Parser/Pratt/ParserFactory.cs
new file mode 100644
index 0000000..84f22cb
--- /dev/null
+++ b/src/ClosedXML.Parser/Pratt/ParserFactory.cs
@@ -0,0 +1,25 @@
+using ClosedXML.Parser.Pratt.Parselets;
+
+namespace ClosedXML.Parser.Pratt;
+
+internal static class ParserFactory // Builds a Pratt parser and registers the parselets listed below
+{
+ public static Parser Create(
+ IAstFactory factory)
+ {
+ var parser = new Parser();
+
+ // Register prefix parselets: token types that can start an expression
+ parser.Register(TokenType.Number, new NumberParselet(factory, parser));
+ parser.Register(TokenType.LeftParen, new GroupParselet(parser));
+
+ // Register operation parselets: each binary operator with its binding power from BindingPower
+ parser.Register(TokenType.Plus, new BinaryOpParselet(factory, parser, BinaryOperation.Addition, BindingPower.Addition));
+ parser.Register(TokenType.Minus, new BinaryOpParselet(factory, parser, BinaryOperation.Subtraction, BindingPower.Subtraction));
+ parser.Register(TokenType.Mul, new BinaryOpParselet(factory, parser, BinaryOperation.Multiplication, BindingPower.Multiplication));
+ parser.Register(TokenType.Div, new BinaryOpParselet(factory, parser, BinaryOperation.Division, BindingPower.Division));
+ parser.Register(TokenType.Pow, new BinaryOpParselet(factory, parser, BinaryOperation.Power, BindingPower.Exponentiation));
+
+ return parser;
+ }
+}
diff --git a/src/ClosedXML.Parser/Pratt/Token.cs b/src/ClosedXML.Parser/Pratt/Token.cs
index 1166cff..69103d9 100644
--- a/src/ClosedXML.Parser/Pratt/Token.cs
+++ b/src/ClosedXML.Parser/Pratt/Token.cs
@@ -4,18 +4,18 @@ namespace ClosedXML.Parser.Pratt;
internal readonly struct Token
{
- private readonly SymbolRange _text;
-
public Token(TokenType type, int start, int end)
{
Type = type;
- _text = new SymbolRange(start, end);
+ Range = new SymbolRange(start, end);
}
public TokenType Type { get; }
+ public SymbolRange Range { get; } // Built from the constructor's start and end; GetText slices the input by it
+
public ReadOnlySpan GetText(string input)
{
- return input.AsSpan(_text.Start, _text.Length);
+ return input.AsSpan(Range.Start, Range.Length);
}
-}
\ No newline at end of file
+}
diff --git a/src/ClosedXML.Parser/SymbolRange.cs b/src/ClosedXML.Parser/SymbolRange.cs
index c1b1c4e..bfe42cc 100644
--- a/src/ClosedXML.Parser/SymbolRange.cs
+++ b/src/ClosedXML.Parser/SymbolRange.cs
@@ -1,4 +1,6 @@
-namespace ClosedXML.Parser;
+using System;
+
+namespace ClosedXML.Parser;
///
/// A range of a symbol in formula text.
@@ -37,4 +39,12 @@ public override string ToString()
{
return $"[{Start}:{End}]";
}
-}
\ No newline at end of file
+
+    // Joins this range with a range that starts exactly where this one ends; any other range is rejected.
+    internal SymbolRange ExtendRight(SymbolRange rangeToRight)
+    {
+        return End == rangeToRight.Start
+            ? new SymbolRange(Start, rangeToRight.End)
+            : throw new InvalidOperationException($"The range end {End} doesn't match start of the range to the right {rangeToRight.Start}.");
+    }
+}