From 20995d2bcd183d3e3da83500aa35f1f899d3c202 Mon Sep 17 00:00:00 2001 From: jahav Date: Tue, 21 Oct 2025 02:55:58 +0200 Subject: [PATCH 1/5] Pratt parser - parse ident The IdentParselet will resolve all references that start with the ident token. --- src/ClosedXML.Parser.Ast/SheetNameNode.cs | 5 +- .../Lexers/ParseletIdentTests.cs | 76 ++++++ src/ClosedXML.Parser/NameUtils.cs | 21 ++ src/ClosedXML.Parser/Pratt/CompatUtils.cs | 24 ++ src/ClosedXML.Parser/Pratt/Lexer.cs | 11 +- .../Pratt/Parselets/IdentParselet.cs | 122 +++++++++ .../Pratt/Parselets/ParserExtensions.cs | 246 ++++++++++++++++++ src/ClosedXML.Parser/Pratt/Parser.cs | 10 + src/ClosedXML.Parser/Pratt/ParserFactory.cs | 1 + src/ClosedXML.Parser/RowCol.cs | 2 + 10 files changed, 513 insertions(+), 5 deletions(-) create mode 100644 src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs create mode 100644 src/ClosedXML.Parser/Pratt/CompatUtils.cs create mode 100644 src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs create mode 100644 src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs diff --git a/src/ClosedXML.Parser.Ast/SheetNameNode.cs b/src/ClosedXML.Parser.Ast/SheetNameNode.cs index 00098dd..a67bd4c 100644 --- a/src/ClosedXML.Parser.Ast/SheetNameNode.cs +++ b/src/ClosedXML.Parser.Ast/SheetNameNode.cs @@ -4,6 +4,7 @@ public record SheetNameNode(string Sheet, string Name) : AstNode { public override string GetDisplayString(ReferenceStyle style) { - return $"[{Sheet}]!{Name}"; + var sheet = NameUtils.ShouldQuote(Sheet) ? 
'\'' + Sheet.Replace("'", "''") + '\'' : Sheet; + return $"{sheet}!{Name}"; } -} \ No newline at end of file +} diff --git a/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs b/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs new file mode 100644 index 0000000..5f94efe --- /dev/null +++ b/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs @@ -0,0 +1,76 @@ +using ClosedXML.Parser.Pratt; + +namespace ClosedXML.Parser.Tests.Lexers; + +public class ParseletIdentTests +{ + [Theory] + + // Local area + [InlineData("A1:B1", typeof(ReferenceNode))] + [InlineData("$A$1:$B$1", typeof(ReferenceNode))] + + // Local cell + [InlineData("A1", typeof(ReferenceNode))] + [InlineData("A$1", typeof(ReferenceNode))] + [InlineData("$A1", typeof(ReferenceNode))] + [InlineData("$A$1", typeof(ReferenceNode))] + [InlineData("XFD1048576", typeof(ReferenceNode))] + [InlineData("XFD$1048576", typeof(ReferenceNode))] + [InlineData("$XFD1048576", typeof(ReferenceNode))] + [InlineData("$XFD$1048576", typeof(ReferenceNode))] + + // Local colspan + [InlineData("A:B", typeof(ReferenceNode))] + [InlineData("$GE:$XFD", typeof(ReferenceNode))] + + // Local rowspan starting with absolute + [InlineData("$1:8", typeof(ReferenceNode))] + [InlineData("$72:$85", typeof(ReferenceNode))] + + // sheet!A1:A2 + [InlineData("Sheet!A1:B2", typeof(SheetReferenceNode))] + [InlineData("Sheet!$Z$84:$BG$99", typeof(SheetReferenceNode))] + + // sheet!A1 + [InlineData("Sheet!A1", typeof(SheetReferenceNode))] + [InlineData("Sheet!$Z$84", typeof(SheetReferenceNode))] + + // sheet!$1:2 + [InlineData("Sheet!$4:81", typeof(SheetReferenceNode))] + [InlineData("Sheet!$1:$5", typeof(SheetReferenceNode))] + + // sheet!name + [InlineData("Sheet!name", typeof(SheetNameNode))] + [InlineData("Sheet!_name", typeof(SheetNameNode))] + + // sheet!1:2 + [InlineData("Sheet!1:2", typeof(SheetReferenceNode))] + [InlineData("Sheet!1:$2", typeof(SheetReferenceNode))] + + // name + [InlineData("_name", typeof(NameNode))] + 
[InlineData("name", typeof(NameNode))] + public void Can_parse_references_starting_at_ident(string formula, Type expectedNodeType) + { + var parser = ParserFactory.Create(new F()); + var root = parser.ParseFormula(formula, new Ctx()); + + Assert.Equal(expectedNodeType, root.GetType()); + Assert.Equal(formula, root.GetDisplayString(A1)); + } + + [Theory] + [InlineData("sheet!$")] + [InlineData("sheet!")] + [InlineData("$")] + [InlineData("A01")] + [InlineData("A0")] + [InlineData("A1048577")] + [InlineData("XFE1")] + public void Invalid_references_starting_with_ident_throw_parsing_exception(string formula) + { + var parser = ParserFactory.Create(new F()); + Assert.Throws(() => parser.ParseFormula(formula, new Ctx())); + } +} diff --git a/src/ClosedXML.Parser/NameUtils.cs b/src/ClosedXML.Parser/NameUtils.cs index e9faf1d..46954e2 100644 --- a/src/ClosedXML.Parser/NameUtils.cs +++ b/src/ClosedXML.Parser/NameUtils.cs @@ -105,6 +105,27 @@ public static bool IsSheetNameValid(ReadOnlySpan sheetName) return sheetName.IndexOfAny(InvalidSheetChars) == -1; } + internal static bool IsNameValid(ReadOnlySpan name) + { + if (name.Length is < 1 or > 255) + return false; + + // TODO: Determine what is a valid name and make the method public. + // Alert box says: + // * Starts with a letter or underscore + // * no space or char that is not allowed + if (name[0] != '_' && !char.IsLetter(name[0])) + return false; + + foreach (var nextNameChar in name.Slice(1)) + { + if (!char.IsLetter(nextNameChar)) + return false; + } + + return true; + } + internal static StringBuilder EscapeName(StringBuilder sb, string sheet) { return ShouldQuote(sheet.AsSpan()) diff --git a/src/ClosedXML.Parser/Pratt/CompatUtils.cs b/src/ClosedXML.Parser/Pratt/CompatUtils.cs new file mode 100644 index 0000000..164a64c --- /dev/null +++ b/src/ClosedXML.Parser/Pratt/CompatUtils.cs @@ -0,0 +1,24 @@ +namespace ClosedXML.Parser.Pratt; + +/// +/// Various methods that are not present in .net standard 2.0. 
+/// +internal static class CompatUtils +{ + /// + /// Replacement for char.IsAsciiLetter that isn't in the netstandard 2.0 + /// + public static bool IsAsciiLetter(char c) + { + return c is >= 'A' and <= 'Z' || + c is >= 'a' and <= 'z'; + } + + /// + /// Replacement for char.IsAsciiDigit that isn't in the netstandard 2.0 + /// + public static bool IsAsciiDigit(char c) + { + return c is >= '0' and <= '9'; + } +} diff --git a/src/ClosedXML.Parser/Pratt/Lexer.cs b/src/ClosedXML.Parser/Pratt/Lexer.cs index 3ca760a..e3bd625 100644 --- a/src/ClosedXML.Parser/Pratt/Lexer.cs +++ b/src/ClosedXML.Parser/Pratt/Lexer.cs @@ -61,12 +61,17 @@ public Token Consume() return _queue.Dequeue(); } - public Token Peek() + public Token Peek(int distance = 1) { - if (_queue.Count == 0) + // TODO: Replace BCL queue with a structure that allows index access + while (_queue.Count < distance) _queue.Enqueue(Next()); - return _queue.Peek(); + var enumerator = _queue.GetEnumerator(); + for (var i = 0; i < distance; ++i) + enumerator.MoveNext(); + + return enumerator.Current; } private Token Next() diff --git a/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs new file mode 100644 index 0000000..d515a6a --- /dev/null +++ b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs @@ -0,0 +1,122 @@ +namespace ClosedXML.Parser.Pratt.Parselets; + +internal class IdentParselet : IPrefixParselet +{ + private readonly IAstFactory _factory; + private readonly Parser _parser; + + public IdentParselet(IAstFactory factory, Parser parser) + { + _factory = factory; + _parser = parser; + } + + public Node Parse(TContext ctx, Token token) + { + // When we receive an ident, there are following possibilities what it could be (checked + // in this order): + // * A1:B2 + // * A1 + // * A:B + // * $4:6 - rowspan starting with an absolute row + // * sheet!A1:A2 + // * sheet!A1 + // * sheet!A:B + // * sheet!$1:2 + // * sheet!name + // * sheet!1:2 + // * name + 
+ // Check for area `A1:B2` or just cell `A1` + if (_parser.TryLocalAreaA1(token, out var localArea, out var localAreaRange)) + { + var value = _factory.Reference(ctx, localAreaRange, localArea); + return new Node(value, localAreaRange); + } + + // Check for colspan `A:B` + if (_parser.TryLocalColSpanA1(token, out var localColSpan, out var localColSpanRange)) + { + var value = _factory.Reference(ctx, localColSpanRange, localColSpan); + return new Node(value, localColSpanRange); + } + + // Check for colspan `$1:2` + if (_parser.TryLocalRowSpanA1(token, out var localRowSpan, out var localRowSpanRange)) + { + var value = _factory.Reference(ctx, localRowSpanRange, localRowSpan); + return new Node(value, localRowSpanRange); + } + + if (_parser.TryGetUnquotedSheet(token, out var sheetNameSpan) && _parser.LookAhead(1).Type == TokenType.Bang) + { + // We are now in `sheet!` Parse local reference. + var sheetName = sheetNameSpan.ToString(); // String allocation, needed for the IAstFactory + var bangToken = _parser.Consume(TokenType.Bang); + var sheetWithBangRange = token.Range.ExtendRight(bangToken.Range); + + if (_parser.LookAhead(1) is { Type: TokenType.Ident } sheetRefToken) + { + _parser.Consume(TokenType.Ident); + + // Check for area `sheet!A1:B2` or just cell `sheet!A1` + if (_parser.TryLocalAreaA1(sheetRefToken, out var sheetArea, out var sheetAreaRange)) + { + var range = sheetWithBangRange.ExtendRight(sheetAreaRange); + var value = _factory.SheetReference(ctx, range, sheetName, sheetArea); + return new Node(value, range); + } + + // Check for colspan `sheet!A:B` + if (_parser.TryLocalColSpanA1(sheetRefToken, out var sheetColSpan, out var sheetColSpanRange)) + { + var range = sheetWithBangRange.ExtendRight(sheetColSpanRange); + var value = _factory.SheetReference(ctx, range, sheetName, sheetColSpan); + return new Node(value, range); + } + + // Check for rowspan `sheet!$1:2` The $1 is an ident, but this doesn't detect + // rowspan starting with a relative row. 
That is checked below with a token number. + if (_parser.TryLocalRowSpanA1(sheetRefToken, out var sheetAbsRowSpan, out var sheetAbsRowSpanRange)) + { + var range = sheetWithBangRange.ExtendRight(sheetAbsRowSpanRange); + var value = _factory.SheetReference(ctx, range, sheetName, sheetAbsRowSpan); + return new Node(value, range); + } + + // Check for colspan `sheet!name` + if (_parser.TryGetName(sheetRefToken, out var name)) + { + var range = sheetWithBangRange.ExtendRight(sheetRefToken.Range); + var value = _factory.SheetName(ctx, range, sheetName, name.ToString()); // String allocation, needed for the IAstFactory + return new Node(value, range); + } + + throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); + } + + // Check for rowspan `sheet!1:2` with relative start row + if (_parser.LookAhead(1).Type == TokenType.Number) + { + var sheetRowToken = _parser.Consume(TokenType.Number); + if (_parser.TryLocalRowSpanA1(sheetRowToken, out var sheetRowSpan, out var sheetRowSpanRange)) + { + var range = sheetWithBangRange.ExtendRight(sheetRowSpanRange); + var value = _factory.SheetReference(ctx, range, sheetName, sheetRowSpan); + return new Node(value, range); + } + } + + throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); + } + + // Check for rowspan `name` + if (_parser.TryGetName(token, out var workbookName)) + { + var value = _factory.Name(ctx, token.Range, workbookName.ToString()); // String allocation, needed for the IAstFactory + return new Node(value, token.Range); + } + + throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); + } +} diff --git a/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs b/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs new file mode 100644 index 0000000..d22885a --- /dev/null +++ b/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs @@ -0,0 +1,246 @@ +using System; +using 
System.Diagnostics; +using static ClosedXML.Parser.Pratt.CompatUtils; + +namespace ClosedXML.Parser.Pratt.Parselets; + +internal static class ParserExtensions +{ + private const int MIN_A1_LENGTH = 2; // A1 + private const int MAX_A1_LENGTH = 1 + 3 + 1 + 7; // $XFD$1048576 + private const int MIN_COL_LENGTH = 1; // A + private const int MAX_COL_LENGTH = 4; // $XFD + private const int MIN_ROW_LENGTH = 1; // 1 + private const int MAX_ROW_LENGTH = 8; // $1048576 + + public static bool TryLocalAreaA1(this Parser parser, Token identToken, out ReferenceArea area, out SymbolRange range) + { + Debug.Assert(identToken.Type == TokenType.Ident); + var ident = identToken.GetText(parser.Input); + + if (TryGetCellA1(ident, out var cell1)) + { + if (parser.LookAhead(1).Type == TokenType.Range && + parser.LookAhead(2) is { Type: TokenType.Ident } maybeCell2Token && + TryGetCellA1(maybeCell2Token.GetText(parser.Input), out var cell2)) + { + // Result: area A1:B2 + // The code is joining two cells into an area through range operator, but that + // is allowed. Range is highest priority operator, left to right associativity. 
+ var rangeToken = parser.Consume(TokenType.Range); + var cell2Token = parser.Consume(TokenType.Ident); + + area = new ReferenceArea(cell1, cell2); + range = identToken.Range + .ExtendRight(rangeToken.Range) + .ExtendRight(cell2Token.Range); + return true; + } + + // Result: cell A1 + area = new ReferenceArea(cell1); + range = identToken.Range; + return true; + } + + range = default; + area = default; + return false; + } + + public static bool TryLocalColSpanA1(this Parser parser, Token identToken, out ReferenceArea area, out SymbolRange range) + { + Debug.Assert(identToken.Type == TokenType.Ident); + var ident = identToken.GetText(parser.Input); + + // Careful, 'A' can be just a name without the other column + if (TryGetColA1(ident, out var col1) && + parser.LookAhead(1).Type == TokenType.Range && + parser.LookAhead(2) is { Type: TokenType.Ident } maybeCol2Token && + TryGetColA1(maybeCol2Token.GetText(parser.Input), out var col2)) + { + // Result: colspan A:B + var rangeToken = parser.Consume(TokenType.Range); + var col2Token = parser.Consume(TokenType.Ident); + + area = new ReferenceArea(col1, col2); + range = identToken.Range + .ExtendRight(rangeToken.Range) + .ExtendRight(col2Token.Range); + return true; + } + + area = default; + range = default; + return false; + } + + public static bool TryLocalRowSpanA1(this Parser parser, Token numberOrIdentToken, out ReferenceArea area, out SymbolRange range) + { + Debug.Assert(numberOrIdentToken.Type is TokenType.Ident or TokenType.Number); + var numberOrIdent = numberOrIdentToken.GetText(parser.Input); + + if (TryGetRowA1(numberOrIdent, out var row1) && + parser.LookAhead(1).Type == TokenType.Range && + parser.LookAhead(2) is { Type: TokenType.Number or TokenType.Ident } maybeRow2Token && + TryGetRowA1(maybeRow2Token.GetText(parser.Input), out var row2)) + { + // Result: rowspan 1:2 + var rangeToken = parser.Consume(TokenType.Range); + var row2Token = parser.Consume(); + + area = new ReferenceArea(row1, row2); + range = 
numberOrIdentToken.Range + .ExtendRight(rangeToken.Range) + .ExtendRight(row2Token.Range); + return true; + } + + area = default; + range = default; + return false; + } + + public static bool TryGetUnquotedSheet(this Parser parser, Token identToken, out ReadOnlySpan sheetName) + { + var text = identToken.GetText(parser.Input); + var isUnquotedSheet = NameUtils.IsSheetNameValid(text) && !NameUtils.ShouldQuote(text); + if (isUnquotedSheet) + { + sheetName = text; + return true; + } + + sheetName = string.Empty.AsSpan(); + return false; + } + + public static bool TryGetName(this Parser parser, Token identToken, out ReadOnlySpan name) + { + Debug.Assert(identToken.Type == TokenType.Ident); + var text = identToken.GetText(parser.Input); + + if (NameUtils.IsNameValid(text)) + { + name = text; + return true; + } + + name = string.Empty.AsSpan(); + return false; + } + + /// + /// Is the text a valid A1 cell reference? No padding, case insensitive. + /// + public static bool TryGetCellA1(ReadOnlySpan text, out RowCol cell) + { + cell = default; + if (text.Length is < MIN_A1_LENGTH or > MAX_A1_LENGTH) + return false; + + var i = 0; + var absCol = text[i] == '$'; + if (absCol) ++i; + + var col = 0; + while (i < text.Length && IsAsciiLetter(text[i])) + col = col * 26 + GetColIndex(text[i++]) + 1; + + if (col is < RowCol.MinCol or > RowCol.MaxCol || i >= text.Length) + return false; + + var absRow = text[i] == '$'; + if (absRow) + { + if (++i >= text.Length) + return false; + } + + if (text[i] == '0') + return false; + + var row = 0; + while (i < text.Length && IsAsciiDigit(text[i])) + row = row * 10 + text[i++] - '0'; + + if (row is < RowCol.MinRow or > RowCol.MaxRow || i < text.Length) + return false; + + cell = new RowCol( + absRow ? ReferenceAxisType.Absolute : ReferenceAxisType.Relative, row, + absCol ? ReferenceAxisType.Absolute : ReferenceAxisType.Relative, col, + A1); + return true; + } + + /// + /// Is the text a valid end of an A1 colspan? No padding, case insensitive. 
+ /// Valid examples: A, a, $A, $XFD. + /// Invalid examples: A , $ a, $, $XFE. + /// + public static bool TryGetColA1(ReadOnlySpan text, out RowCol colRef) + { + colRef = default; + if (text.Length is < MIN_COL_LENGTH or > MAX_COL_LENGTH) + return false; + + var i = 0; + var absCol = text[i] == '$'; + if (absCol) ++i; + + var col = 0; + while (i < text.Length && IsAsciiLetter(text[i])) + col = col * 26 + GetColIndex(text[i++]) + 1; + + if (col is < RowCol.MinCol or > RowCol.MaxCol || i < text.Length) + return false; + + colRef = new RowCol( + ReferenceAxisType.None, 0, + absCol ? ReferenceAxisType.Absolute : ReferenceAxisType.Relative, col, + A1); + return true; + } + + /// + /// Is the text a valid end of an A1 rowspan? No padding. + /// Valid examples: 1, $1, $1048576. + /// Invalid examples: 1.0, $ 1, $, $1048577. + /// + public static bool TryGetRowA1(ReadOnlySpan text, out RowCol rowRef) + { + rowRef = default; + if (text.Length is < MIN_ROW_LENGTH or > MAX_ROW_LENGTH) + return false; + + var i = 0; + var absRow = text[i] == '$'; + if (absRow) + { + if (++i >= text.Length) + return false; + } + + if (text[i] == '0') + return false; + + var row = 0; + while (i < text.Length && IsAsciiDigit(text[i])) + row = row * 10 + text[i++] - '0'; + + if (row is < RowCol.MinRow or > RowCol.MaxRow || i < text.Length) + return false; + + rowRef = new RowCol( + absRow ? 
ReferenceAxisType.Absolute : ReferenceAxisType.Relative, row, + ReferenceAxisType.None, 0, + A1); + return true; + } + + private static int GetColIndex(char asciiLetter) + { + return (asciiLetter | 0x20) - 'a'; + } +} diff --git a/src/ClosedXML.Parser/Pratt/Parser.cs b/src/ClosedXML.Parser/Pratt/Parser.cs index 254c270..c7f3578 100644 --- a/src/ClosedXML.Parser/Pratt/Parser.cs +++ b/src/ClosedXML.Parser/Pratt/Parser.cs @@ -56,6 +56,11 @@ private Node Prefix(TContext ctx) return parselet.Parse(ctx, token); } + public Token LookAhead(int distance) + { + return _lexer.Peek(distance); + } + internal Token Consume(TokenType expectedType) { var token = _lexer.Consume(); @@ -65,6 +70,11 @@ internal Token Consume(TokenType expectedType) return token; } + internal Token Consume() + { + return _lexer.Consume(); + } + internal void Register(TokenType type, IPrefixParselet parselet) { _prefixParselets.Add(type, parselet); diff --git a/src/ClosedXML.Parser/Pratt/ParserFactory.cs b/src/ClosedXML.Parser/Pratt/ParserFactory.cs index 84f22cb..a2bf4e1 100644 --- a/src/ClosedXML.Parser/Pratt/ParserFactory.cs +++ b/src/ClosedXML.Parser/Pratt/ParserFactory.cs @@ -12,6 +12,7 @@ public static Parser Create( // Register prefix parselets parser.Register(TokenType.Number, new NumberParselet(factory, parser)); parser.Register(TokenType.LeftParen, new GroupParselet(parser)); + parser.Register(TokenType.Ident, new IdentParselet(factory, parser)); // Register operation parselets parser.Register(TokenType.Plus, new BinaryOpParselet(factory, parser, BinaryOperation.Addition, BindingPower.Addition)); diff --git a/src/ClosedXML.Parser/RowCol.cs b/src/ClosedXML.Parser/RowCol.cs index b693873..e650400 100644 --- a/src/ClosedXML.Parser/RowCol.cs +++ b/src/ClosedXML.Parser/RowCol.cs @@ -31,7 +31,9 @@ namespace ClosedXML.Parser; /// public readonly struct RowCol : IEquatable { + internal const int MinRow = 1; internal const int MaxRow = 1048576; + internal const int MinCol = 1; internal const int MaxCol 
= 16384; // keep at 0, so default ctor creates is A1 From 7c28f0a7c4a7e4dc64f2550cfbc6941210494715 Mon Sep 17 00:00:00 2001 From: jahav Date: Wed, 22 Oct 2025 22:58:45 +0200 Subject: [PATCH 2/5] IdentParselet can recognize logical values Due to simplified token lexing, even logical values are represented in an ident token. That is useful to functions like TRUE() or FALSE(). --- .../Lexers/ParseletIdentTests.cs | 13 ++++++++++ .../Pratt/Parselets/IdentParselet.cs | 26 ++++++++++++++++++- .../Pratt/Parselets/ParserExtensions.cs | 2 +- 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs b/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs index 5f94efe..6057d69 100644 --- a/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs +++ b/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs @@ -60,6 +60,19 @@ public void Can_parse_references_starting_at_ident(string formula, Type expected Assert.Equal(formula, root.GetDisplayString(A1)); } + [Theory] + [InlineData("TRUE", true)] + [InlineData("true", true)] + [InlineData("FALSE", false)] + [InlineData("false", false)] + public void Can_parse_logical(string formula, bool expectedValue) + { + var parser = ParserFactory.Create(new F()); + var root = parser.ParseFormula(formula, new Ctx()); + + Assert.Equal(new ValueNode(expectedValue), root); + } + [Theory] [InlineData("sheet!$")] [InlineData("sheet!")] diff --git a/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs index d515a6a..103fca9 100644 --- a/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs +++ b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs @@ -1,4 +1,6 @@ -namespace ClosedXML.Parser.Pratt.Parselets; +using System; + +namespace ClosedXML.Parser.Pratt.Parselets; internal class IdentParselet : IPrefixParselet { @@ -25,6 +27,7 @@ public Node Parse(TContext ctx, Token token) // * sheet!$1:2 // * sheet!name // * sheet!1:2 + 
// * TRUE/FALSE // * name // Check for area `A1:B2` or just cell `A1` @@ -110,6 +113,19 @@ public Node Parse(TContext ctx, Token token) throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); } + var tokenText = token.GetText(_parser.Input); + if (EqualCaseInsensitive(tokenText, "TRUE")) + { + var value = _factory.LogicalNode(ctx, token.Range, true); + return new Node(value, token.Range); + } + + if (EqualCaseInsensitive(tokenText, "FALSE")) + { + var value = _factory.LogicalNode(ctx, token.Range, false); + return new Node(value, token.Range); + } + // Check for rowspan `name` if (_parser.TryGetName(token, out var workbookName)) { @@ -119,4 +135,12 @@ public Node Parse(TContext ctx, Token token) throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); } + + private static bool EqualCaseInsensitive(ReadOnlySpan text, string other) + { + if (text.Length != other.Length) + return false; + + return text.CompareTo(other.AsSpan(), StringComparison.OrdinalIgnoreCase) == 0; + } } diff --git a/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs b/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs index d22885a..11bd4b4 100644 --- a/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs +++ b/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs @@ -111,7 +111,7 @@ public static bool TryGetUnquotedSheet(this Parser par return true; } - sheetName = string.Empty.AsSpan(); + sheetName = default; return false; } From 0d5e5a3e194c278c62b61e3470fdb93d1506061f Mon Sep 17 00:00:00 2001 From: jahav Date: Wed, 22 Oct 2025 23:43:40 +0200 Subject: [PATCH 3/5] IdentParselet can recognize 3D references --- .../Lexers/ParseletIdentTests.cs | 19 ++++++ .../Pratt/Parselets/IdentParselet.cs | 37 +++++++++++ .../Pratt/Parselets/ParserExtensions.cs | 61 ++++++++++++++++--- 3 files changed, 109 insertions(+), 8 deletions(-) diff --git a/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs 
b/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs index 6057d69..2df78f5 100644 --- a/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs +++ b/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs @@ -51,6 +51,22 @@ public class ParseletIdentTests // name [InlineData("_name", typeof(NameNode))] [InlineData("name", typeof(NameNode))] + + // sheet1:sheet2!A1:B2 + [InlineData("sheet1:sheet2!A1:B2", typeof(Reference3DNode))] + [InlineData("sheet1:sheet2!$A$1:$B$2", typeof(Reference3DNode))] + + // sheet1:sheet2!A1 + [InlineData("sheet1:sheet2!A1", typeof(Reference3DNode))] + [InlineData("sheet1:sheet2!$A$1", typeof(Reference3DNode))] + + // sheet1:sheet2!A:B + [InlineData("sheet1:sheet2!A:C", typeof(Reference3DNode))] + [InlineData("sheet1:sheet2!$A:$C", typeof(Reference3DNode))] + + // sheet1:sheet2!1:2 + [InlineData("sheet1:sheet2!1:2", typeof(Reference3DNode))] + [InlineData("sheet1:sheet2!$1:$2", typeof(Reference3DNode))] public void Can_parse_references_starting_at_ident(string formula, Type expectedNodeType) { var parser = ParserFactory.Create(new F()); @@ -81,6 +97,9 @@ public void Can_parse_logical(string formula, bool expectedValue) [InlineData("A0")] [InlineData("A1048577")] [InlineData("XFE1")] + [InlineData("sheet1:sheet2!")] + [InlineData("sheet1:sheet2!A")] + [InlineData("sheet1:sheet2!name")] // There is no such thing as 3D name public void Invalid_references_starting_with_ident_throw_parsing_exception(string formula) { var parser = ParserFactory.Create(new F()); diff --git a/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs index 103fca9..f1addf6 100644 --- a/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs +++ b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs @@ -28,6 +28,10 @@ public Node Parse(TContext ctx, Token token) // * sheet!name // * sheet!1:2 // * TRUE/FALSE + // * sheet1:sheet2!A1:A2 + // * sheet1:sheet2!A1 + // * sheet1:sheet2!A:B + // * sheet1:sheet2!$1:2 
// * name // Check for area `A1:B2` or just cell `A1` @@ -126,6 +130,39 @@ public Node Parse(TContext ctx, Token token) return new Node(value, token.Range); } + // Check for 3D reference for unquoted sheets: + // * Sheet1:Sheet2!A1:B2 + // * Sheet1:Sheet2!A1 + // * Sheet1:Sheet2!A:B + // * Sheet1:Sheet2!1:2 + if (_parser.TryGetUnquotedSheet(token, out var startSheet) && + _parser.LookAhead(1).Type == TokenType.Range && + _parser.LookAhead(2) is { Type: TokenType.Ident } maybeEndSheetToken && + _parser.TryGetUnquotedSheet(maybeEndSheetToken, out var endSheet) && + _parser.LookAhead(3).Type == TokenType.Bang) + { + var sheetStartToken = token; + var rangeToken = _parser.Consume(TokenType.Range); + var sheetEndToken = _parser.Consume(TokenType.Ident); + var bangToken = _parser.Consume(TokenType.Bang); + var refToken = _parser.Consume(); + + if (_parser.TryReferenceA1(refToken, out var sheetRangeReference, out var sheetRangeReferenceRange)) + { + var range = sheetStartToken.Range + .ExtendRight(rangeToken.Range) + .ExtendRight(sheetEndToken.Range) + .ExtendRight(bangToken.Range) + .ExtendRight(sheetRangeReferenceRange); + var startSheetString = startSheet.ToString(); // String allocation for the IAstFactory + var endSheetString = endSheet.ToString(); + var value = _factory.Reference3D(ctx, range, startSheetString, endSheetString, sheetRangeReference); + return new Node(value, range); + } + + throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); + } + // Check for rowspan `name` if (_parser.TryGetName(token, out var workbookName)) { diff --git a/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs b/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs index 11bd4b4..8e0f1cf 100644 --- a/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs +++ b/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs @@ -1,5 +1,4 @@ using System; -using System.Diagnostics; using static ClosedXML.Parser.Pratt.CompatUtils; namespace 
ClosedXML.Parser.Pratt.Parselets; @@ -13,9 +12,39 @@ internal static class ParserExtensions private const int MIN_ROW_LENGTH = 1; // 1 private const int MAX_ROW_LENGTH = 8; // $1048576 + public static bool TryReferenceA1(this Parser parser, Token token, out ReferenceArea area, out SymbolRange range) + { + if (token.Type is not TokenType.Ident and not TokenType.Number) + { + area = default; + range = default; + return false; + } + + // Check for area `A1:B2` or just cell `A1` + if (parser.TryLocalAreaA1(token, out area, out range)) + return true; + + // Check for colspan `A:B` + if (parser.TryLocalColSpanA1(token, out area, out range)) + return true; + + // Check for rowspan `1:2`, can be ident or number token + if (parser.TryLocalRowSpanA1(token, out area, out range)) + return true; + + return false; + } + public static bool TryLocalAreaA1(this Parser parser, Token identToken, out ReferenceArea area, out SymbolRange range) { - Debug.Assert(identToken.Type == TokenType.Ident); + if (identToken.Type != TokenType.Ident) + { + area = default; + range = default; + return false; + } + var ident = identToken.GetText(parser.Input); if (TryGetCellA1(ident, out var cell1)) @@ -50,7 +79,13 @@ public static bool TryLocalAreaA1(this Parser parser, public static bool TryLocalColSpanA1(this Parser parser, Token identToken, out ReferenceArea area, out SymbolRange range) { - Debug.Assert(identToken.Type == TokenType.Ident); + if (identToken.Type != TokenType.Ident) + { + area = default; + range = default; + return false; + } + var ident = identToken.GetText(parser.Input); // Careful, 'A' can be just a name without the other column @@ -77,7 +112,13 @@ public static bool TryLocalColSpanA1(this Parser parse public static bool TryLocalRowSpanA1(this Parser parser, Token numberOrIdentToken, out ReferenceArea area, out SymbolRange range) { - Debug.Assert(numberOrIdentToken.Type is TokenType.Ident or TokenType.Number); + if (numberOrIdentToken.Type is not TokenType.Ident and not 
TokenType.Number) + { + area = default; + range = default; + return false; + } + var numberOrIdent = numberOrIdentToken.GetText(parser.Input); if (TryGetRowA1(numberOrIdent, out var row1) && @@ -117,16 +158,20 @@ public static bool TryGetUnquotedSheet(this Parser par public static bool TryGetName(this Parser parser, Token identToken, out ReadOnlySpan name) { - Debug.Assert(identToken.Type == TokenType.Ident); - var text = identToken.GetText(parser.Input); + if (identToken.Type != TokenType.Ident) + { + name = default; + return false; + } + var text = identToken.GetText(parser.Input); if (NameUtils.IsNameValid(text)) { name = text; return true; } - name = string.Empty.AsSpan(); + name = default; return false; } @@ -218,7 +263,7 @@ public static bool TryGetRowA1(ReadOnlySpan text, out RowCol rowRef) var absRow = text[i] == '$'; if (absRow) { - if (++i >= text.Length) + if (++i >= text.Length) return false; } From af70adf6b044724f5a6e1b299c5ac5d3b5207cab Mon Sep 17 00:00:00 2001 From: jahav Date: Wed, 22 Oct 2025 23:50:28 +0200 Subject: [PATCH 4/5] Deduplicate code --- .../Pratt/Parselets/IdentParselet.cs | 61 +++++-------------- 1 file changed, 15 insertions(+), 46 deletions(-) diff --git a/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs index f1addf6..635126d 100644 --- a/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs +++ b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs @@ -62,56 +62,25 @@ public Node Parse(TContext ctx, Token token) var bangToken = _parser.Consume(TokenType.Bang); var sheetWithBangRange = token.Range.ExtendRight(bangToken.Range); - if (_parser.LookAhead(1) is { Type: TokenType.Ident } sheetRefToken) + // No need to check for token type, if EoF, nothing will be matched to such token + var sheetRefToken = _parser.Consume(); + + // Check for area `sheet!A1:B2` or just cell `sheet!A1` + // Check for colspan `sheet!A:B` + // Check for rowspan `sheet!1:2` with absolute or 
relative start row + if (_parser.TryReferenceA1(sheetRefToken, out var sheetArea, out var sheetAreaRange)) { - _parser.Consume(TokenType.Ident); - - // Check for area `sheet!A1:B2` or just cell `sheet!A1` - if (_parser.TryLocalAreaA1(sheetRefToken, out var sheetArea, out var sheetAreaRange)) - { - var range = sheetWithBangRange.ExtendRight(sheetAreaRange); - var value = _factory.SheetReference(ctx, range, sheetName, sheetArea); - return new Node(value, range); - } - - // Check for colspan `sheet!A:B` - if (_parser.TryLocalColSpanA1(sheetRefToken, out var sheetColSpan, out var sheetColSpanRange)) - { - var range = sheetWithBangRange.ExtendRight(sheetColSpanRange); - var value = _factory.SheetReference(ctx, range, sheetName, sheetColSpan); - return new Node(value, range); - } - - // Check for rowspan `sheet!$1:2` The $1 is an ident, but this doesn't detect - // rowspan starting with a relative row. That is checked below with a token number. - if (_parser.TryLocalRowSpanA1(sheetRefToken, out var sheetAbsRowSpan, out var sheetAbsRowSpanRange)) - { - var range = sheetWithBangRange.ExtendRight(sheetAbsRowSpanRange); - var value = _factory.SheetReference(ctx, range, sheetName, sheetAbsRowSpan); - return new Node(value, range); - } - - // Check for colspan `sheet!name` - if (_parser.TryGetName(sheetRefToken, out var name)) - { - var range = sheetWithBangRange.ExtendRight(sheetRefToken.Range); - var value = _factory.SheetName(ctx, range, sheetName, name.ToString()); // String allocation, needed for the IAstFactory - return new Node(value, range); - } - - throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); + var range = sheetWithBangRange.ExtendRight(sheetAreaRange); + var value = _factory.SheetReference(ctx, range, sheetName, sheetArea); + return new Node(value, range); } - // Check for rowspan `sheet!1:2` with relative start row - if (_parser.LookAhead(1).Type == TokenType.Number) + // Check for `sheet!name` + if 
(_parser.TryGetName(sheetRefToken, out var name)) { - var sheetRowToken = _parser.Consume(TokenType.Number); - if (_parser.TryLocalRowSpanA1(sheetRowToken, out var sheetRowSpan, out var sheetRowSpanRange)) - { - var range = sheetWithBangRange.ExtendRight(sheetRowSpanRange); - var value = _factory.SheetReference(ctx, range, sheetName, sheetRowSpan); - return new Node(value, range); - } + var range = sheetWithBangRange.ExtendRight(sheetRefToken.Range); + var value = _factory.SheetName(ctx, range, sheetName, name.ToString()); // String allocation, needed for the IAstFactory + return new Node(value, range); } throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); From 2944ce19e70138bc69e2632ff54aed1ef71f3cfe Mon Sep 17 00:00:00 2001 From: jahav Date: Wed, 22 Oct 2025 23:52:09 +0200 Subject: [PATCH 5/5] Deduplicate code --- .../Pratt/Parselets/IdentParselet.cs | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs index 635126d..b484068 100644 --- a/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs +++ b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs @@ -35,25 +35,14 @@ public Node Parse(TContext ctx, Token token) // * name // Check for area `A1:B2` or just cell `A1` - if (_parser.TryLocalAreaA1(token, out var localArea, out var localAreaRange)) + // Check for colspan `A:B` + // Check for rowspan `$1:2` with absolute row start, because this is an "ident" prefix parselet + if (_parser.TryReferenceA1(token, out var localArea, out var localAreaRange)) { var value = _factory.Reference(ctx, localAreaRange, localArea); return new Node(value, localAreaRange); } - // Check for colspan `A:B` - if (_parser.TryLocalColSpanA1(token, out var localColSpan, out var localColSpanRange)) - { - var value = _factory.Reference(ctx, localColSpanRange, localColSpan); - return new Node(value,
localColSpanRange); - } - - // Check for colspan `$1:2` - if (_parser.TryLocalRowSpanA1(token, out var localRowSpan, out var localRowSpanRange)) - { - var value = _factory.Reference(ctx, localRowSpanRange, localRowSpan); - return new Node(value, localRowSpanRange); - } if (_parser.TryGetUnquotedSheet(token, out var sheetNameSpan) && _parser.LookAhead(1).Type == TokenType.Bang) {