diff --git a/src/ClosedXML.Parser.Ast/SheetNameNode.cs b/src/ClosedXML.Parser.Ast/SheetNameNode.cs index 00098dd..a67bd4c 100644 --- a/src/ClosedXML.Parser.Ast/SheetNameNode.cs +++ b/src/ClosedXML.Parser.Ast/SheetNameNode.cs @@ -4,6 +4,7 @@ public record SheetNameNode(string Sheet, string Name) : AstNode { public override string GetDisplayString(ReferenceStyle style) { - return $"[{Sheet}]!{Name}"; + var sheet = NameUtils.ShouldQuote(Sheet) ? '\'' + Sheet.Replace("'", "''") + '\'' : Sheet; + return $"{sheet}!{Name}"; } -} \ No newline at end of file +} diff --git a/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs b/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs new file mode 100644 index 0000000..2df78f5 --- /dev/null +++ b/src/ClosedXML.Parser.Tests/Lexers/ParseletIdentTests.cs @@ -0,0 +1,108 @@ +using ClosedXML.Parser.Pratt; + +namespace ClosedXML.Parser.Tests.Lexers; + +public class ParseletIdentTests +{ + [Theory] + + // Local area + [InlineData("A1:B1", typeof(ReferenceNode))] + [InlineData("$A$1:$B$1", typeof(ReferenceNode))] + + // Local cell + [InlineData("A1", typeof(ReferenceNode))] + [InlineData("A$1", typeof(ReferenceNode))] + [InlineData("$A1", typeof(ReferenceNode))] + [InlineData("$A$1", typeof(ReferenceNode))] + [InlineData("XFD1048576", typeof(ReferenceNode))] + [InlineData("XFD$1048576", typeof(ReferenceNode))] + [InlineData("$XFD1048576", typeof(ReferenceNode))] + [InlineData("$XFD$1048576", typeof(ReferenceNode))] + + // Local colspan + [InlineData("A:B", typeof(ReferenceNode))] + [InlineData("$GE:$XFD", typeof(ReferenceNode))] + + // Local rowspan starting with an absolute row + [InlineData("$1:8", typeof(ReferenceNode))] + [InlineData("$72:$85", typeof(ReferenceNode))] + + // sheet!A1:B2 + [InlineData("Sheet!A1:B2", typeof(SheetReferenceNode))] + [InlineData("Sheet!$Z$84:$BG$99", typeof(SheetReferenceNode))] + + // sheet!A1 + [InlineData("Sheet!A1", typeof(SheetReferenceNode))] + [InlineData("Sheet!$Z$84", typeof(SheetReferenceNode))]
+ + // sheet!$1:2 (rowspan starting with an absolute row) + [InlineData("Sheet!$4:81", typeof(SheetReferenceNode))] + [InlineData("Sheet!$1:$5", typeof(SheetReferenceNode))] + + // sheet!name + [InlineData("Sheet!name", typeof(SheetNameNode))] + [InlineData("Sheet!_name", typeof(SheetNameNode))] + + // sheet!1:2 (rowspan starting with a relative row) + [InlineData("Sheet!1:2", typeof(SheetReferenceNode))] + [InlineData("Sheet!1:$2", typeof(SheetReferenceNode))] + + // name (no sheet prefix) + [InlineData("_name", typeof(NameNode))] + [InlineData("name", typeof(NameNode))] + + // sheet1:sheet2!A1:B2 + [InlineData("sheet1:sheet2!A1:B2", typeof(Reference3DNode))] + [InlineData("sheet1:sheet2!$A$1:$B$2", typeof(Reference3DNode))] + + // sheet1:sheet2!A1 + [InlineData("sheet1:sheet2!A1", typeof(Reference3DNode))] + [InlineData("sheet1:sheet2!$A$1", typeof(Reference3DNode))] + + // sheet1:sheet2!A:B + [InlineData("sheet1:sheet2!A:C", typeof(Reference3DNode))] + [InlineData("sheet1:sheet2!$A:$C", typeof(Reference3DNode))] + + // sheet1:sheet2!1:2 + [InlineData("sheet1:sheet2!1:2", typeof(Reference3DNode))] + [InlineData("sheet1:sheet2!$1:$2", typeof(Reference3DNode))] + public void Can_parse_references_starting_at_ident(string formula, Type expectedNodeType) + { + var parser = ParserFactory.Create(new F()); + var root = parser.ParseFormula(formula, new Ctx()); + + Assert.Equal(expectedNodeType, root.GetType()); + Assert.Equal(formula, root.GetDisplayString(A1)); + } + + [Theory] + [InlineData("TRUE", true)] + [InlineData("true", true)] + [InlineData("FALSE", false)] + [InlineData("false", false)] + public void Can_parse_logical(string formula, bool expectedValue) + { + var parser = ParserFactory.Create(new F()); + var root = parser.ParseFormula(formula, new Ctx()); + + Assert.Equal(new ValueNode(expectedValue), root); + } + + [Theory] + [InlineData("sheet!$")] + [InlineData("sheet!")] + [InlineData("$")] + [InlineData("A01")] + [InlineData("A0")] + [InlineData("A1048577")] + [InlineData("XFE1")] + [InlineData("sheet1:sheet2!")] + [InlineData("sheet1:sheet2!A")] +
[InlineData("sheet1:sheet2!name")] // There is no such thing as a 3D name + public void Invalid_references_starting_with_ident_throw_parsing_exception(string formula) + { + var parser = ParserFactory.Create(new F()); + Assert.Throws<ParsingException>(() => parser.ParseFormula(formula, new Ctx())); + } +} diff --git a/src/ClosedXML.Parser/NameUtils.cs b/src/ClosedXML.Parser/NameUtils.cs index e9faf1d..46954e2 100644 --- a/src/ClosedXML.Parser/NameUtils.cs +++ b/src/ClosedXML.Parser/NameUtils.cs @@ -105,6 +105,27 @@ public static bool IsSheetNameValid(ReadOnlySpan<char> sheetName) return sheetName.IndexOfAny(InvalidSheetChars) == -1; } + internal static bool IsNameValid(ReadOnlySpan<char> name) + { + if (name.Length is < 1 or > 255) + return false; + + // TODO: Determine what is a valid name and make the method public. + // Alert box says: + // * Starts with a letter or underscore + // * no space or char that is not allowed + if (name[0] != '_' && !char.IsLetter(name[0])) + return false; + + foreach (var nextNameChar in name.Slice(1)) + { + if (!char.IsLetter(nextNameChar)) + return false; + } + + return true; + } + internal static StringBuilder EscapeName(StringBuilder sb, string sheet) { return ShouldQuote(sheet.AsSpan()) diff --git a/src/ClosedXML.Parser/Pratt/CompatUtils.cs b/src/ClosedXML.Parser/Pratt/CompatUtils.cs new file mode 100644 index 0000000..164a64c --- /dev/null +++ b/src/ClosedXML.Parser/Pratt/CompatUtils.cs @@ -0,0 +1,24 @@ +namespace ClosedXML.Parser.Pratt; + +/// +/// Various methods that are not present in .NET Standard 2.0.
+/// +internal static class CompatUtils +{ + /// + /// Replacement for char.IsAsciiLetter, which is not available in .NET Standard 2.0. + /// + public static bool IsAsciiLetter(char c) + { + return c is >= 'A' and <= 'Z' || + c is >= 'a' and <= 'z'; + } + + /// + /// Replacement for char.IsAsciiDigit, which is not available in .NET Standard 2.0. + /// + public static bool IsAsciiDigit(char c) + { + return c is >= '0' and <= '9'; + } +} diff --git a/src/ClosedXML.Parser/Pratt/Lexer.cs b/src/ClosedXML.Parser/Pratt/Lexer.cs index 3ca760a..e3bd625 100644 --- a/src/ClosedXML.Parser/Pratt/Lexer.cs +++ b/src/ClosedXML.Parser/Pratt/Lexer.cs @@ -61,12 +61,17 @@ public Token Consume() return _queue.Dequeue(); } - public Token Peek() + public Token Peek(int distance = 1) { - if (_queue.Count == 0) + // TODO: Replace BCL queue with a structure that allows index access (the enumerator below is advanced `distance` times on every peek) + while (_queue.Count < distance) _queue.Enqueue(Next()); - return _queue.Peek(); + var enumerator = _queue.GetEnumerator(); + for (var i = 0; i < distance; ++i) + enumerator.MoveNext(); + + return enumerator.Current; } private Token Next() diff --git a/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs new file mode 100644 index 0000000..b484068 --- /dev/null +++ b/src/ClosedXML.Parser/Pratt/Parselets/IdentParselet.cs @@ -0,0 +1,141 @@ +using System; + +namespace ClosedXML.Parser.Pratt.Parselets; + +internal class IdentParselet : IPrefixParselet +{ + private readonly IAstFactory _factory; + private readonly Parser _parser; + + public IdentParselet(IAstFactory factory, Parser parser) + { + _factory = factory; + _parser = parser; + } + + public Node Parse(TContext ctx, Token token) + { + // When we receive an ident, these are the possibilities for what it could be (checked + // in this order): + // * A1:B2 + // * A1 + // * A:B + // * $4:6 - rowspan starting with an absolute row + // * sheet!A1:A2 + // * sheet!A1 + // * sheet!A:B + // * sheet!$1:2 + // * sheet!name + // *
sheet!1:2 + // * TRUE/FALSE + // * sheet1:sheet2!A1:A2 + // * sheet1:sheet2!A1 + // * sheet1:sheet2!A:B + // * sheet1:sheet2!$1:2 + // * name + + // Check for area `A1:B2` or just cell `A1` + // Check for colspan `A:B` + // Check for rowspan `$1:2` with absolute row start, because this is an "ident" prefix parselet + if (_parser.TryReferenceA1(token, out var localArea, out var localAreaRange)) + { + var value = _factory.Reference(ctx, localAreaRange, localArea); + return new Node(value, localAreaRange); + } + + + if (_parser.TryGetUnquotedSheet(token, out var sheetNameSpan) && _parser.LookAhead(1).Type == TokenType.Bang) + { + // We are now in `sheet!`. Parse the reference or name that follows the bang. + var sheetName = sheetNameSpan.ToString(); // String allocation, needed for the IAstFactory + var bangToken = _parser.Consume(TokenType.Bang); + var sheetWithBangRange = token.Range.ExtendRight(bangToken.Range); + + // No need to check the token type: if it is EoF, nothing below will match it + var sheetRefToken = _parser.Consume(); + + // Check for area `sheet!A1:B2` or just cell `sheet!A1` + // Check for colspan `sheet!A:B` + // Check for rowspan `sheet!1:2` with absolute or relative start row + if (_parser.TryReferenceA1(sheetRefToken, out var sheetArea, out var sheetAreaRange)) + { + var range = sheetWithBangRange.ExtendRight(sheetAreaRange); + var value = _factory.SheetReference(ctx, range, sheetName, sheetArea); + return new Node(value, range); + } + + // Check for `sheet!name` + if (_parser.TryGetName(sheetRefToken, out var name)) + { + var range = sheetWithBangRange.ExtendRight(sheetRefToken.Range); + var value = _factory.SheetName(ctx, range, sheetName, name.ToString()); // String allocation, needed for the IAstFactory + return new Node(value, range); + } + + throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); + } + + var tokenText = token.GetText(_parser.Input); + if (EqualCaseInsensitive(tokenText, "TRUE")) + { + var value =
_factory.LogicalNode(ctx, token.Range, true); + return new Node(value, token.Range); + } + + if (EqualCaseInsensitive(tokenText, "FALSE")) + { + var value = _factory.LogicalNode(ctx, token.Range, false); + return new Node(value, token.Range); + } + + // Check for 3D reference for unquoted sheets: + // * Sheet1:Sheet2!A1:B2 + // * Sheet1:Sheet2!A1 + // * Sheet1:Sheet2!A:B + // * Sheet1:Sheet2!1:2 + if (_parser.TryGetUnquotedSheet(token, out var startSheet) && + _parser.LookAhead(1).Type == TokenType.Range && + _parser.LookAhead(2) is { Type: TokenType.Ident } maybeEndSheetToken && + _parser.TryGetUnquotedSheet(maybeEndSheetToken, out var endSheet) && + _parser.LookAhead(3).Type == TokenType.Bang) + { + var sheetStartToken = token; + var rangeToken = _parser.Consume(TokenType.Range); + var sheetEndToken = _parser.Consume(TokenType.Ident); + var bangToken = _parser.Consume(TokenType.Bang); + var refToken = _parser.Consume(); + + if (_parser.TryReferenceA1(refToken, out var sheetRangeReference, out var sheetRangeReferenceRange)) + { + var range = sheetStartToken.Range + .ExtendRight(rangeToken.Range) + .ExtendRight(sheetEndToken.Range) + .ExtendRight(bangToken.Range) + .ExtendRight(sheetRangeReferenceRange); + var startSheetString = startSheet.ToString(); // String allocation for the IAstFactory + var endSheetString = endSheet.ToString(); + var value = _factory.Reference3D(ctx, range, startSheetString, endSheetString, sheetRangeReference); + return new Node(value, range); + } + + throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); + } + + // Check for a plain `name` without a sheet prefix + if (_parser.TryGetName(token, out var workbookName)) + { + var value = _factory.Name(ctx, token.Range, workbookName.ToString()); // String allocation, needed for the IAstFactory + return new Node(value, token.Range); + } + + throw new ParsingException($"Unable to parse value starting from position {token.Range.Start}."); + } + + private static bool
EqualCaseInsensitive(ReadOnlySpan<char> text, string other) + { + if (text.Length != other.Length) + return false; + + return text.Equals(other.AsSpan(), StringComparison.OrdinalIgnoreCase); + } +} diff --git a/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs b/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs new file mode 100644 index 0000000..8e0f1cf --- /dev/null +++ b/src/ClosedXML.Parser/Pratt/Parselets/ParserExtensions.cs @@ -0,0 +1,291 @@ +using System; +using static ClosedXML.Parser.Pratt.CompatUtils; + +namespace ClosedXML.Parser.Pratt.Parselets; + +internal static class ParserExtensions +{ + private const int MIN_A1_LENGTH = 2; // A1 + private const int MAX_A1_LENGTH = 1 + 3 + 1 + 7; // $XFD$1048576 + private const int MIN_COL_LENGTH = 1; // A + private const int MAX_COL_LENGTH = 4; // $XFD + private const int MIN_ROW_LENGTH = 1; // 1 + private const int MAX_ROW_LENGTH = 8; // $1048576 + + public static bool TryReferenceA1(this Parser parser, Token token, out ReferenceArea area, out SymbolRange range) + { + if (token.Type is not TokenType.Ident and not TokenType.Number) + { + area = default; + range = default; + return false; + } + + // Check for area `A1:B2` or just cell `A1` + if (parser.TryLocalAreaA1(token, out area, out range)) + return true; + + // Check for colspan `A:B` + if (parser.TryLocalColSpanA1(token, out area, out range)) + return true; + + // Check for rowspan `1:2`, can be ident or number token + if (parser.TryLocalRowSpanA1(token, out area, out range)) + return true; + + return false; + } + + public static bool TryLocalAreaA1(this Parser parser, Token identToken, out ReferenceArea area, out SymbolRange range) + { + if (identToken.Type != TokenType.Ident) + { + area = default; + range = default; + return false; + } + + var ident = identToken.GetText(parser.Input); + + if (TryGetCellA1(ident, out var cell1)) + { + if (parser.LookAhead(1).Type == TokenType.Range && + parser.LookAhead(2) is { Type: TokenType.Ident }
maybeCell2Token && + TryGetCellA1(maybeCell2Token.GetText(parser.Input), out var cell2)) + { + // Result: area A1:B2 + // The code joins two cells into an area through the range operator here, which is + // allowed: range is the highest priority operator, with left to right associativity. + var rangeToken = parser.Consume(TokenType.Range); + var cell2Token = parser.Consume(TokenType.Ident); + + area = new ReferenceArea(cell1, cell2); + range = identToken.Range + .ExtendRight(rangeToken.Range) + .ExtendRight(cell2Token.Range); + return true; + } + + // Result: cell A1 + area = new ReferenceArea(cell1); + range = identToken.Range; + return true; + } + + range = default; + area = default; + return false; + } + + public static bool TryLocalColSpanA1(this Parser parser, Token identToken, out ReferenceArea area, out SymbolRange range) + { + if (identToken.Type != TokenType.Ident) + { + area = default; + range = default; + return false; + } + + var ident = identToken.GetText(parser.Input); + + // Careful, 'A' on its own (not followed by `:` and a second column) can be just a name + if (TryGetColA1(ident, out var col1) && + parser.LookAhead(1).Type == TokenType.Range && + parser.LookAhead(2) is { Type: TokenType.Ident } maybeCol2Token && + TryGetColA1(maybeCol2Token.GetText(parser.Input), out var col2)) + { + // Result: colspan A:B + var rangeToken = parser.Consume(TokenType.Range); + var col2Token = parser.Consume(TokenType.Ident); + + area = new ReferenceArea(col1, col2); + range = identToken.Range + .ExtendRight(rangeToken.Range) + .ExtendRight(col2Token.Range); + return true; + } + + area = default; + range = default; + return false; + } + + public static bool TryLocalRowSpanA1(this Parser parser, Token numberOrIdentToken, out ReferenceArea area, out SymbolRange range) + { + if (numberOrIdentToken.Type is not TokenType.Ident and not TokenType.Number) + { + area = default; + range = default; + return false; + } + + var numberOrIdent = numberOrIdentToken.GetText(parser.Input); + + if
(TryGetRowA1(numberOrIdent, out var row1) && + parser.LookAhead(1).Type == TokenType.Range && + parser.LookAhead(2) is { Type: TokenType.Number or TokenType.Ident } maybeRow2Token && + TryGetRowA1(maybeRow2Token.GetText(parser.Input), out var row2)) + { + // Result: rowspan 1:2 + var rangeToken = parser.Consume(TokenType.Range); + var row2Token = parser.Consume(); + + area = new ReferenceArea(row1, row2); + range = numberOrIdentToken.Range + .ExtendRight(rangeToken.Range) + .ExtendRight(row2Token.Range); + return true; + } + + area = default; + range = default; + return false; + } + + public static bool TryGetUnquotedSheet(this Parser parser, Token identToken, out ReadOnlySpan<char> sheetName) + { + var text = identToken.GetText(parser.Input); + var isUnquotedSheet = identToken.Type == TokenType.Ident && NameUtils.IsSheetNameValid(text) && !NameUtils.ShouldQuote(text); // same token type guard as TryGetName + if (isUnquotedSheet) + { + sheetName = text; + return true; + } + + sheetName = default; + return false; + } + + public static bool TryGetName(this Parser parser, Token identToken, out ReadOnlySpan<char> name) + { + if (identToken.Type != TokenType.Ident) + { + name = default; + return false; + } + + var text = identToken.GetText(parser.Input); + if (NameUtils.IsNameValid(text)) + { + name = text; + return true; + } + + name = default; + return false; + } + + /// + /// Is the text a valid A1 cell reference? No padding, case insensitive.
+ /// + public static bool TryGetCellA1(ReadOnlySpan<char> text, out RowCol cell) + { + cell = default; + if (text.Length is < MIN_A1_LENGTH or > MAX_A1_LENGTH) + return false; + + var i = 0; + var absCol = text[i] == '$'; + if (absCol) ++i; + + var col = 0; + while (i < text.Length && col <= RowCol.MaxCol && IsAsciiLetter(text[i])) // stop once out of range so a long letter run cannot overflow int + col = col * 26 + GetColIndex(text[i++]) + 1; + + if (col is < RowCol.MinCol or > RowCol.MaxCol || i >= text.Length) + return false; + + var absRow = text[i] == '$'; + if (absRow) + { + if (++i >= text.Length) + return false; + } + + if (text[i] == '0') + return false; + + var row = 0; + while (i < text.Length && row <= RowCol.MaxRow && IsAsciiDigit(text[i])) // stop once out of range; otherwise e.g. A4294967297 wraps around to row 1 + row = row * 10 + text[i++] - '0'; + + if (row is < RowCol.MinRow or > RowCol.MaxRow || i < text.Length) + return false; + + cell = new RowCol( + absRow ? ReferenceAxisType.Absolute : ReferenceAxisType.Relative, row, + absCol ? ReferenceAxisType.Absolute : ReferenceAxisType.Relative, col, + A1); + return true; + } + + /// + /// Is the text a valid end of an A1 colspan? No padding, case insensitive. + /// Valid examples: A, a, $A, $XFD. + /// Invalid examples: A , $ a, $, $XFE. + /// + public static bool TryGetColA1(ReadOnlySpan<char> text, out RowCol colRef) + { + colRef = default; + if (text.Length is < MIN_COL_LENGTH or > MAX_COL_LENGTH) + return false; + + var i = 0; + var absCol = text[i] == '$'; + if (absCol) ++i; + + var col = 0; + while (i < text.Length && IsAsciiLetter(text[i])) + col = col * 26 + GetColIndex(text[i++]) + 1; + + if (col is < RowCol.MinCol or > RowCol.MaxCol || i < text.Length) + return false; + + colRef = new RowCol( + ReferenceAxisType.None, 0, + absCol ? ReferenceAxisType.Absolute : ReferenceAxisType.Relative, col, + A1); + return true; + } + + /// + /// Is the text a valid end of an A1 rowspan? No padding. + /// Valid examples: 1, $1, $1048576. + /// Invalid examples: 1.0, $ 1, $, $1048577.
+ /// + public static bool TryGetRowA1(ReadOnlySpan<char> text, out RowCol rowRef) + { + rowRef = default; + if (text.Length is < MIN_ROW_LENGTH or > MAX_ROW_LENGTH) + return false; + + var i = 0; + var absRow = text[i] == '$'; + if (absRow) + { + if (++i >= text.Length) + return false; + } + + if (text[i] == '0') + return false; + + var row = 0; + while (i < text.Length && IsAsciiDigit(text[i])) + row = row * 10 + text[i++] - '0'; + + if (row is < RowCol.MinRow or > RowCol.MaxRow || i < text.Length) + return false; + + rowRef = new RowCol( + absRow ? ReferenceAxisType.Absolute : ReferenceAxisType.Relative, row, + ReferenceAxisType.None, 0, + A1); + return true; + } + + private static int GetColIndex(char asciiLetter) + { + return (asciiLetter | 0x20) - 'a'; // setting bit 0x20 folds an ASCII letter to lower case, so 'A'/'a' map to 0 and 'Z'/'z' to 25 + } +} diff --git a/src/ClosedXML.Parser/Pratt/Parser.cs b/src/ClosedXML.Parser/Pratt/Parser.cs index 254c270..c7f3578 100644 --- a/src/ClosedXML.Parser/Pratt/Parser.cs +++ b/src/ClosedXML.Parser/Pratt/Parser.cs @@ -56,6 +56,11 @@ private Node Prefix(TContext ctx) return parselet.Parse(ctx, token); } + public Token LookAhead(int distance) + { + return _lexer.Peek(distance); + } + internal Token Consume(TokenType expectedType) { var token = _lexer.Consume(); @@ -65,6 +70,11 @@ internal Token Consume(TokenType expectedType) return token; } + internal Token Consume() + { + return _lexer.Consume(); + } + internal void Register(TokenType type, IPrefixParselet parselet) { _prefixParselets.Add(type, parselet); diff --git a/src/ClosedXML.Parser/Pratt/ParserFactory.cs b/src/ClosedXML.Parser/Pratt/ParserFactory.cs index 84f22cb..a2bf4e1 100644 --- a/src/ClosedXML.Parser/Pratt/ParserFactory.cs +++ b/src/ClosedXML.Parser/Pratt/ParserFactory.cs @@ -12,6 +12,7 @@ public static Parser Create( // Register prefix parselets parser.Register(TokenType.Number, new NumberParselet(factory, parser)); parser.Register(TokenType.LeftParen, new GroupParselet(parser)); + parser.Register(TokenType.Ident, new IdentParselet(factory, parser)); //
Register operation parselets parser.Register(TokenType.Plus, new BinaryOpParselet(factory, parser, BinaryOperation.Addition, BindingPower.Addition)); diff --git a/src/ClosedXML.Parser/RowCol.cs b/src/ClosedXML.Parser/RowCol.cs index b693873..e650400 100644 --- a/src/ClosedXML.Parser/RowCol.cs +++ b/src/ClosedXML.Parser/RowCol.cs @@ -31,7 +31,9 @@ namespace ClosedXML.Parser; /// public readonly struct RowCol : IEquatable<RowCol> { + internal const int MinRow = 1; // lowest row number accepted by the A1 parsers internal const int MaxRow = 1048576; + internal const int MinCol = 1; // lowest column number accepted by the A1 parsers internal const int MaxCol = 16384; // keep at 0, so default ctor creates is A1