diff --git a/src/Parser/Parser.php b/src/Parser/Parser.php index 0ef9958..c1d325b 100644 --- a/src/Parser/Parser.php +++ b/src/Parser/Parser.php @@ -33,6 +33,16 @@ final class Parser { + /** + * @var string + */ + private const CONTEXT_ARRAY = 'array'; + + /** + * @var string + */ + private const CONTEXT_INLINE_TABLE = 'inline_table'; + /** * @var array<\PhpCollective\Toml\Parser\ParseError> */ @@ -49,6 +59,14 @@ final class Parser private string $input = ''; + /** + * Stack tracking nesting context for error recovery. + * Contains 'array' or 'inline_table' entries. + * + * @var array + */ + private array $contextStack = []; + public function __construct( bool $preserveTrivia = false, private readonly TomlVersion $version = TomlVersion::V11, @@ -63,6 +81,7 @@ public function parse(string $input): Document $this->tokens = iterator_to_array($lexer->tokenize()); $this->pos = 0; $this->errors = []; + $this->contextStack = []; return $this->parseDocument(); } @@ -226,6 +245,156 @@ private function parseKeyValue(): ?KeyValue ); } + /** + * Parse a key-value pair inside an inline table. + * Unlike parseKeyValue(), this does not call synchronize() on error, + * allowing the inline table parser to handle recovery. 
+     */
+    private function parseInlineTableKeyValue(): ?KeyValue
+    {
+        $start = $this->current()->span; // span of the token the pair starts on; merged with the value's span below
+        $key = $this->parseKeyWithoutSync();
+
+        if ($key === null) { // every null return of parseKeyWithoutSync() has already recorded an 'Expected key' error
+            return null;
+        }
+
+        if (!$this->check(TokenType::Equals)) {
+            $hint = $this->getExpectHint(TokenType::Equals);
+            $this->error('Expected =', $this->current()->span, $hint);
+
+            return null; // no synchronize() call: the caller is left to resynchronise
+        }
+        $this->advance(); // consume the `=` that check() just confirmed
+
+        $this->skipWhitespace();
+
+        $value = $this->parseValue();
+        if ($value === null) {
+            $token = $this->current();
+            if ($token->is(TokenType::Invalid)) { // an Invalid token gets its own message and hint
+                $hint = $this->getInvalidTokenHint($token->value);
+                $this->error("Invalid token: `{$token->value}`", $token->span, $hint);
+            } else { // otherwise report that a value was expected at the current token
+                $hint = $this->getExpectedValueHint($token);
+                $this->error('Expected value', $token->span, $hint);
+            }
+
+            return null;
+        }
+
+        $span = $start->merge($value->getSpan()); // NOTE(review): presumably covers the pair from its first token through the value; confirm Span::merge()
+
+        return new KeyValue(
+            $key,
+            $value,
+            $span,
+            $this->slice($span),
+            $this->slicePrefixTo($span, $value->getSpan()),
+            $this->sliceRange($key->getSpan()->end, $value->getSpan()->start), // range from the end of the key to the start of the value
+        );
+    }
+
+    /**
+     * Parse a bare, quoted or dotted key without calling synchronize() on error.
+     * Used for inline table key-value parsing where the parent handles recovery; on failure an error is recorded and null is returned.
+     */
+    private function parseKeyWithoutSync(): ?Key
+    {
+        $parts = []; // one entry per dotted key segment
+        $styles = []; // KeyStyle of each segment, appended in step with $parts
+        $start = null; // span of the first token; set on the first loop iteration
+        $rawSeparators = []; // separator recorded between consecutive segments
+        $lastPartEnd = 0;
+        $separatorStart = null; // non-null only while a dot has been matched and the next segment is pending
+
+        do {
+            $this->skipWhitespace();
+            $token = $this->current();
+            $start ??= $token->span;
+
+            if ($separatorStart !== null) { // a dot was matched last iteration: record the range from the previous segment's end to this token
+                $rawSeparators[] = $this->sliceRange($separatorStart, $token->span->start);
+                $separatorStart = null;
+            }
+
+            if ($token->is(TokenType::BareKey)) {
+                $parts[] = $token->parsed;
+                $styles[] = KeyStyle::Bare;
+                $this->advance();
+            } elseif ($token->is(TokenType::BasicString)) {
+                $parts[] = $token->parsed;
+                $styles[] = KeyStyle::Basic;
+                $this->advance();
+            } elseif ($token->is(TokenType::LiteralString)) {
+                $parts[] = $token->parsed;
+                $styles[] = KeyStyle::Literal;
+                $this->advance();
+            } elseif ($token->is(TokenType::Integer)) { // integer-looking text in key position is taken as a bare key
+                if (preg_match('/^[+-]?\d[\d_]*$/', $token->value) !== 1) { // NOTE(review): pattern admits a leading `+`, which TOML bare keys do not allow; confirm this is intended
+                    $this->error('Expected key', $token->span, 'Only decimal integers can be used as bare keys.');
+
+                    return null;
+                }
+                $parts[] = $token->value;
+                $styles[] = KeyStyle::Bare;
+                $this->advance();
+            } elseif ($token->is(TokenType::Invalid)) {
+                if (preg_match('/^[A-Za-z0-9_-]+$/', $token->value) !== 1) { // only an Invalid token made up solely of bare-key characters is accepted
+                    $hint = $this->getExpectedKeyHint($token);
+                    $this->error('Expected key', $token->span, $hint);
+
+                    return null;
+                }
+                $parts[] = $token->value;
+                $styles[] = KeyStyle::Bare;
+                $this->advance();
+            } elseif ($token->is(TokenType::Boolean)) { // the token's source text is used as a bare key
+                $parts[] = $token->value;
+                $styles[] = KeyStyle::Bare;
+                $this->advance();
+            } elseif ($token->is(TokenType::Float)) {
+                $value = $token->value;
+                if (preg_match('/^\d+\.\d+$/', str_replace('_', '', $value)) === 1 && !str_contains(strtolower($value), 'e')) { // NOTE(review): the 'e' check looks redundant with the pattern; confirm
+                    $dotParts = explode('.', $value); // digits.digits is read as a dotted key with two segments
+                    foreach ($dotParts as $part) {
+                        $parts[] = $part;
+                        $styles[] = KeyStyle::Bare;
+                        $rawSeparators[] = '.';
+                    }
+                    array_pop($rawSeparators); // drop the separator appended after the last segment
+                } else { // any other float text is kept whole as a single bare segment
+                    $parts[] = $value;
+                    $styles[] = KeyStyle::Bare;
+                }
+                $this->advance();
+            } elseif ($token->is(TokenType::LocalDate, TokenType::LocalTime, TokenType::LocalDateTime, TokenType::OffsetDateTime)) { // date/time text is used as a single bare segment
+                $parts[] = $token->value;
+                $styles[] = KeyStyle::Bare;
+                $this->advance();
+            } else {
+                $hint = $this->getExpectedKeyHint($token);
+                $this->error('Expected key', $token->span, $hint);
+
+                return null; // no synchronize() call, unlike what this method's name contrasts with
+            }
+
+            $lastPartEnd = $token->span->end; // $token is still the segment token fetched at the top of this iteration
+            $this->skipWhitespace();
+            if ($this->match(TokenType::Dot)) {
+                $separatorStart = $lastPartEnd;
+
+                continue; // another segment must follow the dot
+            }
+
+            break;
+        } while (true);
+
+        $span = new Span($start->start, $lastPartEnd, $start->line, $start->column); // from the first token's start to the last segment's end
+
+        return new Key($parts, $styles, $span, $this->slice($span), null, null, $rawSeparators);
+    }
+ private function parseKey(): ?Key { $parts = []; $styles = []; @@ -450,6 +619,8 @@ private function parseLocalTime(): LocalTime private function parseArray(): ArrayValue { + $this->contextStack[] = self::CONTEXT_ARRAY; + $start = $this->current()->span; $this->advance(); // skip [ @@ -478,12 +649,12 @@ private function parseArray(): ArrayValue $token = $this->current(); if (!$this->check(TokenType::RightBracket)) { $this->error('Expected value in array', $token->span); - // Skip the problematic token to recover - if ($this->match(TokenType::Comma)) { - continue; + // Recover within the array context + if (!$this->synchronizeInCollection()) { + break; } - break; + continue; } break; @@ -523,6 +694,8 @@ private function parseArray(): ArrayValue $this->expect(TokenType::RightBracket); $span = $start->merge($this->previous()->span); + array_pop($this->contextStack); + return new ArrayValue( $items, $span, @@ -537,6 +710,8 @@ private function parseArray(): ArrayValue private function parseInlineTable(): InlineTable { + $this->contextStack[] = self::CONTEXT_INLINE_TABLE; + $start = $this->current()->span; $this->advance(); // skip { @@ -559,12 +734,19 @@ private function parseInlineTable(): InlineTable break; } - $kv = $this->parseKeyValue(); + $kv = $this->parseInlineTableKeyValue(); if ($kv !== null) { if 
($this->preserveTrivia) { $kv->setLeadingTrivia($nextLeadingTrivia); } $items[] = $kv; + } else { + // Failed to parse key-value, try to recover + if (!$this->synchronizeInCollection()) { + break; + } + + continue; } $trailingTrivia = $this->preserveTrivia ? $this->collectCollectionTrivia() : []; @@ -576,7 +758,7 @@ private function parseInlineTable(): InlineTable if (!$this->match(TokenType::Comma)) { break; } - if ($kv !== null && $this->preserveTrivia) { + if ($this->preserveTrivia) { $kv->setTrailingTrivia($trailingTrivia); } @@ -590,7 +772,7 @@ private function parseInlineTable(): InlineTable break; } - } elseif ($kv !== null && $this->preserveTrivia) { + } elseif ($this->preserveTrivia) { $kv->setTrailingTrivia($trailingTrivia); } } @@ -598,6 +780,8 @@ private function parseInlineTable(): InlineTable $this->expect(TokenType::RightBrace); $span = $start->merge($this->previous()->span); + array_pop($this->contextStack); + if ($this->version === TomlVersion::V10) { if ($this->inlineTableIsMultiline($start)) { $this->error('Multiline inline tables require TOML 1.1', $span); @@ -1055,6 +1239,10 @@ private function error(string $message, Span $span, ?string $hint = null): void $this->errors[] = new ParseError($message, $span, $hint); } + /** + * Synchronize parser state after an error at the top level. + * Skips tokens until a newline or table header is found. + */ private function synchronize(): void { while (!$this->isAtEnd()) { @@ -1063,10 +1251,57 @@ private function synchronize(): void return; } + // Stop at table header start for recovery if ($this->check(TokenType::LeftBracket)) { return; } $this->advance(); } } + + /** + * Synchronize parser state after an error inside a collection (array or inline table). + * Skips to the next comma or closing bracket/brace, allowing recovery within the collection. 
+     *
+     * @return bool True if a comma at the starting nesting level was found and consumed (parsing can continue with the next element); false if an unmatched closing bracket/brace was reached (left unconsumed) or the input ended
+     */
+    private function synchronizeInCollection(): bool
+    {
+        $depth = 0; // brackets/braces opened, and not yet closed, while skipping
+
+        while (!$this->isAtEnd()) {
+            $token = $this->current();
+
+            // Track brackets/braces opened while skipping so their commas and closers are not mistaken for ours
+            if ($token->is(TokenType::LeftBracket, TokenType::LeftBrace)) {
+                $depth++;
+                $this->advance();
+
+                continue;
+            }
+
+            if ($token->is(TokenType::RightBracket, TokenType::RightBrace)) {
+                if ($depth > 0) { // closes something opened during the skip (the closer's kind is not checked against the opener)
+                    $depth--;
+                    $this->advance();
+
+                    continue;
+                }
+
+                // Unmatched closer at our level - stop without consuming. NOTE(review): `]` and `}` are treated alike; the context stack is not consulted here
+                return false;
+            }
+
+            // At our level, a comma means the caller can continue with the next element; it is consumed here
+            if ($depth === 0 && $token->is(TokenType::Comma)) {
+                $this->advance();
+
+                return true;
+            }
+
+            $this->advance(); // any other token is skipped
+        }
+
+        return false; // end of input reached without finding a comma or an unmatched closer
+    }
 }
diff --git a/tests/Parser/ErrorRecoveryTest.php b/tests/Parser/ErrorRecoveryTest.php
new file mode 100644
index 0000000..d80f3d6
--- /dev/null
+++ b/tests/Parser/ErrorRecoveryTest.php
@@ -0,0 +1,225 @@
+assertCount(1, $result->getErrors()); // NOTE(review): everything in this new file before this call is missing from the text under review
+        $this->assertSame('Expected value', $result->getErrors()[0]->message);
+        $this->assertSame(1, $result->getErrors()[0]->span->line);
+
+        // Both document items should be parsed
+        $doc = $result->getDocument();
+        $this->assertNotNull($doc);
+        $this->assertCount(2, $doc->items);
+
+        $this->assertInstanceOf(KeyValue::class, $doc->items[0]);
+        $this->assertSame(['data'], $doc->items[0]->key->parts);
+
+        $this->assertInstanceOf(KeyValue::class, $doc->items[1]);
+        $this->assertSame(['other'], $doc->items[1]->key->parts);
+    }
+
+    public function testRecoveryInInlineTableRecoversContinuingItems(): void
+    {
+        $input = '{a = 1, b = , c = 3}'; // `b` has no value
+
+        $result = Toml::tryParse('t = ' . 
$input);
+
+        // Exactly one error is expected: the missing value for `b`
+        $this->assertCount(1, $result->getErrors());
+        $this->assertSame('Expected value', $result->getErrors()[0]->message);
+
+        // The inline table should have 2 items: a and c (b failed)
+        $doc = $result->getDocument();
+        $this->assertNotNull($doc);
+        $this->assertCount(1, $doc->items);
+        $this->assertInstanceOf(KeyValue::class, $doc->items[0]);
+
+        $inlineTable = $doc->items[0]->value;
+        $this->assertInstanceOf(InlineTable::class, $inlineTable);
+        $this->assertCount(2, $inlineTable->items);
+
+        $this->assertSame(['a'], $inlineTable->items[0]->key->parts);
+        $this->assertSame(['c'], $inlineTable->items[1]->key->parts);
+    }
+
+    public function testRecoveryInArrayOfInlineTables(): void
+    {
+        $input = <<<'TOML'
+data = [{a = 1}, {b = }, {c = 3}]
+other = "value"
+TOML;
+
+        $result = Toml::tryParse($input);
+
+        // Should only have 1 error for the missing value in second inline table
+        $this->assertCount(1, $result->getErrors());
+        $this->assertSame('Expected value', $result->getErrors()[0]->message);
+        $this->assertSame(1, $result->getErrors()[0]->span->line); // the error is reported on the first input line
+
+        // Both document items should be parsed
+        $doc = $result->getDocument();
+        $this->assertNotNull($doc);
+        $this->assertCount(2, $doc->items);
+
+        $this->assertInstanceOf(KeyValue::class, $doc->items[0]);
+        $this->assertSame(['data'], $doc->items[0]->key->parts);
+
+        $this->assertInstanceOf(KeyValue::class, $doc->items[1]);
+        $this->assertSame(['other'], $doc->items[1]->key->parts);
+    }
+
+    public function testRecoveryInDeeplyNestedInlineTable(): void
+    {
+        $input = <<<'TOML'
+deep = {level1 = {level2 = {invalid = }}}
+after = "test"
+TOML;
+
+        $result = Toml::tryParse($input);
+
+        // Should only have 1 error for the missing value at the deepest level
+        $this->assertCount(1, $result->getErrors());
+        $this->assertSame('Expected value', $result->getErrors()[0]->message);
+        $this->assertSame(1, $result->getErrors()[0]->span->line);
+
+        // Both document items should be parsed
+        $doc = $result->getDocument();
+        $this->assertNotNull($doc);
+        $this->assertCount(2, $doc->items);
+
+        $this->assertInstanceOf(KeyValue::class, $doc->items[0]);
+        $this->assertSame(['deep'], $doc->items[0]->key->parts);
+
+        $this->assertInstanceOf(KeyValue::class, $doc->items[1]);
+        $this->assertSame(['after'], $doc->items[1]->key->parts);
+    }
+
+    public function testRecoveryInArrayWithInvalidValue(): void
+    {
+        // Tests that array recovery reports an empty element (two consecutive commas) and continues
+        $input = <<<'TOML'
+arr = [1, , 3]
+other = 123
+TOML;
+
+        $result = Toml::tryParse($input);
+
+        // Should have 1 error for the empty array element
+        $this->assertCount(1, $result->getErrors());
+        $this->assertStringContainsString('Expected value', $result->getErrors()[0]->message); // substring match: the array parser's message is 'Expected value in array'
+
+        // Both document items should be parsed
+        $doc = $result->getDocument();
+        $this->assertNotNull($doc);
+        $this->assertCount(2, $doc->items);
+    }
+
+    public function testRecoveryInNestedArrays(): void
+    {
+        $input = <<<'TOML'
+arr = [1, [2, , 4], 5]
+after = "test"
+TOML;
+
+        $result = Toml::tryParse($input);
+
+        // Should have 1 error for the empty nested array element
+        $this->assertCount(1, $result->getErrors());
+
+        // Both document items should be parsed
+        $doc = $result->getDocument();
+        $this->assertNotNull($doc);
+        $this->assertCount(2, $doc->items);
+    }
+
+    public function testTopLevelRecoveryStillWorks(): void
+    {
+        // Test that top-level synchronization still works when a table header is missing its closing bracket
+        $input = <<<'TOML'
+[table
+key = "value"
+other = 123
+TOML;
+
+        $result = Toml::tryParse($input);
+
+        // Should have error for missing bracket
+        $this->assertGreaterThanOrEqual(1, count($result->getErrors())); // lower bound only: the exact number of errors is not pinned
+
+        // The key-value pairs should be recovered
+        $doc = $result->getDocument();
+        $this->assertNotNull($doc);
+        $this->assertGreaterThanOrEqual(2, count($doc->items)); // NOTE(review): checks only the item count, not which items were recovered
+    }
+
+    public function testMultipleInlineTableErrorsInSameTable(): void
+    {
+        $input = '{a = , b = , c = 3}'; // `a` and `b` both lack a value
+
+        $result = Toml::tryParse('t = ' . $input);
+
+        // Should have 2 errors (one for each missing value)
+        $this->assertCount(2, $result->getErrors());
+
+        // The inline table should have 1 item: c (a and b failed)
+        $doc = $result->getDocument();
+        $this->assertNotNull($doc);
+        $this->assertCount(1, $doc->items);
+        $this->assertInstanceOf(KeyValue::class, $doc->items[0]);
+
+        $inlineTable = $doc->items[0]->value;
+        $this->assertInstanceOf(InlineTable::class, $inlineTable);
+        $this->assertCount(1, $inlineTable->items);
+
+        $this->assertSame(['c'], $inlineTable->items[0]->key->parts);
+    }
+
+    public function testRecoveryPreservesValidArrayElements(): void
+    {
+        $input = 'arr = [1, 2, invalid, 4, 5]';
+
+        $result = Toml::tryParse($input);
+
+        // Should have error for the invalid element
+        $this->assertGreaterThanOrEqual(1, count($result->getErrors()));
+
+        // Array should still be created
+        $doc = $result->getDocument();
+        $this->assertNotNull($doc);
+        $this->assertCount(1, $doc->items);
+        $this->assertInstanceOf(KeyValue::class, $doc->items[0]);
+
+        $arr = $doc->items[0]->value;
+        $this->assertInstanceOf(ArrayValue::class, $arr);
+
+        // Should have parsed elements before and after the invalid one
+        $this->assertGreaterThanOrEqual(2, count($arr->items)); // NOTE(review): >= 2 is already met by the two elements before `invalid`; it does not by itself show that 4 and 5 were recovered
+    }
+}