diff --git a/.gitattributes b/.gitattributes index 165404f..fe098c4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -11,3 +11,4 @@ /ecs.php export-ignore /rector.php export-ignore /tests export-ignore +/benchmarks export-ignore diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 8aac514..72a7ec1 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -38,10 +38,5 @@ jobs: echo "::add-matcher::${{ runner.tool_cache }}/php.json" echo "::add-matcher::${{ runner.tool_cache }}/phpunit.json" - - name: Execute tests with mutation - if: ${{ matrix.os == 'ubuntu-latest' && matrix.php == '8.5' }} - run: vendor/bin/pest --colors=always --mutate --parallel --min=50 - - name: Execute tests - if: ${{ matrix.os != 'ubuntu-latest' || matrix.php != '8.5' }} - run: vendor/bin/pest --colors=always + run: vendor/bin/pest --colors=always --parallel diff --git a/README.md b/README.md index 99d5bcd..f489f8a 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,28 @@ $data = json_repair_decode( ); ``` +## Logging + +The library supports PSR-3 logging for debugging repair operations. Pass any PSR-3 compatible logger to see what repairs are being made: + +```php +use Psr\Log\LoggerInterface; + +// Using the helper function +$repaired = json_repair($broken, logger: $logger); + +// Using the class (implements LoggerAwareInterface) +$repairer = new JsonRepairer($broken); +$repairer->setLogger($logger); +$repaired = $repairer->repair(); +``` + +Log messages include the position in the JSON string and a context snippet showing where the repair occurred. This is useful for: + +- Debugging why certain repairs are being made +- Understanding how malformed JSON is being interpreted +- Tracking repair operations in production environments + ## Credits - [Sean Tymon](https://github.com/tymondesigns) diff --git a/benchmarks/JsonRepairerBench.php b/benchmarks/JsonRepairerBench.php index 8ea65cc..6bc67e9 100644 --- a/benchmarks/JsonRepairerBench.php +++ b/benchmarks/JsonRepairerBench.php @@ -11,7 +11,7 @@ /** * @Revs(100) - * @Iterations(20) + * @Iterations(10) * @Warmup(2) */ class JsonRepairerBench @@ -41,7 +41,15 @@ public function benchRepairValidJson(array $params): void } /** + * Benchmarks large JSON repair. + * + * Note: This benchmark uses fewer revs/iterations since it processes + * 1000 items and takes ~44ms per run. Use --filter=benchRepairLargeJson + * to run it separately. + * * @ParamProviders({"provideLargeJson"}) + * @Revs(10) + * @Iterations(5) */ public function benchRepairLargeJson(array $params): void { @@ -81,6 +89,16 @@ public function benchRepairStreamingJson(array $params): void json_repair($params['json']); } + /** + * Baseline: compare repair overhead against native json_decode on valid JSON. + * + * @ParamProviders({"provideValidJson"}) + */ + public function benchNativeJsonDecodeBaseline(array $params): void + { + json_decode($params['json']); + } + /** * @return array> */ @@ -141,7 +159,7 @@ public function provideLargeJson(): array $brokenJson = rtrim($brokenJson, '}') . ',}'; return [ - 'large_array' => ['json' => $brokenJson], + 'large_array_broken' => ['json' => $brokenJson], ]; } diff --git a/composer.json b/composer.json index bac365b..b86972e 100644 --- a/composer.json +++ b/composer.json @@ -18,9 +18,11 @@ ], "require": { "php": "^8.3", - "ext-json": "*" + "ext-json": "*", + "psr/log": "^3.0" }, "require-dev": { + "colinodell/psr-testlogger": "^1.3", "pestphp/pest": "^4.1.4", "pestphp/pest-plugin-type-coverage": "^4.0.3", "phpbench/phpbench": "^1.4", diff --git a/phpbench.json b/phpbench.json index 42be822..ce0b109 100644 --- a/phpbench.json +++ b/phpbench.json @@ -2,8 +2,14 @@ "$schema": "https://raw.githubusercontent.com/phpbench/phpbench/master/lib/phpbench.schema.json", "runner.bootstrap": "vendor/autoload.php", "runner.path": "benchmarks", - "runner.iterations": 20, + "runner.iterations": 10, "runner.revs": 100, "runner.warmup": 2, - "runner.time_unit": "microseconds" + "runner.time_unit": "microseconds", + "report.generators": { + "compare": { + "generator": "table", + "cols": ["benchmark", "subject", "set", "revs", "its", "mem_peak", "mode", "rstdev"] + } + } } diff --git a/src/Exceptions/JsonRepairException.php b/src/Exceptions/JsonRepairException.php new file mode 100644 index 0000000..a9e9b56 --- /dev/null +++ b/src/Exceptions/JsonRepairException.php @@ -0,0 +1,30 @@ +json)) { + $this->log('JSON is already valid, returning as-is'); + return $this->json; } + $this->log('Starting JSON repair'); + // Extract JSON from markdown code blocks if present $json = $this->extractJsonFromMarkdown($this->json); + if ($json !== $this->json) { + $this->log('Extracted JSON from markdown code block'); + } + // Handle multiple JSON objects $json = $this->extractFirstValidJson($json); @@ -83,24 +120,50 @@ public function repair(): string // @phpstan-ignore identical.alwaysFalse (state changes in loop iterations) if ($this->state === self::STATE_IN_STRING_ESCAPE) { // If we're at the end of the string and in escape state, the escape is incomplete + // Just drop the incomplete escape (backslash wasn't added to output yet) if ($i >= strlen($json)) { - // Remove the backslash, treat as literal character - $this->output = substr($this->output, 0, -1); $this->state = self::STATE_IN_STRING; break; } - $this->handleEscapeSequence($char); + $extraCharsConsumed = $this->handleEscapeSequence($char, $json); $this->state = self::STATE_IN_STRING; - $i++; + $i += 1 + $extraCharsConsumed; continue; } // Handle characters inside strings // @phpstan-ignore identical.alwaysFalse (state changes in loop iterations) if ($this->state === self::STATE_IN_STRING) { + // Check for smart quotes as closing delimiter + $smartQuoteLength = $this->getSmartQuoteLength($json, $i); + + // Handle double quote inside single-quoted string - must escape it + // @phpstan-ignore booleanAnd.alwaysFalse, identical.alwaysFalse (delimiter set when entering string state and can be single quote) + if ($char === '"' && $this->stringDelimiter === "'") { + $this->log('Escaping double quote inside single-quoted string'); + $this->output .= '\\"'; + $i++; + continue; + } + // @phpstan-ignore identical.alwaysFalse (delimiter set when entering string state) - if ($char === $this->stringDelimiter) { + if ($char === $this->stringDelimiter || $smartQuoteLength > 0) { + // Check if this quote should be escaped (it's inside the string value) + // @phpstan-ignore identical.alwaysFalse (smartQuoteLength can be 0 when char matches delimiter) + $isRegularQuote = $smartQuoteLength === 0; + // @phpstan-ignore booleanOr.alwaysFalse + $isInValue = $this->stateBeforeString === self::STATE_IN_OBJECT_VALUE // @phpstan-ignore identical.alwaysFalse + || $this->stateBeforeString === self::STATE_IN_ARRAY; // @phpstan-ignore identical.alwaysFalse + + // @phpstan-ignore booleanAnd.leftAlwaysFalse, booleanAnd.rightAlwaysFalse, booleanAnd.alwaysFalse (variables can be true at runtime) + if ($isRegularQuote && $isInValue && $this->shouldEscapeQuoteInValue($json, $i)) { + $this->log('Escaping embedded quote inside string value'); + $this->output .= '\\"'; + $i++; + continue; + } + // Always close with double quote, even if opened with single quote $this->output .= '"'; $this->inString = false; @@ -112,17 +175,39 @@ public function repair(): string $this->currentKeyStart = -1; } - $i++; + // @phpstan-ignore greater.alwaysTrue (smartQuoteLength can be 0 when char matches delimiter) + $i += $smartQuoteLength > 0 ? $smartQuoteLength : 1; continue; } if ($char === '\\') { - $this->output .= $char; + // Don't output the backslash yet - let handleEscapeSequence decide $this->state = self::STATE_IN_STRING_ESCAPE; $i++; continue; } + // Check if this is a structural character that should close an unclosed string + // This handles cases like {"key": "value with no closing quote} + if (($char === '}' || $char === ']') && $this->shouldCloseStringAtStructuralChar($json, $i)) { + $this->log('Closing unclosed string at structural character', [ + 'char' => $char, + ]); + // Close the string and let the structural character be processed + $this->output .= '"'; + $this->inString = false; + $this->stringDelimiter = ''; + $this->state = $this->getNextStateAfterString(); + + // Reset key tracking + if ($this->state === self::STATE_EXPECTING_COMMA_OR_END) { + $this->currentKeyStart = -1; + } + + // Don't increment i - let the structural char be processed in the next iteration + continue; + } + $this->output .= $char; $i++; continue; @@ -153,18 +238,16 @@ public function repair(): string // Check if we should remove incomplete string values // @phpstan-ignore booleanAnd.alwaysFalse, identical.alwaysFalse (stateBeforeString is set when entering string state and can be STATE_IN_OBJECT_VALUE) if ($this->omitIncompleteStrings && $this->stateBeforeString === self::STATE_IN_OBJECT_VALUE) { + $this->log('Removing incomplete string value (omitIncompleteStrings enabled)'); $this->removeCurrentKey(); // Update state after removing key $this->state = self::STATE_EXPECTING_COMMA_OR_END; } else { + $this->log('Adding missing closing quote for unclosed string'); $this->output .= '"'; - // If we were in a string escape state, the escape was incomplete - // @phpstan-ignore identical.alwaysFalse (state can be STATE_IN_STRING_ESCAPE if string ended during escape) - if ($this->state === self::STATE_IN_STRING_ESCAPE) { - // Remove the incomplete escape backslash - $this->output = substr($this->output, 0, -2) . substr($this->output, -1); - } + // Note: If we were in escape state, the incomplete escape backslash + // was never added to output (we defer adding it to handleEscapeSequence) // Update state after closing string $this->state = $this->getNextStateAfterString(); @@ -179,8 +262,10 @@ public function repair(): string if ($this->state === self::STATE_EXPECTING_COLON) { // We have a key but no colon/value - add colon and empty value if ($this->omitEmptyValues) { + $this->log('Removing key without value (omitEmptyValues enabled)'); $this->removeCurrentKey(); } else { + $this->log('Adding missing colon and empty value for incomplete key'); $this->output .= ':""'; } @@ -198,9 +283,13 @@ public function repair(): string } } - // If we're in OBJECT_VALUE state and output ends with ':', add empty string + // If we're in OBJECT_VALUE state and output ends with ':' (possibly with trailing space), add empty string + $trimmedForCheck = rtrim($this->output); + // @phpstan-ignore booleanAnd.alwaysFalse, identical.alwaysFalse (state can change during loop) - if ($this->state === self::STATE_IN_OBJECT_VALUE && str_ends_with($this->output, ':')) { + if ($this->state === self::STATE_IN_OBJECT_VALUE && str_ends_with($trimmedForCheck, ':')) { + $this->output = $trimmedForCheck; + if ($this->omitEmptyValues) { $this->removeCurrentKey(); } else { @@ -213,11 +302,18 @@ public function repair(): string // Close any unclosed brackets/braces while ($this->stack !== []) { $expected = array_pop($this->stack); + $this->log('Adding missing closing bracket/brace', [ + 'char' => $expected, + ]); // Remove trailing comma before closing $this->removeTrailingComma(); - if ($expected === '}' && str_ends_with($this->output, ':')) { + $trimmedForBrace = rtrim($this->output); + + if ($expected === '}' && str_ends_with($trimmedForBrace, ':')) { + $this->output = $trimmedForBrace; + if ($this->omitEmptyValues) { $this->removeCurrentKey(); } else { @@ -240,6 +336,10 @@ public function repair(): string } } + if ($this->output !== '' && ! json_validate($this->output)) { + throw JsonRepairException::invalidJsonAfterRepair($this->output); + } + return $this->output; } @@ -258,6 +358,16 @@ public function decode( return is_array($decoded) ? $decoded : (object) $decoded; } + /** + * Extract JSON content from markdown code blocks. + * + * Looks for ```json or ``` code blocks and returns the content. + * If no markdown blocks are found, returns the input as-is. + * + * @param string $input The input string that may contain markdown code blocks + * + * @return string The extracted JSON content or original input + */ private function extractJsonFromMarkdown(string $input): string { $matchCount = preg_match_all('/```json\s*([\s\S]*?)\s*```/', $input, $matches); @@ -275,6 +385,17 @@ private function extractJsonFromMarkdown(string $input): string return $input; } + /** + * Extract the first valid JSON object or array from the input. + * + * Scans the input to find the longest valid JSON object or array. + * This is useful when JSON is embedded in other text or when + * there are multiple JSON structures. + * + * @param string $input The input string that may contain JSON + * + * @return string The first valid JSON found, or the original input if none found + */ private function extractFirstValidJson(string $input): string { if (json_validate($input)) { @@ -363,6 +484,17 @@ private function extractFirstValidJson(string $input): string return $bestMatch ?? $input; } + /** + * Handle the starting state of parsing. + * + * Processes the first character of the JSON, expecting either an object + * opening brace or an array opening bracket. + * + * @param string $json The JSON string being parsed + * @param int $i The current position in the string + * + * @return int The next position to parse + */ private function handleStart(string $json, int $i): int { $char = $json[$i]; @@ -387,6 +519,17 @@ private function handleStart(string $json, int $i): int return $i + 1; } + /** + * Handle parsing an object key. + * + * Processes keys within a JSON object, which can be quoted, single-quoted, + * or unquoted (containing only alphanumeric characters, underscores, or hyphens). + * + * @param string $json The JSON string being parsed + * @param int $i The current position in the string + * + * @return int The next position to parse + */ private function handleObjectKey(string $json, int $i): int { $char = $json[$i]; @@ -401,6 +544,69 @@ private function handleObjectKey(string $json, int $i): int } if ($char === '"' || $char === "'") { + // Check for double-quote delimiter pattern like ""key"" (slanted delimiter style) + // If we have ""X where X is alphanumeric, skip the double quotes and read as unquoted key + if ($i + 2 < strlen($json) && $json[$i + 1] === $char) { + $afterDoubleQuote = $json[$i + 2]; + + if (ctype_alnum($afterDoubleQuote) || $afterDoubleQuote === '_' || $afterDoubleQuote === ' ') { + $this->log('Found doubled quote delimiter pattern, normalizing key'); + // This looks like ""key"" pattern - skip the opening "" and read the key + $this->currentKeyStart = strlen($this->output); + $this->output .= '"'; + $keyStart = $i + 2; + $keyEnd = $keyStart; + + // Read until we hit the closing "" or single " or : or } + while ($keyEnd < strlen($json)) { + $keyChar = $json[$keyEnd]; + + // Check for closing "" pattern + if (($keyChar === '"' || $keyChar === "'") && $keyEnd + 1 < strlen( + $json, + ) && $json[$keyEnd + 1] === $keyChar) { + break; + } + + // Also stop at single quote followed by colon (end of key) + if (($keyChar === '"' || $keyChar === "'") && $keyEnd + 1 < strlen( + $json, + ) && $json[$keyEnd + 1] === ':') { + break; + } + + // Stop at colon or closing brace + if ($keyChar === ':' || $keyChar === '}') { + break; + } + + $this->output .= $keyChar; + $keyEnd++; + } + + $this->output .= '"'; + $this->state = self::STATE_EXPECTING_COLON; + + // Skip past the closing "" if present + if ($keyEnd + 1 < strlen( + $json, + ) && ($json[$keyEnd] === '"' || $json[$keyEnd] === "'") && $json[$keyEnd + 1] === $json[$keyEnd]) { + return $keyEnd + 2; + } + + // Skip past single closing " if present (followed by :) + if ($keyEnd < strlen($json) && ($json[$keyEnd] === '"' || $json[$keyEnd] === "'")) { + return $keyEnd + 1; + } + + return $keyEnd; + } + } + + if ($char === "'") { + $this->log('Converting single-quoted key to double quotes'); + } + // Track where the key starts $this->currentKeyStart = strlen($this->output); $this->output .= '"'; @@ -412,8 +618,24 @@ private function handleObjectKey(string $json, int $i): int return $i + 1; } + // Handle smart/curly quotes as key delimiters + $smartQuoteLength = $this->getSmartQuoteLength($json, $i); + + if ($smartQuoteLength > 0) { + $this->log('Converting smart/curly quote to standard double quote'); + $this->currentKeyStart = strlen($this->output); + $this->output .= '"'; + $this->inString = true; + $this->stringDelimiter = '"'; // Normalize to regular quote + $this->stateBeforeString = self::STATE_IN_OBJECT_KEY; + $this->state = self::STATE_IN_STRING; + + return $i + $smartQuoteLength; + } + // Unquoted key if (ctype_alnum($char) || $char === '_' || $char === '-') { + $this->log('Adding quotes around unquoted key'); // Track where the key starts $this->currentKeyStart = strlen($this->output); $this->output .= '"'; @@ -431,6 +653,17 @@ private function handleObjectKey(string $json, int $i): int return $i + 1; } + /** + * Handle the state expecting a colon after an object key. + * + * Processes the colon separator between a key and its value in an object. + * If a colon is not present, one will be inserted. + * + * @param string $json The JSON string being parsed + * @param int $i The current position in the string + * + * @return int The next position to parse + */ private function handleExpectingColon(string $json, int $i): int { $char = $json[$i]; @@ -439,11 +672,19 @@ private function handleExpectingColon(string $json, int $i): int $this->output .= ':'; $this->state = self::STATE_IN_OBJECT_VALUE; - return $i + 1; + // Preserve whitespace after colon + $nextI = $i + 1; + while ($nextI < strlen($json) && $json[$nextI] === ' ') { + $this->output .= ' '; + $nextI++; + } + + return $nextI; } // Missing colon, insert it if (! ctype_space($char)) { + $this->log('Inserting missing colon after key'); $this->output .= ':'; $this->state = self::STATE_IN_OBJECT_VALUE; @@ -453,6 +694,17 @@ private function handleExpectingColon(string $json, int $i): int return $i + 1; } + /** + * Handle parsing an object value. + * + * Processes the value portion of a key-value pair in an object. + * Can handle nested objects, arrays, strings, booleans, null, and numbers. + * + * @param string $json The JSON string being parsed + * @param int $i The current position in the string + * + * @return int The next position to parse + */ private function handleObjectValue(string $json, int $i): int { $char = $json[$i]; @@ -478,6 +730,17 @@ private function handleObjectValue(string $json, int $i): int } if ($char === '"' || $char === "'") { + // Check for double quote at start of value (e.g., {"key": ""value"}) + // Skip the first quote if it's immediately followed by another quote and then non-quote content + // Check what comes after the second quote + if ($i + 1 < strlen($json) && $json[$i + 1] === $char && ($i + 2 < strlen( + $json, + ) && $json[$i + 2] !== $char && $json[$i + 2] !== '}' && $json[$i + 2] !== ',')) { + // Pattern like ""value" - skip the empty quotes and use the value + // Skip the first quote entirely + return $i + 1; + } + $this->output .= '"'; $this->inString = true; $this->stringDelimiter = $char; @@ -488,10 +751,18 @@ private function handleObjectValue(string $json, int $i): int } if ($char === '}') { - if (str_ends_with($this->output, ':')) { + // Check for missing value - output ends with colon (possibly followed by space) + $trimmedOutput = rtrim($this->output); + + if (str_ends_with($trimmedOutput, ':')) { + // Remove trailing space(s) after colon before adding empty value + $this->output = $trimmedOutput; + if ($this->omitEmptyValues) { + $this->log('Removing key with missing value (omitEmptyValues enabled)'); $this->removeCurrentKey(); } else { + $this->log('Adding empty string for missing value'); $this->output .= '""'; } } @@ -508,7 +779,16 @@ private function handleObjectValue(string $json, int $i): int $matchResult = preg_match('/^(true|false|null|True|False|None)\b/i', substr($json, $i), $matches); if ($matchResult === 1) { - $this->output .= $this->normalizeBoolean($matches[1]); + $normalized = $this->normalizeBoolean($matches[1]); + + if ($matches[1] !== $normalized) { + $this->log('Normalizing boolean/null value', [ + 'from' => $matches[1], + 'to' => $normalized, + ]); + } + + $this->output .= $normalized; $this->state = self::STATE_EXPECTING_COMMA_OR_END; // Reset key tracking after successfully completing a boolean/null value $this->currentKeyStart = -1; @@ -526,8 +806,10 @@ private function handleObjectValue(string $json, int $i): int // Missing value if ($char === ',' || $char === '}') { if ($this->omitEmptyValues) { + $this->log('Removing key with missing value (omitEmptyValues enabled)'); $this->removeCurrentKey(); } else { + $this->log('Adding empty string for missing value'); $this->output .= '""'; } @@ -536,9 +818,40 @@ private function handleObjectValue(string $json, int $i): int return $i; } + // Handle smart/curly quotes - treat them as regular quotes + $smartQuoteLength = $this->getSmartQuoteLength($json, $i); + + if ($smartQuoteLength > 0) { + $this->output .= '"'; + $this->inString = true; + $this->stringDelimiter = '"'; + $this->stateBeforeString = self::STATE_IN_OBJECT_VALUE; + $this->state = self::STATE_IN_STRING; + + return $i + $smartQuoteLength; + } + + // Handle unquoted string values + if (ctype_alpha($char) || $char === '_') { + $this->log('Found unquoted string value, adding quotes'); + + return $this->handleUnquotedStringValue($json, $i); + } + return $i + 1; } + /** + * Handle parsing an array value. + * + * Processes elements within a JSON array. + * Can handle nested objects, arrays, strings, booleans, null, and numbers. + * + * @param string $json The JSON string being parsed + * @param int $i The current position in the string + * + * @return int The next position to parse + */ private function handleArrayValue(string $json, int $i): int { $char = $json[$i]; @@ -598,6 +911,17 @@ private function handleArrayValue(string $json, int $i): int return $i + 1; } + /** + * Handle the state expecting a comma or closing bracket/brace. + * + * Processes the separator between elements in an array or key-value pairs + * in an object, or the closing character that ends the structure. + * + * @param string $json The JSON string being parsed + * @param int $i The current position in the string + * + * @return int The next position to parse + */ private function handleExpectingCommaOrEnd(string $json, int $i): int { $char = $json[$i]; @@ -616,11 +940,19 @@ private function handleExpectingCommaOrEnd(string $json, int $i): int $this->output .= ','; $this->state = $top === '}' ? self::STATE_IN_OBJECT_KEY : self::STATE_IN_ARRAY; - return $i + 1; + // Preserve whitespace after comma + $nextI = $i + 1; + while ($nextI < strlen($json) && $json[$nextI] === ' ') { + $this->output .= ' '; + $nextI++; + } + + return $nextI; } // Missing comma, insert it if (! ctype_space($char) && $char !== $top) { + $this->log('Inserting missing comma'); $this->output .= ','; $this->state = $top === '}' ? self::STATE_IN_OBJECT_KEY : self::STATE_IN_ARRAY; @@ -630,6 +962,18 @@ private function handleExpectingCommaOrEnd(string $json, int $i): int return $i + 1; } + /** + * Handle parsing a numeric value. + * + * Processes numbers including integers, floats, and numbers with + * scientific notation (e.g., 1.23e-4). Handles positive and negative + * signs, decimal points, and exponents. + * + * @param string $json The JSON string being parsed + * @param int $i The current position in the string + * + * @return int The next position to parse + */ private function handleNumber(string $json, int $i): int { $length = strlen($json); @@ -689,52 +1033,85 @@ private function handleNumber(string $json, int $i): int return $i; } - private function handleEscapeSequence(string $char): void + /** + * Handle an escape sequence within a string. + * + * Processes escape sequences like \", \\, \/, \b, \f, \n, \r, \t, and + * unicode escapes (\uXXXX). Invalid or incomplete escapes are treated + * as literal backslash followed by the character. + */ + /** + * Handle an escape sequence within a string. + * + * Processes escape sequences like \", \\, \/, \b, \f, \n, \r, \t, and + * unicode escapes (\uXXXX). Invalid or incomplete escapes are treated + * as escaped backslash followed by the character. + * + * @return int Number of extra characters consumed beyond the escape character itself + */ + private function handleEscapeSequence(string $char, string $json): int { - $escapeMap = [ - '"' => '"', - '\\' => '\\', - '/' => '/', - 'b' => "\b", - 'f' => "\f", - 'n' => "\n", - 'r' => "\r", - 't' => "\t", - ]; - - if (isset($escapeMap[$char])) { + $validEscapes = ['"', '\\', '/', 'b', 'f', 'n', 'r', 't']; + + if (in_array($char, $validEscapes, true)) { $this->output .= '\\' . $char; - } elseif ($char === 'u' && $this->pos + 4 < strlen($this->json)) { - // Unicode escape - $hex = substr($this->json, $this->pos + 1, 4); + + return 0; + } + + if ($char === 'u' && $this->pos + 4 < strlen($json)) { + $hex = substr($json, $this->pos + 1, 4); if (ctype_xdigit($hex)) { $this->output .= '\\u' . $hex; - } else { - // Invalid unicode escape - output as literal backslash + u - $this->output .= '\\' . $char; + + return 4; // Consumed 4 extra hex digits } - } else { - // Unknown escape sequence or incomplete - output as literal backslash + char - // This handles incomplete escapes (e.g., string ends with \) - $this->output .= '\\' . $char; } + + // Invalid escape sequence - escape the backslash and output the character literally + $this->output .= '\\\\' . $char; + + return 0; } + /** + * Determine the next state after completing a string. + * + * Returns STATE_EXPECTING_COLON after a key, or STATE_EXPECTING_COMMA_OR_END after a value. + */ private function getNextStateAfterString(): int { - return $this->stateBeforeString === self::STATE_IN_OBJECT_KEY - ? self::STATE_EXPECTING_COLON - : self::STATE_EXPECTING_COMMA_OR_END; + if ($this->stateBeforeString === self::STATE_IN_OBJECT_KEY) { + return self::STATE_EXPECTING_COLON; + } + + return self::STATE_EXPECTING_COMMA_OR_END; } + /** + * Remove a trailing comma from the output. + */ private function removeTrailingComma(): void { - if (str_ends_with($this->output, ',')) { - $this->output = substr($this->output, 0, -1); + $trimmed = rtrim($this->output); + + if (str_ends_with($trimmed, ',')) { + $this->log('Removing trailing comma'); + $this->output = substr($trimmed, 0, -1); } } + /** + * Normalize boolean/null values to proper JSON format. + * + * Converts non-standard boolean/null values (True, False, None) to + * their proper JSON equivalents (true, false, null). + * + * @param string $value The value to normalize + * + * @return string The normalized JSON value (true, false, or null) + */ private function normalizeBoolean(string $value): string { return match (strtolower($value)) { @@ -744,20 +1121,357 @@ private function normalizeBoolean(string $value): string }; } + /** + * Remove the current key from the output. + * + * Removes the most recently added key and any preceding comma and whitespace. + * Used when omitEmptyValues or omitIncompleteStrings options are enabled. + */ private function removeCurrentKey(): void { - if ($this->currentKeyStart >= 0) { - $beforeKey = substr($this->output, 0, $this->currentKeyStart); - // Remove preceding comma and whitespace if present - $beforeKey = rtrim($beforeKey); + if ($this->currentKeyStart < 0) { + return; + } + + $beforeKey = rtrim(substr($this->output, 0, $this->currentKeyStart)); + + if (str_ends_with($beforeKey, ',')) { + $beforeKey = rtrim(substr($beforeKey, 0, -1)); + } + + $this->output = $beforeKey; + $this->currentKeyStart = -1; + } + + /** + * Determine if a string should be closed at a structural character. + * + * This method handles cases where a string is missing its closing quote. + * If no closing quote is found after the current position, the string + * will be closed at this structural character (} or ]). + * + * @param string $json The JSON string being parsed + * @param int $pos The position of the structural character + * + * @return bool True if the string should be closed, false otherwise + */ + private function shouldCloseStringAtStructuralChar(string $json, int $pos): bool + { + $length = strlen($json); + $char = $json[$pos]; + + // Check if there's a closing quote before the end of input + // If not, this structural character should close the string + $hasClosingQuote = false; + + for ($i = $pos + 1; $i < $length; $i++) { + if ($json[$i] === $this->stringDelimiter) { + $hasClosingQuote = true; + break; + } + + // If we hit another structural character of the same type, stop looking + if ($json[$i] === $char) { + break; + } + } + + // Close string here if no closing quote found after this position + return ! $hasClosingQuote; + } + + /** + * Determine if a quote character inside a string value should be escaped. + * + * This method handles cases like {"key": "v"alu"e"} where quotes appear + * inside the value. It determines whether a quote should be treated as + * the string terminator or as an embedded quote that needs to be escaped. + * + * @param string $json The JSON string being parsed + * @param int $quotePos The position of the quote character + * + * @return bool True if the quote should be escaped, false if it's the string terminator + */ + private function shouldEscapeQuoteInValue(string $json, int $quotePos): bool + { + // Only apply quote escaping logic for object values, not arrays + // In arrays, quotes typically delimit separate values + if ($this->stateBeforeString === self::STATE_IN_ARRAY) { + return false; + } + + $length = strlen($json); + + // Look ahead past the quote + $pos = $quotePos + 1; + + // Skip whitespace + while ($pos < $length && ctype_space($json[$pos])) { + $pos++; + } + + if ($pos >= $length) { + // End of string - this quote should close the string + return false; + } + + $nextChar = $json[$pos]; + + // If next non-whitespace is a structural character, this is a valid closing quote + if (in_array($nextChar, [',', '}', ']'], true)) { + return false; + } + + // If next non-whitespace is a colon, this is starting a new key pattern - don't escape + if ($nextChar === ':') { + return false; + } + + // If the next character is alphabetic or punctuation that could be part of text content, + // this quote might be embedded. Check if it looks like continuation of a value. + if (ctype_alpha($nextChar) || $nextChar === '_' || $nextChar === '.') { + // Look further to see if we find a colon (indicating this starts a new key) + // or if the pattern looks like continuation of a value + return $this->looksLikeContinuationNotKey($json, $pos); + } + + // If next is a quote, check what pattern it forms + if ($nextChar === '"' || $nextChar === "'") { + // Could be start of a new key like ", "key2" + // Look for the key-colon pattern + return $this->looksLikeEmbeddedQuote($json, $pos); + } + + return false; + } + + /** + * Check if the text starting at $pos looks like string continuation rather than a new key. + * + * Scans ahead to determine whether the text after a quote represents + * continuation of the current value or the start of a new key-value pair. + */ + private function looksLikeContinuationNotKey(string $json, int $pos): bool + { + $length = strlen($json); + $scanPos = $pos; + $colonPos = -1; + + while ($scanPos < $length) { + $char = $json[$scanPos]; + + if ($char === ':') { + $colonPos = $scanPos; + break; + } + + if ($char === '"' || $char === "'") { + return ! $this->isNewKeyValuePair($json, $scanPos); + } + + if (in_array($char, [',', '}', ']'], true)) { + return true; + } + + $scanPos++; + } + + if ($colonPos === -1) { + return true; + } + + $textBeforeColon = trim(substr($json, $pos, $colonPos - $pos)); + + // Empty text, spaces, or special characters indicate continuation, not a new key + if ($textBeforeColon === '' || str_contains($textBeforeColon, ' ')) { + return true; + } + + return (bool) preg_match('/[^a-zA-Z0-9_-]/', $textBeforeColon); + } + + /** + * Check if a quote at position starts a new key-value pair pattern. + * + * Returns true if the quote represents the start of a new key in a "key": "value" pattern. + */ + private function isNewKeyValuePair(string $json, int $quotePos): bool + { + $length = strlen($json); + $pos = $quotePos + 1; + + // Find the closing quote + while ($pos < $length && $json[$pos] !== '"' && $json[$pos] !== "'") { + if ($json[$pos] === '\\' && $pos + 1 < $length) { + $pos += 2; + continue; + } + + $pos++; + } + + if ($pos >= $length) { + return false; + } + + // Skip past closing quote and whitespace + $pos++; + while ($pos < $length && ctype_space($json[$pos])) { + $pos++; + } + + // A colon following indicates a new key-value pair + return $pos < $length && $json[$pos] === ':'; + } + + /** + * Check if a quote at position looks like an embedded quote in a value. + * + * Returns true if the quote is embedded within a string value rather than + * starting a new key-value pair. + */ + private function looksLikeEmbeddedQuote(string $json, int $quotePos): bool + { + return ! $this->isNewKeyValuePair($json, $quotePos); + } + + /** + * Handle an unquoted string value in an object. + * + * Reads an unquoted string value (e.g., {key: value}) and wraps it in quotes. + * The value ends when a structural character (, } ]) or a quote is encountered. + * + * @param string $json The JSON string being parsed + * @param int $i The current position in the string + * + * @return int The next position to parse + */ + private function handleUnquotedStringValue(string $json, int $i): int + { + $length = strlen($json); + $value = ''; - if (str_ends_with($beforeKey, ',')) { - $beforeKey = substr($beforeKey, 0, -1); - $beforeKey = rtrim($beforeKey); + // Collect the unquoted value + while ($i < $length) { + $char = $json[$i]; + + // Stop at structural characters or quotes + if (in_array($char, [',', '}', ']', '"', "'"], true)) { + break; } - $this->output = $beforeKey; + $value .= $char; + $i++; + } + + // Trim trailing whitespace from the value + $value = rtrim($value); + + // Check if this looks like an incomplete boolean/null (e.g., "tru", "fals", "nul", "tr") + // These should be treated as empty values, not quoted strings + $lowerValue = strtolower($value); + $incompletePatterns = ['t', 'tr', 'tru', 'f', 'fa', 'fal', 'fals', 'n', 'nu', 'nul']; + + if (in_array($lowerValue, $incompletePatterns, true)) { + // This is an incomplete boolean/null at end of input - treat as empty value + // Only do this if we're at the end of the JSON (no more meaningful content) + $remainingJson = substr($json, $i); + $trimmedRemaining = trim($remainingJson); + + // If the remaining content is just closing braces/brackets, this is incomplete + if ($trimmedRemaining === '' || preg_match('/^[}\]]+$/', $trimmedRemaining) === 1) { + if ($this->omitEmptyValues) { + $this->removeCurrentKey(); + } else { + $this->output .= '""'; + } + + $this->state = self::STATE_EXPECTING_COMMA_OR_END; + $this->currentKeyStart = -1; + + return $i; + } + } + + // If we stopped because we hit a quote, check if it's part of a new key-value pair + // Check if this looks like a new key pattern ("key":) + if ($i < $length && ($json[$i] === '"' || $json[$i] === "'") && $this->isNewKeyValuePair($json, $i)) { + // This is a new key, so the unquoted value ends here + // Output the unquoted value as a quoted string + $this->output .= '"' . $this->escapeStringValue($value) . '"'; $this->currentKeyStart = -1; + // Insert a comma before the new key and set state to expect the key + $this->output .= ', '; + $this->state = self::STATE_IN_OBJECT_KEY; + + return $i; } + + // Output the unquoted value as a quoted string + if ($value !== '') { + $this->output .= '"' . $this->escapeStringValue($value) . '"'; + $this->state = self::STATE_EXPECTING_COMMA_OR_END; + $this->currentKeyStart = -1; + } + + return $i; + } + + /** + * Escape special characters in a string value for JSON output. + */ + private function escapeStringValue(string $value): string + { + return str_replace(['\\', '"'], ['\\\\', '\\"'], $value); + } + + /** + * Check if the character at the given position is a smart/curly quote. + * + * Smart quotes are typographic quote characters like " " ' ' that are + * sometimes used instead of regular ASCII quotes. Returns the byte length + * (3 for UTF-8 smart quotes) or 0 if not a smart quote. + */ + private function getSmartQuoteLength(string $json, int $pos): int + { + if ($pos + 2 >= strlen($json)) { + return 0; + } + + $threeBytes = substr($json, $pos, 3); + + if (in_array($threeBytes, ["\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x98", "\xE2\x80\x99"], true)) { + return 3; + } + + return 0; + } + + /** + * Log a repair action with context. + * + * @param string $message Description of the repair action + * @param array $context Additional context data + */ + private function log(string $message, array $context = []): void + { + $this->logger?->debug($message, array_merge([ + 'position' => $this->pos, + 'context' => $this->getContextSnippet(), + ], $context)); + } + + /** + * Get a snippet of the JSON around the current position for logging context. + */ + private function getContextSnippet(int $window = 15): string + { + $start = max(0, $this->pos - $window); + $end = min(strlen($this->json), $this->pos + $window); + + $before = substr($this->json, $start, $this->pos - $start); + $after = substr($this->json, $this->pos, $end - $this->pos); + + return $before . '>>>' . $after; } } diff --git a/src/functions.php b/src/functions.php index be87879..54d8751 100644 --- a/src/functions.php +++ b/src/functions.php @@ -4,6 +4,8 @@ namespace Cortex\JsonRepair; +use Psr\Log\LoggerInterface; + /** * Repair a broken JSON string. * @@ -11,6 +13,7 @@ * @param bool $ensureAscii Whether to escape non-ASCII characters (default: true) * @param bool $omitEmptyValues Whether to remove keys with missing values instead of adding empty strings (default: false) * @param bool $omitIncompleteStrings Whether to remove keys with incomplete string values instead of closing them (default: false) + * @param \Psr\Log\LoggerInterface|null $logger Optional PSR-3 logger for debugging repair actions * * @return string The repaired JSON string */ @@ -19,9 +22,14 @@ function json_repair( bool $ensureAscii = true, bool $omitEmptyValues = false, bool $omitIncompleteStrings = false, + ?LoggerInterface $logger = null, ): string { $repairer = new JsonRepairer($json, $ensureAscii, $omitEmptyValues, $omitIncompleteStrings); + if ($logger instanceof LoggerInterface) { + $repairer->setLogger($logger); + } + return $repairer->repair(); } @@ -34,6 +42,7 @@ function json_repair( * @param bool $ensureAscii Whether to escape non-ASCII characters (default: true) * @param bool $omitEmptyValues Whether to remove keys with missing values instead of adding empty strings (default: false) * @param bool $omitIncompleteStrings Whether to remove keys with incomplete string values instead of closing them (default: false) + * @param \Psr\Log\LoggerInterface|null $logger Optional PSR-3 logger for debugging repair actions * * @return array|object The decoded JSON data */ @@ -44,8 +53,13 @@ function json_repair_decode( bool $ensureAscii = true, bool $omitEmptyValues = false, bool $omitIncompleteStrings = false, + ?LoggerInterface $logger = null, ): array|object { $repairer = new JsonRepairer($json, $ensureAscii, $omitEmptyValues, $omitIncompleteStrings); + if ($logger instanceof LoggerInterface) { + $repairer->setLogger($logger); + } + return $repairer->decode($depth, $flags); } diff --git a/tests/Datasets/JsonRepair.php b/tests/Datasets/JsonRepair.php new file mode 100644 index 0000000..d8e54f0 --- /dev/null +++ b/tests/Datasets/JsonRepair.php @@ -0,0 +1,578 @@ + [ + '{"key": "value",}', + '{"key": "value"}', + ], + 'object with multiple keys and trailing comma' => [ + '{"key1": "v1", "key2": "v2",}', + '{"key1": "v1", "key2": "v2"}', + ], + 'array with trailing comma' => [ + '[1, 2, 3,]', + '[1, 2, 3]', + ], +]); + +dataset('missing_commas', [ + 'object missing comma' => [ + '{"key1": "v1" "key2": "v2"}', + '{"key1": "v1","key2": "v2"}', + ], + 'array missing commas' => [ + '["a" "b" "c"]', + '["a","b","c"]', + ], +]); + +dataset('missing_closing_brackets', [ + 'object missing closing brace' => [ + '{"key": "value"', + '{"key": "value"}', + ], + 'array missing closing bracket' => [ + '["a", "b"', + '["a", "b"]', + ], +]); + +dataset('missing_closing_braces', [ + 'simple object' => ['{"key": "value"', 'key', 'value'], + 'nested object' => ['{"key1": {"key2": "value"', 'key1.key2', 'value'], +]); + +dataset('missing_values', [ + 'single missing value' => [ + '{"key": }', + '{"key":""}', + ], + 'multiple keys with missing value' => [ + '{"key1": "v1", "key2": }', + '{"key1": "v1", "key2":""}', + ], +]); + +// ============================================================================ +// QUOTES +// ============================================================================ + +dataset('single_quotes_to_double', [ + 'single key-value' => [ + "{'key': 'value'}", + '{"key": "value"}', + ], + 'multiple key-values' => [ + "{'name': 'John', 'age': 30}", + '{"name": "John", "age": 30}', + ], +]); + +dataset('unquoted_keys', [ + 'single unquoted key' => [ + '{key: "value"}', + '{"key": "value"}', + ], + 'multiple unquoted keys' => [ + '{name: "John", age: 30}', + '{"name": "John", "age": 30}', + ], +]); + +dataset('mixed_quotes', [ + 'mixed single and double quotes' => [ + "{'key': 'string', 'key2': false, \"key3\": null, \"key4\": unquoted}", + '{"key": "string", "key2": false, "key3": null, "key4": "unquoted"}', + ], + 'unquoted value in middle' => [ + '{"name": "John", "age": 30, "city": New York}', + '{"name": "John", "age": 30, "city": "New York"}', + ], + 'unquoted value at start' => [ + '{"name": John, "age": 30, "city": "New York"}', + '{"name": "John", "age": 30, "city": "New York"}', + ], + 'slanted delimiters' => [ + '{""slanted_delimiter"": "value"}', + '{"slanted_delimiter": "value"}', + ], + 'double quotes inside string value' => [ + '{"key": ""value"}', + '{"key": "value"}', + ], + 'numeric key' => [ + '{"key": "value", 5: "value"}', + '{"key": "value", "5": "value"}', + ], + 'empty key' => [ + '{"" key":"val"}', + '{" key":"val"}', + ], + 'unquoted value before quoted key' => [ + '{"key": value "key2" : "value2"}', + '{"key": "value", "key2": "value2"}', + ], + 'trailing comma and space' => [ + '{"key": value , }', + '{"key": "value"}', + ], +]); + +dataset('quotes_inside_strings', [ + 'quotes inside string with comma' => [ + '{"key": "lorem ipsum ... "sic " tamet. ...}', + '{"key": "lorem ipsum ... \\"sic \\" tamet. ..."}', + ], + 'quotes inside string with comma and text' => [ + '{"comment": "lorem, "ipsum" sic "tamet". To improve"}', + '{"comment": "lorem, \\"ipsum\\" sic \\"tamet\\". To improve"}', + ], + 'quotes splitting value' => [ + '{"key": "v"alu"e"}', + '{"key": "v\\"alu\\"e"}', + ], + 'quotes splitting value with comma' => [ + '{"key": "v"alue", "key2": "value2"}', + '{"key": "v\\"alue", "key2": "value2"}', + ], + 'quotes splitting value in array' => [ + '[{"key": "v"alu,e", "key2": "value2"}]', + '[{"key": "v\\"alu,e", "key2": "value2"}]', + ], +]); + +// ============================================================================ +// INCOMPLETE JSON (streaming, cut-off) +// ============================================================================ + +dataset('incomplete_json', [ + 'incomplete string value' => [ + '{"key": "val', + '{"key": "val"}', + ], + 'missing value' => [ + '{"key": ', + '{"key":""}', + ], + 'incomplete array' => [ + '["a", "b', + '["a", "b"]', + ], +]); + +dataset('streaming_llm_responses', [ + 'cut off mid-string value' => [ + '{"name": "John", "description": "A person who', + '{"name": "John", "description": "A person who"}', + ], + 'cut off mid-number' => [ + '{"count": 123', + '{"count": 123}', + ], + 'cut off mid-decimal' => [ + '{"price": 99.9', + '{"price": 99.9}', + ], + 'cut off mid-boolean' => [ + '{"active": tru', + '{"active": ""}', + ], + 'cut off after colon' => [ + '{"name": "John", "age": ', + '{"name": "John", "age":""}', + ], + 'cut off mid-key' => [ + '{"name": "John", "user', + '{"name": "John", "user":""}', + ], + 'cut off mid-object' => [ + '{"user": {"name": "John", "age": 30', + '{"user": {"name": "John", "age": 30}}', + ], + 'cut off mid-nested-object' => [ + '{"data": {"user": {"name": "John", "profile": {"bio": "Developer"', + '{"data": {"user": {"name": "John", "profile": {"bio": "Developer"}}}}', + ], + 'cut off mid-array' => [ + '{"items": [1, 2, 3', + '{"items": [1, 2, 3]}', + ], + 'cut off mid-array-with-objects' => [ + '{"users": [{"name": "John"}, {"name": "Jane"', + '{"users": [{"name": "John"}, {"name": "Jane"}]}', + ], + 'cut off mid-string-in-array' => [ + '{"tags": ["php", "json", "repair"', + '{"tags": ["php", "json", "repair"]}', + ], + 'cut off after comma' => [ + '{"name": "John", "age": 30, ', + '{"name": "John", "age": 30}', + ], + 'cut off mid-escape-sequence' => [ + '{"message": "Hello\\', + '{"message": "Hello"}', + ], + 'cut off mid-complete-unicode-escape' => [ + '{"emoji": "\\u263a', + '{"emoji": "\\u263a"}', + ], + 'cut off mid-incomplete-unicode-escape' => [ + '{"emoji": "\\u26', + '{"emoji": "\\\\u26"}', + ], + 'multiple-incomplete-values' => [ + '{"name": "John", "age": 30, "bio": "A developer who loves', + '{"name": "John", "age": 30, "bio": "A developer who loves"}', + ], + 'cut off mid-null' => [ + '{"value": nul', + '{"value": ""}', + ], + 'cut off mid-false' => [ + '{"enabled": fals', + '{"enabled": ""}', + ], + 'cut off mid-true' => [ + '{"active": tr', + '{"active": ""}', + ], + 'cut off with-trailing-comma-before-incomplete' => [ + '{"name": "John", "age": 30, "bio": "A', + '{"name": "John", "age": 30, "bio": "A"}', + ], + 'cut off mid-nested-array' => [ + '{"matrix": [[1, 2], [3, 4', + '{"matrix": [[1, 2], [3, 4]]}', + ], + 'cut off with-mixed-complete-and-incomplete' => [ + '{"complete": "value", "incomplete": "partial', + '{"complete": "value", "incomplete": "partial"}', + ], +]); + +// ============================================================================ +// EMBEDDED JSON (markdown, surrounding text) +// ============================================================================ + +dataset('multiple_json_objects', [ + 'empty array and object' => ['[]{}', null, null], + 'array then object' => ['[]{"key":"value"}', 'key', 'value'], + 'object then array' => ['{"key":"value"}[1,2,3,True]', null, null], +]); + +dataset('markdown_code_blocks', [ + 'single code block' => ['lorem ```json {"key":"value"} ``` ipsum', 'key', 'value'], + 'multiple code blocks' => ['```json {"key":"value"} ``` ```json [1,2,3,True] ```', null, null], +]); + +dataset('markdown_links', [ + 'markdown link in string' => [ + '{ "content": "[LINK]("https://google.com")" }', + '{"content": "[LINK](","https":"google.com",")":""}', + ], + 'incomplete markdown link' => [ + '{ "content": "[LINK](" }', + '{ "content": "[LINK](" }', + ], + 'incomplete markdown link with other keys' => [ + '{ "content": "[LINK](", "key": true }', + '{ "content": "[LINK](", "key": true }', + ], +]); + +dataset('leading_trailing_characters', [ + 'multiple backticks' => [ + '````{ "key": "value" }```', + '{"key": "value"}', + ], + 'trailing backticks with newlines' => [ + "{ \"a\": \"\", \"b\": [ { \"c\": 1} ] \n}```", + '{"a": "", "b": [{"c": 1}]}', + ], + 'text before markdown code block' => [ + "Based on the information extracted, here is the filled JSON output: ```json { 'a': 'b' } ```", + '{"a": "b"}', + ], + 'multiline text before code block' => [ + ' + The next 64 elements are: + ```json + { "key": "value" } + ```', + '{"key": "value"}', + ], +]); + +dataset('json_in_strings', [ + 'backticks in string value' => [ + '{"key": "``"}', + '{"key": "``"}', + ], + 'json code block in string' => [ + '{"key": "```json"}', + '{"key": "```json"}', + ], + 'nested JSON code block in string' => [ + '{"key": "```json {"key": [{"key1": 1},{"key2": 2}]}```"}', + '{"key": [{"key1": 1},{"key2": 2}]}', + ], + 'incomplete JSON code block in string' => [ + '{"response": "```json{}"}', + '{"response": "```json{}"}', + ], +]); + +// ============================================================================ +// SPECIAL CHARACTERS & ESCAPING +// ============================================================================ + +dataset('special_characters', [ + 'comma in string' => ['{"text": "The quick brown fox,"}', 'text', 'The quick brown fox,'], + 'apostrophe in string' => ['{"text": "The quick brown fox won\'t jump"}', 'text', "The quick brown fox won't jump"], + 'colon in string' => ['{"key": "value:value"}', 'key', 'value:value'], +]); + +dataset('escape_sequences', [ + 'newline' => ['{"key": "value\\nvalue"}', "value\nvalue"], + 'tab' => ['{"key": "value\\tvalue"}', "value\tvalue"], + 'escaped quote' => ['{"key": "value\\"value"}', 'value"value'], + 'backslash' => ['{"key": "value\\\\value"}', 'value\\value'], + 'carriage return' => ['{"key": "value\\rvalue"}', "value\rvalue"], + 'form feed' => ['{"key": "value\\fvalue"}', "value\fvalue"], + 'backspace' => ['{"key": "value\\bvalue"}', "value\x08value"], + 'forward slash' => ['{"key": "value\\/value"}', 'value/value'], + 'unicode escape' => ['{"key": "value\\u263avalue"}', 'value☺value'], + 'invalid unicode escape' => ['{"key": "value\\uXXYYvalue"}', 'value\\uXXYYvalue'], + 'invalid escape sequence' => ['{"key": "value\\xvalue"}', 'value\\xvalue'], +]); + +dataset('advanced_escaping', [ + 'mixed quote escaping with newlines' => [ + '{"key": \'string"\n\t\\le\'}', + '{"key": "string\"\\n\\t\\\\le"}', + ], + 'unicode escape sequences' => [ + '{"key": "\u0076\u0061\u006c\u0075\u0065"}', + '{"key": "\u0076\u0061\u006c\u0075\u0065"}', + ], + 'single quote in double-quoted string' => [ + '{"key": "valu\'e"}', + '{"key": "valu\'e"}', + ], + 'nested JSON string' => [ + '{\'key\': "{\\"key\\": 1, \\"key2\\": 1}"}', + '{"key": "{\\"key\\": 1, \\"key2\\": 1}"}', + ], + 'newline in key' => [ + '{"key_1\n": "value"}', + '{"key_1\\n": "value"}', + ], + 'tab in key' => [ + '{"key\t_": "value"}', + '{"key\\t_": "value"}', + ], +]); + +// ============================================================================ +// VALUES & TYPES +// ============================================================================ + +dataset('booleans_and_null', [ + 'capitalized True' => ['{"key": True}', '{"key": true}'], + 'capitalized False' => ['{"key": False}', '{"key": false}'], + 'capitalized None' => ['{"key": None}', '{"key": null}'], + 'JSON true' => ['{"key": true}', '{"key": true}'], + 'JSON false' => ['{"key": false}', '{"key": false}'], + 'JSON null' => ['{"key": null}', '{"key": null}'], + 'array with capitalized booleans' => ['[True, False, None]', '[true, false, null]'], +]); + +dataset('standalone_booleans_null', [ + 'standalone True' => ['True', ''], + 'standalone False' => ['False', ''], + 'standalone Null' => ['Null', ''], + 'standalone true' => ['true', 'true'], + 'standalone false' => ['false', 'false'], + 'standalone null' => ['null', 'null'], +]); + +dataset('numbers', [ + 'positive integer' => ['{"key": 123}', 123], + 'negative integer' => ['{"key": -123}', -123], + 'decimal' => ['{"key": 123.456}', 123.456], + 'scientific notation' => ['{"key": 123e10}', 'validate_only'], + 'large integer' => ['{"key": 12345678901234567890}', 'validate_only'], +]); + +dataset('empty_strings', [ + 'incomplete empty string' => [ + '{"key": ""', + '{"key": ""}', + ], + 'complete with empty string' => [ + '{"key1": "", "key2": "value"}', + '{"key1": "", "key2": "value"}', + ], +]); + +dataset('parse_string', [ + 'single quote' => ['"', ''], + 'newline only' => ["\n", ''], + 'space only' => [' ', ''], + 'plain string' => ['string', ''], + 'text before object' => ['stringbeforeobject {}', '{}'], +]); + +// ============================================================================ +// STRUCTURES +// ============================================================================ + +dataset('empty_structures', [ + 'empty object' => ['{}', '{}'], + 'empty array' => ['[]', '[]'], + 'object with empty array' => [ + '{"key": []}', + '{"key": []}', + ], + 'object with empty object' => [ + '{"key": {}}', + '{"key": {}}', + ], +]); + +dataset('mixed_type_arrays', [ + 'JSON booleans and null' => [ + '[1, "two", true, false, null]', + '[1, "two", true, false, null]', + ], + 'capitalized booleans and null' => [ + '[True, False, None, "string", 123]', + '[true, false, null, "string", 123]', + ], +]); + +// ============================================================================ +// CONFIGURATION OPTIONS +// ============================================================================ + +dataset('omit_empty_values_true', [ + 'missing value after colon' => [ + '{"key": }', + '{}', + ], + 'missing value with other keys' => [ + '{"key1": "v1", "key2": }', + '{"key1": "v1"}', + ], + 'missing value at end' => [ + '{"name": "John", "age": ', + '{"name": "John"}', + ], + 'key without colon' => [ + '{"key"', + '{}', + ], + 'multiple missing values' => [ + '{"key1": "v1", "key2": , "key3": "v3", "key4": }', + '{"key1": "v1", "key3": "v3"}', + ], + 'nested object with missing value' => [ + '{"user": {"name": "John", "age": }}', + '{"user": {"name": "John"}}', + ], + 'all values missing' => [ + '{"key1": , "key2": }', + '{}', + ], +]); + +dataset('omit_empty_values_false', [ + 'missing value after colon' => [ + '{"key": }', + '{"key":""}', + ], + 'missing value with other keys' => [ + '{"key1": "v1", "key2": }', + '{"key1": "v1", "key2":""}', + ], +]); + +dataset('omit_incomplete_strings_true', [ + 'cut off mid-string value' => [ + '{"name": "John", "description": "A person who', + '{"name": "John"}', + ], + 'incomplete string at end' => [ + '{"key": "val', + '{}', + ], + 'multiple incomplete strings' => [ + '{"name": "John", "bio": "A developer who', + '{"name": "John"}', + ], + 'complete and incomplete strings' => [ + '{"complete": "value", "incomplete": "partial', + '{"complete": "value"}', + ], + 'nested object with incomplete string' => [ + '{"user": {"name": "John", "bio": "A person', + '{"user": {"name": "John"}}', + ], + 'all strings incomplete' => [ + '{"key1": "val1', + '{}', + ], +]); + +dataset('omit_incomplete_strings_false', [ + 'cut off mid-string value' => [ + '{"name": "John", "description": "A person who', + '{"name": "John", "description": "A person who"}', + ], + 'incomplete string at end' => [ + '{"key": "val', + '{"key": "val"}', + ], +]); + +dataset('combined_options', [ + 'missing value and incomplete string' => [ + '{"name": "John", "age": , "bio": "A developer who', + '{"name": "John"}', + ], + 'multiple issues' => [ + '{"key1": "v1", "key2": , "key3": "partial', + '{"key1": "v1"}', + ], + 'all values problematic' => [ + '{"key1": , "key2": "incomplete', + '{}', + ], +]); diff --git a/tests/Unit/JsonRepairerTest.php b/tests/Unit/JsonRepairerTest.php index be77f91..73bf8cd 100644 --- a/tests/Unit/JsonRepairerTest.php +++ b/tests/Unit/JsonRepairerTest.php @@ -5,738 +5,605 @@ namespace Cortex\JsonRepair\Tests\Unit; use Cortex\JsonRepair\JsonRepairer; +use ColinODell\PsrTestLogger\TestLogger; use function Cortex\JsonRepair\json_repair; use function Cortex\JsonRepair\json_repair_decode; covers(JsonRepairer::class); -it('passes through valid JSON unchanged', function (string $json): void { - $result = json_repair($json); - expect(json_validate($result))->toBeTrue(); - expect(json_decode($result, true))->toBe(json_decode($json, true)); -})->with([ - '{"name": "John", "age": 30, "city": "New York"}', - '{"employees":["John", "Anna", "Peter"]}', - '{"key": "value:value"}', - '{"text": "The quick brown fox,"}', - '{"text": "The quick brown fox won\'t jump"}', - '{"key": ""}', - '{"key1": {"key2": [1, 2, 3]}}', - '{"key": 12345678901234567890}', -]); - -it('repairs single quotes to double quotes', function (string $input, array $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - foreach ($expected as $key => $value) { - expect($decoded[$key])->toBe($value); - } -})->with([ - 'single key-value' => [ - "{'key': 'value'}", [ - 'key' => 'value', - ]], - 'multiple key-values' => [ - "{'name': 'John', 'age': 30}", [ - 'name' => 'John', - 'age' => 30, - ]], -]); - -it('repairs unquoted keys', function (string $input, array $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - foreach ($expected as $key => $value) { - expect($decoded[$key])->toBe($value); - } -})->with([ - 'single unquoted key' => [ - '{key: "value"}', [ - 'key' => 'value', - ]], - 'multiple unquoted keys' => [ - '{name: "John", age: 30}', [ - 'name' => 'John', - 'age' => 30, - ]], -]); - -it('repairs missing quotes around keys', function (): void { - $result = json_repair('{key: "value"}'); - expect(json_validate($result))->toBeTrue(); - expect(json_decode($result, true)['key'])->toBe('value'); -}); +describe('JSON repairs', function (): void { + it('passes through valid JSON unchanged', function (string $json): void { + $result = json_repair($json); + expect(json_validate($result))->toBeTrue(); + expect(json_decode($result, true))->toBe(json_decode($json, true)); + })->with('valid_json'); -it('repairs trailing commas', function (string $input, array $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'object with trailing comma' => [ - '{"key": "value",}', [ - 'key' => 'value', - ]], - 'object with multiple keys and trailing comma' => [ - '{"key1": "v1", "key2": "v2",}', [ - 'key1' => 'v1', - 'key2' => 'v2', - ]], - 'array with trailing comma' => ['[1, 2, 3,]', [1, 2, 3]], -]); - -it('repairs missing commas', function (string $input, array $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'object missing comma' => [ - '{"key1": "v1" "key2": "v2"}', [ - 'key1' => 'v1', - 'key2' => 'v2', - ]], - 'array missing commas' => ['["a" "b" "c"]', ['a', 'b', 'c']], -]); - -it('repairs missing colons', function (): void { - $result = json_repair('{"key" "value"}'); - expect(json_validate($result))->toBeTrue(); - expect(json_decode($result, true)['key'])->toBe('value'); + it('handles non-JSON strings', function (string $input, string $expected): void { + $result = json_repair($input); + expect($result)->toBe($expected); + + if ($result !== '') { + expect(json_validate($result))->toBeTrue(); + expect(json_decode($result, true))->toBe([]); + } + })->with('parse_string'); + + it('repairs single quotes to double quotes', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('single_quotes_to_double'); + + it('repairs unquoted keys', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('unquoted_keys'); + + it('repairs missing quotes around keys', function (): void { + $result = json_repair('{key: "value"}'); + expect(json_validate($result))->toBeTrue(); + expect(json_decode($result, true)['key'])->toBe('value'); + }); + + it('handles mixed single and double quotes', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('mixed_quotes'); + + it('handles quotes inside string values', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + })->with('quotes_inside_strings'); + + it('repairs trailing commas', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('trailing_commas'); + + it('repairs missing commas', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('missing_commas'); + + it('repairs missing colons', function (): void { + $result = json_repair('{"key" "value"}'); + expect(json_validate($result))->toBeTrue(); + expect(json_decode($result, true)['key'])->toBe('value'); + }); + + it('repairs missing closing brackets', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('missing_closing_brackets'); + + it('repairs missing closing braces', function (string $input, string $expectedPath, string $expectedValue): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + $decoded = json_decode($result, true); + + $value = $decoded; + foreach (explode('.', $expectedPath) as $key) { + $value = $value[$key]; + } + + expect($value)->toBe($expectedValue); + })->with('missing_closing_braces'); + + it('repairs missing values', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('missing_values'); + + it('handles missing keys in objects', function (): void { + $input = '{: "value"}'; + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + $decoded = json_decode($result, true); + expect($decoded)->toBeArray(); + expect($decoded)->toHaveKey('value'); + expect($decoded['value'])->toBe(''); + }); }); -it('repairs missing closing brackets', function (string $input, array $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'object missing closing brace' => [ - '{"key": "value"', [ - 'key' => 'value', - ]], - 'array missing closing bracket' => ['["a", "b"', ['a', 'b']], -]); - -it('repairs missing closing braces', function (string $input, string $expectedPath, string $expectedValue): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - $keys = explode('.', $expectedPath); - $value = $decoded; - foreach ($keys as $key) { - $value = $value[$key]; - } - - expect($value)->toBe($expectedValue); -})->with([ - 'simple object' => ['{"key": "value"', 'key', 'value'], - 'nested object' => ['{"key1": {"key2": "value"', 'key1.key2', 'value'], -]); - -it('repairs missing values', function (string $input, array $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'single missing value' => [ - '{"key": }', [ - 'key' => '', - ]], - 'multiple keys with missing value' => [ - '{"key1": "v1", "key2": }', [ - 'key1' => 'v1', - 'key2' => '', - ]], -]); - -it('repairs non-standard booleans and null', function (string $input, mixed $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - - if (is_array($expected)) { - expect($decoded)->toBe($expected); - } else { - expect($decoded['key'])->toBe($expected); - } -})->with([ - 'capitalized True' => ['{"key": True}', true], - 'capitalized False' => ['{"key": False}', false], - 'capitalized None' => ['{"key": None}', null], - 'JSON true' => ['{"key": true}', true], - 'JSON false' => ['{"key": false}', false], - 'JSON null' => ['{"key": null}', null], - 'array with capitalized booleans' => ['[True, False, None]', [true, false, null]], -]); - -it('handles nested structures', function (string $input): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - expect(json_decode($result, true))->toBe(json_decode($input, true)); -})->with([ - '{"key1": {"key2": [1, 2, 3]}}', - '{"employees":["John", "Anna", "Peter"]}', -]); - -it('handles empty structures', function (string $input, mixed $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - expect(json_decode($result, true))->toBe($expected); -})->with([ - 'empty object' => ['{}', []], - 'empty array' => ['[]', []], - 'object with empty array' => [ - '{"key": []}', [ - 'key' => [], - ]], - 'object with empty object' => [ - '{"key": {}}', [ - 'key' => [], - ]], -]); - -it('handles numbers correctly', function (string $input, int|float|string $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - - if (is_string($expected)) { - // For large numbers, just validate they're valid JSON - expect(json_validate($result))->toBeTrue(); - } else { - expect(json_decode($result, true)['key'])->toBe($expected); - } -})->with([ - 'positive integer' => ['{"key": 123}', 123], - 'negative integer' => ['{"key": -123}', -123], - 'decimal' => ['{"key": 123.456}', 123.456], - 'scientific notation' => ['{"key": 123e10}', 'validate_only'], - 'large integer' => ['{"key": 12345678901234567890}', 'validate_only'], -]); - -it('handles multiple JSON objects', function (string $input, ?string $expectedKey, ?string $expectedValue): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - - if ($expectedKey !== null) { +describe('Values and structures', function (): void { + it('repairs non-standard booleans and null', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('booleans_and_null'); - if ($expectedValue !== null) { - expect($decoded[$expectedKey])->toBe($expectedValue); - } else { - expect($decoded)->toBeArray(); + it('handles standalone booleans and null', function (string $input, string $expected): void { + $result = json_repair($input); + expect($result)->toBe($expected); + + if ($result !== '') { + expect(json_validate($result))->toBeTrue(); } - } -})->with([ - 'empty array and object' => ['[]{}', null, null], - 'array then object' => ['[]{"key":"value"}', 'key', 'value'], - 'object then array' => ['{"key":"value"}[1,2,3,True]', null, null], -]); - -it( - 'extracts JSON from markdown code blocks', - function (string $input, ?string $expectedKey, ?string $expectedValue): void { + })->with('standalone_booleans_null'); + + it('handles numbers correctly', function (string $input, int|float|string $expected): void { $result = json_repair($input); expect(json_validate($result))->toBeTrue(); - if ($expectedKey !== null) { - expect(json_decode($result, true)[$expectedKey])->toBe($expectedValue); + if (is_string($expected)) { + return; } - }, -)->with([ - 'single code block' => ['lorem ```json {"key":"value"} ``` ipsum', 'key', 'value'], - 'multiple code blocks' => ['```json {"key":"value"} ``` ```json [1,2,3,True] ```', null, null], -]); - -it( - 'handles strings with special characters', - function (string $input, string $expectedKey, string $expectedValue): void { + + expect(json_decode($result, true)['key'])->toBe($expected); + })->with('numbers'); + + it( + 'handles strings with special characters', + function (string $input, string $expectedKey, string $expectedValue): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + $decoded = json_decode($result, true); + expect($decoded[$expectedKey])->toBe($expectedValue); + }, + )->with('special_characters'); + + it('handles escape sequences', function (string $input, string $expectedValue): void { $result = json_repair($input); expect(json_validate($result))->toBeTrue(); $decoded = json_decode($result, true); - expect($decoded[$expectedKey])->toBe($expectedValue); - }, -)->with([ - 'comma in string' => ['{"text": "The quick brown fox,"}', 'text', 'The quick brown fox,'], - 'apostrophe in string' => ['{"text": "The quick brown fox won\'t jump"}', 'text', "The quick brown fox won't jump"], - 'colon in string' => ['{"key": "value:value"}', 'key', 'value:value'], -]); - -it('handles unicode characters when ensureAscii is false', function (): void { - $input = "{'test_中国人_ascii':'统一码'}"; - $result = json_repair($input, ensureAscii: false); - expect(json_validate($result))->toBeTrue(); - expect($result)->toContain('统一码'); - expect($result)->toContain('test_中国人_ascii'); - - $decoded = json_decode($result, true); - expect($decoded)->toHaveKey('test_中国人_ascii'); - expect($decoded['test_中国人_ascii'])->toBe('统一码'); -}); + expect($decoded)->toBeArray(); + expect($decoded)->toHaveKey('key'); + expect($decoded['key'])->toBe($expectedValue); + })->with('escape_sequences'); -it('handles escape sequences', function (string $input, string $expectedValue): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBeArray(); - expect($decoded)->toHaveKey('key'); - expect($decoded['key'])->toBe($expectedValue); -})->with([ - 'newline' => ['{"key": "value\\nvalue"}', "value\nvalue"], - 'tab' => ['{"key": "value\\tvalue"}', "value\tvalue"], - 'escaped quote' => ['{"key": "value\\"value"}', 'value"value'], - 'backslash' => ['{"key": "value\\\\value"}', 'value\\value'], - 'carriage return' => ['{"key": "value\\rvalue"}', "value\rvalue"], - 'form feed' => ['{"key": "value\\fvalue"}', "value\fvalue"], - 'backspace' => ['{"key": "value\\bvalue"}', "value\x08value"], - 'forward slash' => ['{"key": "value\\/value"}', 'value/value'], - 'unicode escape' => ['{"key": "value\\u263avalue"}', 'value☺value'], - 'invalid unicode escape' => ['{"key": "value\\uXXYYvalue"}', 'value\\uXXYYvalue'], - 'invalid escape sequence' => ['{"key": "value\\xvalue"}', 'value\\xvalue'], -]); - -it('works with JsonRepairer class directly', function (): void { - $repairer = new JsonRepairer("{'key': 'value'}"); - $result = $repairer->repair(); - expect(json_validate($result))->toBeTrue(); - expect(json_decode($result, true)['key'])->toBe('value'); -}); + it('handles advanced escaping cases', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); -it('can decode repaired JSON', function (): void { - $repairer = new JsonRepairer("{'key': 'value', 'number': 123}"); - $decoded = $repairer->decode(); + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('advanced_escaping'); - expect($decoded)->toBeArray(); - expect($decoded['key'])->toBe('value'); - expect($decoded['number'])->toBe(123); -}); + it('handles unicode characters when ensureAscii is false', function (): void { + $input = "{'test_中国人_ascii':'统一码'}"; + $result = json_repair($input, ensureAscii: false); + expect(json_validate($result))->toBeTrue(); + expect($result)->toContain('统一码'); + expect($result)->toContain('test_中国人_ascii'); -it('can use json_repair_decode helper function', function (): void { - $decoded = json_repair_decode("{'key': 'value', 'number': 123}"); + $decoded = json_decode($result, true); + expect($decoded)->toHaveKey('test_中国人_ascii'); + expect($decoded['test_中国人_ascii'])->toBe('统一码'); + }); - expect($decoded)->toBeArray(); - expect($decoded['key'])->toBe('value'); - expect($decoded['number'])->toBe(123); -}); + it('handles empty strings as values', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); -it('handles complex nested structures', function (): void { - // Input has missing closing bracket after first name's prefix, causing nested structure - $input = '{"resourceType": "Bundle", "id": "1", "type": "collection", "entry": [{"resource": {"resourceType": "Patient", "id": "1", "name": [{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."}, {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}]}}]}'; - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - - $decoded = json_decode($result, true); - expect($decoded)->toBeArray(); - expect($decoded['resourceType'])->toBe('Bundle'); - expect($decoded['id'])->toBe('1'); - expect($decoded['type'])->toBe('collection'); - expect($decoded['entry'])->toBeArray(); - expect($decoded['entry'][0]['resource']['resourceType'])->toBe('Patient'); - expect($decoded['entry'][0]['resource']['name'])->toBeArray(); - expect($decoded['entry'][0]['resource']['name'])->toHaveCount(1); - expect($decoded['entry'][0]['resource']['name'][0]['use'])->toBe('official'); - expect($decoded['entry'][0]['resource']['name'][0]['family'])->toBe('Corwin'); - expect($decoded['entry'][0]['resource']['name'][0]['given'])->toBe(['Keisha', 'Sunny']); - // Due to missing bracket, second name object is nested in prefix array - expect($decoded['entry'][0]['resource']['name'][0]['prefix'][0])->toBe('Mrs.'); - expect($decoded['entry'][0]['resource']['name'][0]['prefix'][1])->toBeArray(); - expect($decoded['entry'][0]['resource']['name'][0]['prefix'][1]['use'])->toBe('maiden'); - expect($decoded['entry'][0]['resource']['name'][0]['prefix'][1]['family'])->toBe('Goodwin'); -}); + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('empty_strings'); + + it('handles nested structures', function (string $input): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect(json_decode($result, true))->toBe(json_decode($input, true)); + })->with('nested_structures'); -it('handles strings with quotes inside', function (): void { - // Input has literal \n and unescaped quotes inside the string value - $input = '{\n"html": "

Waarom meer dan 200 Technical Experts - "Passie voor techniek"?

"}'; - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - // The \n becomes a key "n" with value "html", and unescaped quotes split the rest - expect($decoded)->toBeArray(); - expect($decoded)->toHaveKey('n'); - expect($decoded)->toHaveKey('

toBe('html'); - expect($decoded['

Waarom meer dan 200 Technical Experts - '); - expect($decoded['Passie'])->toBe('?

'); + it('handles empty structures', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('empty_structures'); + + it('handles arrays with mixed types', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('mixed_type_arrays'); }); -it('handles arrays with mixed types', function (string $input, array $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - expect(json_decode($result, true))->toBe($expected); -})->with([ - 'JSON booleans and null' => ['[1, "two", true, false, null]', [1, 'two', true, false, null]], - 'capitalized booleans and null' => ['[True, False, None, "string", 123]', [true, false, null, 'string', 123]], -]); - -it('handles empty strings as values', function (string $input, array $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'incomplete empty string' => [ - '{"key": ""', [ - 'key' => '', - ]], - 'complete with empty string' => [ - '{"key1": "", "key2": "value"}', [ - 'key1' => '', - 'key2' => 'value', - ]], -]); - -it('handles missing keys in objects', function (): void { - // This is a tricky case - missing key before colon - $input = '{: "value"}'; - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBeArray(); - // When key is missing, it treats the value as the key with empty value - expect($decoded)->toHaveKey('value'); - expect($decoded['value'])->toBe(''); +describe('Edge cases and special features', function (): void { + it('handles incomplete JSON at end of string', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('incomplete_json'); + + it('repairs incomplete JSON from streaming LLM responses', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('streaming_llm_responses'); + + it('handles complex nested structures', function (): void { + $input = '{"resourceType": "Bundle", "id": "1", "type": "collection", "entry": [{"resource": {"resourceType": "Patient", "id": "1", "name": [{"use": "official", "family": "Corwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."}, {"use": "maiden", "family": "Goodwin", "given": ["Keisha", "Sunny"], "prefix": ["Mrs."]}]}}]}'; + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + + $decoded = json_decode($result, true); + expect($decoded)->toBeArray(); + expect($decoded['resourceType'])->toBe('Bundle'); + expect($decoded['id'])->toBe('1'); + expect($decoded['type'])->toBe('collection'); + expect($decoded['entry'])->toBeArray(); + expect($decoded['entry'][0]['resource']['resourceType'])->toBe('Patient'); + expect($decoded['entry'][0]['resource']['name'])->toBeArray(); + expect($decoded['entry'][0]['resource']['name'])->toHaveCount(1); + expect($decoded['entry'][0]['resource']['name'][0]['use'])->toBe('official'); + expect($decoded['entry'][0]['resource']['name'][0]['family'])->toBe('Corwin'); + expect($decoded['entry'][0]['resource']['name'][0]['given'])->toBe(['Keisha', 'Sunny']); + expect($decoded['entry'][0]['resource']['name'][0]['prefix'][0])->toBe('Mrs.'); + expect($decoded['entry'][0]['resource']['name'][0]['prefix'][1])->toBeArray(); + expect($decoded['entry'][0]['resource']['name'][0]['prefix'][1]['use'])->toBe('maiden'); + expect($decoded['entry'][0]['resource']['name'][0]['prefix'][1]['family'])->toBe('Goodwin'); + }); + + it('handles multiple JSON objects', function (string $input, ?string $expectedKey, ?string $expectedValue): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + + if ($expectedKey !== null) { + $decoded = json_decode($result, true); + + if ($expectedValue !== null) { + expect($decoded[$expectedKey])->toBe($expectedValue); + } else { + expect($decoded)->toBeArray(); + } + } + })->with('multiple_json_objects'); + + it( + 'extracts JSON from markdown code blocks', + function (string $input, ?string $expectedKey, ?string $expectedValue): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + + if ($expectedKey !== null) { + expect(json_decode($result, true)[$expectedKey])->toBe($expectedValue); + } + }, + )->with('markdown_code_blocks'); + + it('handles markdown links in strings', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('markdown_links'); + + it('handles leading and trailing characters', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('leading_trailing_characters'); + + it('handles JSON code blocks inside string values', function (string $input, string $expected): void { + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('json_in_strings'); + + it('handles whitespace normalization', function (): void { + $input = '{"key" : "value" , "key2" : "value2"}'; + $result = json_repair($input); + expect(json_validate($result))->toBeTrue(); + $decoded = json_decode($result, true); + expect($decoded)->toBe([ + 'key' => 'value', + 'key2' => 'value2', + ]); + }); + + it('removes comments', function (): void { + $result = json_repair('{"key": "value"} // comment'); + expect(json_validate($result))->toBeTrue(); + $decoded = json_decode($result, true); + expect($decoded)->toBe([ + 'key' => 'value', + ]); + }); }); -it('handles incomplete JSON at end of string', function (string $input, array $expected): void { - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'incomplete string value' => [ - '{"key": "val', [ - 'key' => 'val', - ]], - 'missing value' => [ - '{"key": ', [ - 'key' => '', - ]], - 'incomplete array' => ['["a", "b', ['a', 'b']], -]); - -it('repairs incomplete JSON from streaming LLM responses', function (string $input, array $expected): void { - // Simulates JSON being streamed from an LLM where deltas are concatenated - // The JSON is valid up to a point but may be cut off mid-value, mid-string, etc. - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'cut off mid-string value' => [ - '{"name": "John", "description": "A person who', [ - 'name' => 'John', - 'description' => 'A person who', - ]], - 'cut off mid-number' => [ - '{"count": 123', [ - 'count' => 123, - ]], - 'cut off mid-decimal' => [ - '{"price": 99.9', [ - 'price' => 99.9, - ]], - 'cut off mid-boolean' => [ - '{"active": tru', [ - 'active' => '', - ]], - 'cut off after colon' => [ - '{"name": "John", "age": ', [ - 'name' => 'John', - 'age' => '', - ]], - 'cut off mid-key' => [ - '{"name": "John", "user', [ - 'name' => 'John', - 'user' => '', - ]], - 'cut off mid-object' => [ - '{"user": {"name": "John", "age": 30', [ - 'user' => [ - 'name' => 'John', - 'age' => 30, - ], - ]], - 'cut off mid-nested-object' => [ - '{"data": {"user": {"name": "John", "profile": {"bio": "Developer"', [ - 'data' => [ +describe('Options', function (): void { + describe('omitEmptyValues', function (): void { + it('omits empty values when omitEmptyValues is true', function (string $input, string $expected): void { + $result = json_repair($input, omitEmptyValues: true); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('omit_empty_values_true'); + + it('keeps empty values when omitEmptyValues is false', function (string $input, string $expected): void { + $result = json_repair($input, omitEmptyValues: false); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + })->with('omit_empty_values_false'); + + it('handles nested structures with omitEmptyValues', function (): void { + $input = '{"user": {"name": "John", "age": }, "meta": {"count": }}'; + $expected = '{"user": {"name": "John"}, "meta": {}}'; + $result = json_repair($input, omitEmptyValues: true); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe([ 'user' => [ 'name' => 'John', - 'profile' => [ - 'bio' => 'Developer', - ], - ], - ], - ]], - 'cut off mid-array' => [ - '{"items": [1, 2, 3', [ - 'items' => [1, 2, 3], - ]], - 'cut off mid-array-with-objects' => [ - '{"users": [{"name": "John"}, {"name": "Jane"', [ - 'users' => [ - [ - 'name' => 'John', ], - [ - 'name' => 'Jane', - ], - ], - ]], - 'cut off mid-string-in-array' => [ - '{"tags": ["php", "json", "repair"', [ - 'tags' => ['php', 'json', 'repair'], - ]], - 'cut off after comma' => [ - '{"name": "John", "age": 30, ', [ - 'name' => 'John', - 'age' => 30, - ]], - 'cut off mid-escape-sequence' => [ - '{"message": "Hello\\', [ - 'message' => 'Hello', - ]], - 'cut off mid-unicode-escape' => [ - '{"emoji": "\\u263a', [ - 'emoji' => '\\u263a263a', // Unicode handler reads beyond string end in this edge case - ]], - 'multiple-incomplete-values' => [ - '{"name": "John", "age": 30, "bio": "A developer who loves', [ - 'name' => 'John', - 'age' => 30, - 'bio' => 'A developer who loves', - ]], - 'cut off mid-null' => [ - '{"value": nul', [ - 'value' => '', - ]], - 'cut off mid-false' => [ - '{"enabled": fals', [ - 'enabled' => '', - ]], - 'cut off mid-true' => [ - '{"active": tr', [ - 'active' => '', - ]], - 'cut off with-trailing-comma-before-incomplete' => [ - '{"name": "John", "age": 30, "bio": "A', [ - 'name' => 'John', - 'age' => 30, - 'bio' => 'A', - ]], - 'cut off mid-nested-array' => [ - '{"matrix": [[1, 2], [3, 4', [ - 'matrix' => [ - [1, 2], - [3, 4], - ], - ]], - 'cut off with-mixed-complete-and-incomplete' => [ - '{"complete": "value", "incomplete": "partial', [ - 'complete' => 'value', - 'incomplete' => 'partial', - ]], -]); - -it('handles whitespace normalization', function (): void { - $input = '{"key" : "value" , "key2" : "value2"}'; - $result = json_repair($input); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe([ - 'key' => 'value', - 'key2' => 'value2', - ]); + 'meta' => [], + ]); + }); + + it('handles edge case where removing key leaves empty object', function (): void { + $input = '{"key": }'; + $expected = '{}'; + $result = json_repair($input, omitEmptyValues: true); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe([]); + }); + }); + + describe('omitIncompleteStrings', function (): void { + it( + 'omits incomplete strings when omitIncompleteStrings is true', + function (string $input, string $expected): void { + $result = json_repair($input, omitIncompleteStrings: true); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + }, + )->with('omit_incomplete_strings_true'); + + it( + 'keeps incomplete strings when omitIncompleteStrings is false', + function (string $input, string $expected): void { + $result = json_repair($input, omitIncompleteStrings: false); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + }, + )->with('omit_incomplete_strings_false'); + + it('handles edge case where removing incomplete string leaves empty object', function (): void { + $input = '{"key": "val'; + $expected = '{}'; + $result = json_repair($input, omitIncompleteStrings: true); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe([]); + }); + }); + + describe('combined options', function (): void { + it( + 'handles both omitEmptyValues and omitIncompleteStrings together', + function (string $input, string $expected): void { + $result = json_repair($input, omitEmptyValues: true, omitIncompleteStrings: true); + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe($expected); + + $decoded = json_decode($result, true); + expect($decoded)->toBe(json_decode($expected, true)); + }, + )->with('combined_options'); + }); }); -it('omits empty values when omitEmptyValues is true', function (string $input, array $expected): void { - $result = json_repair($input, omitEmptyValues: true); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'missing value after colon' => [ - '{"key": }', [], - ], - 'missing value with other keys' => [ - '{"key1": "v1", "key2": }', [ - 'key1' => 'v1', - ], - ], - 'missing value at end' => [ - '{"name": "John", "age": ', [ - 'name' => 'John', - ], - ], - 'key without colon' => [ - '{"key"', [], - ], - 'multiple missing values' => [ - '{"key1": "v1", "key2": , "key3": "v3", "key4": }', [ - 'key1' => 'v1', - 'key3' => 'v3', - ], - ], - 'nested object with missing value' => [ - '{"user": {"name": "John", "age": }}', [ - 'user' => [ - 'name' => 'John', - ], - ], - ], - 'all values missing' => [ - '{"key1": , "key2": }', [], - ], -]); - -it('keeps empty values when omitEmptyValues is false', function (string $input, array $expected): void { - $result = json_repair($input, omitEmptyValues: false); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'missing value after colon' => [ - '{"key": }', [ - 'key' => '', - ], - ], - 'missing value with other keys' => [ - '{"key1": "v1", "key2": }', [ - 'key1' => 'v1', - 'key2' => '', - ], - ], -]); - -it('omits incomplete strings when omitIncompleteStrings is true', function (string $input, array $expected): void { - $result = json_repair($input, omitIncompleteStrings: true); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'cut off mid-string value' => [ - '{"name": "John", "description": "A person who', [ - 'name' => 'John', - ], - ], - 'incomplete string at end' => [ - '{"key": "val', [], - ], - 'multiple incomplete strings' => [ - '{"name": "John", "bio": "A developer who', [ - 'name' => 'John', - ], - ], - 'complete and incomplete strings' => [ - '{"complete": "value", "incomplete": "partial', [ - 'complete' => 'value', - ], - ], - 'nested object with incomplete string' => [ - '{"user": {"name": "John", "bio": "A person', [ - 'user' => [ - 'name' => 'John', - ], - ], - ], - 'all strings incomplete' => [ - '{"key1": "val1', [], - ], -]); - -it('keeps incomplete strings when omitIncompleteStrings is false', function (string $input, array $expected): void { - $result = json_repair($input, omitIncompleteStrings: false); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'cut off mid-string value' => [ - '{"name": "John", "description": "A person who', [ - 'name' => 'John', - 'description' => 'A person who', - ], - ], - 'incomplete string at end' => [ - '{"key": "val', [ - 'key' => 'val', - ], - ], -]); - -it('handles both omitEmptyValues and omitIncompleteStrings together', function (string $input, array $expected): void { - $result = json_repair($input, omitEmptyValues: true, omitIncompleteStrings: true); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe($expected); -})->with([ - 'missing value and incomplete string' => [ - '{"name": "John", "age": , "bio": "A developer who', [ - 'name' => 'John', - ], - ], - 'multiple issues' => [ - '{"key1": "v1", "key2": , "key3": "partial', [ - 'key1' => 'v1', - ], - ], - 'all values problematic' => [ - '{"key1": , "key2": "incomplete', [], - ], -]); - -it('works with JsonRepairer class directly with omitEmptyValues', function (): void { - $repairer = new JsonRepairer('{"key": }', omitEmptyValues: true); - $result = $repairer->repair(); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe([]); -}); +describe('API usage', function (): void { + it('works with JsonRepairer class directly', function (): void { + $repairer = new JsonRepairer("{'key': 'value'}"); + $result = $repairer->repair(); + expect(json_validate($result))->toBeTrue(); + expect(json_decode($result, true)['key'])->toBe('value'); + }); -it('works with JsonRepairer class directly with omitIncompleteStrings', function (): void { - $repairer = new JsonRepairer('{"key": "val', omitIncompleteStrings: true); - $result = $repairer->repair(); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe([]); -}); + it('can decode repaired JSON', function (): void { + $repairer = new JsonRepairer("{'key': 'value', 'number': 123}"); + $decoded = $repairer->decode(); -it('works with json_repair_decode with new options', function (): void { - $decoded = json_repair_decode('{"key": }', omitEmptyValues: true); - expect($decoded)->toBeArray(); - expect($decoded)->toBe([]); -}); + expect($decoded)->toBeArray(); + expect($decoded['key'])->toBe('value'); + expect($decoded['number'])->toBe(123); + }); -it('handles nested structures with omitEmptyValues', function (): void { - $input = '{"user": {"name": "John", "age": }, "meta": {"count": }}'; - $result = json_repair($input, omitEmptyValues: true); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe([ - 'user' => [ - 'name' => 'John', - ], - 'meta' => [], - ]); -}); + it('can use json_repair_decode helper function', function (): void { + $decoded = json_repair_decode("{'key': 'value', 'number': 123}"); + + expect($decoded)->toBeArray(); + expect($decoded['key'])->toBe('value'); + expect($decoded['number'])->toBe(123); + }); + + it('works with JsonRepairer class directly with omitEmptyValues', function (): void { + $repairer = new JsonRepairer('{"key": }', omitEmptyValues: true); + $result = $repairer->repair(); + expect(json_validate($result))->toBeTrue(); + $decoded = json_decode($result, true); + expect($decoded)->toBe([]); + }); -it('handles edge case where removing key leaves empty object', function (): void { - $input = '{"key": }'; - $result = json_repair($input, omitEmptyValues: true); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe([]); - expect($result)->toBe('{}'); + it('works with JsonRepairer class directly with omitIncompleteStrings', function (): void { + $repairer = new JsonRepairer('{"key": "val', omitIncompleteStrings: true); + $result = $repairer->repair(); + expect(json_validate($result))->toBeTrue(); + $decoded = json_decode($result, true); + expect($decoded)->toBe([]); + }); + + it('works with json_repair_decode with new options', function (): void { + $decoded = json_repair_decode('{"key": }', omitEmptyValues: true); + expect($decoded)->toBeArray(); + expect($decoded)->toBe([]); + }); }); -it('handles edge case where removing incomplete string leaves empty object', function (): void { - $input = '{"key": "val'; - $result = json_repair($input, omitIncompleteStrings: true); - expect(json_validate($result))->toBeTrue(); - $decoded = json_decode($result, true); - expect($decoded)->toBe([]); - expect($result)->toBe('{}'); +describe('Logging', function (): void { + it('logs nothing for valid JSON', function (): void { + $logger = new TestLogger(); + + $result = json_repair('{"key": "value"}', logger: $logger); + + expect($logger->hasDebug('JSON is already valid, returning as-is'))->toBeTrue(); + expect($logger->records)->toHaveCount(1); + expect($result)->toBe('{"key": "value"}'); + }); + + it('logs repair actions for unclosed strings and brackets', function (): void { + $logger = new TestLogger(); + + $result = json_repair('{"key": "value', logger: $logger); + + expect($logger->hasDebug('Starting JSON repair'))->toBeTrue(); + expect($logger->hasDebug('Adding missing closing quote for unclosed string'))->toBeTrue(); + expect($logger->hasDebug('Adding missing closing bracket/brace'))->toBeTrue(); + + expect(json_validate($result))->toBeTrue(); + expect($result)->toBe('{"key": "value"}'); + }); + + it('logs quote conversions and boolean normalization', function (): void { + $logger = new TestLogger(); + + $result = json_repair("{'active': True}", logger: $logger); + + expect($logger->hasDebug('Converting single-quoted key to double quotes'))->toBeTrue(); + expect($logger->hasDebugThatPasses( + fn(array $record): bool => $record['message'] === 'Normalizing boolean/null value' + && $record['context']['from'] === 'True' + && $record['context']['to'] === 'true', + ))->toBeTrue(); + + expect($result)->toBe('{"active": true}'); + }); + + it('logs unquoted key and value repairs', function (): void { + $logger = new TestLogger(); + + $result = json_repair('{name: John}', logger: $logger); + + expect($logger->hasDebug('Adding quotes around unquoted key'))->toBeTrue(); + expect($logger->hasDebug('Found unquoted string value, adding quotes'))->toBeTrue(); + + expect($result)->toBe('{"name": "John"}'); + }); + + it('logs missing comma and colon insertions', function (): void { + $logger = new TestLogger(); + + $result = json_repair('{"a": 1 "b" 2}', logger: $logger); + + expect($logger->hasDebug('Inserting missing comma'))->toBeTrue(); + expect($logger->hasDebug('Inserting missing colon after key'))->toBeTrue(); + + expect(json_validate($result))->toBeTrue(); + }); + + it('logs context with position information', function (): void { + $logger = new TestLogger(); + + json_repair('{"key": value}', logger: $logger); + + // Verify that log entries include position and context + expect($logger->hasDebugThatPasses( + fn(array $record): bool => isset($record['context']['position']) + && isset($record['context']['context']) + && str_contains((string) $record['context']['context'], '>>>'), + ))->toBeTrue(); + }); + + it('logs markdown extraction', function (): void { + $logger = new TestLogger(); + + $result = json_repair('```json {"key": "value"} ```', logger: $logger); + + expect($logger->hasDebug('Extracted JSON from markdown code block'))->toBeTrue(); + expect($result)->toBe('{"key": "value"}'); + }); + + it('logs omitEmptyValues actions', function (): void { + $logger = new TestLogger(); + + $result = json_repair('{"a": 1, "b": }', omitEmptyValues: true, logger: $logger); + + expect($logger->hasDebug('Removing key with missing value (omitEmptyValues enabled)'))->toBeTrue(); + expect($result)->toBe('{"a": 1}'); + }); + + it('works with JsonRepairer class and setLogger', function (): void { + $logger = new TestLogger(); + + $repairer = new JsonRepairer("{'key': 'value'}"); + $repairer->setLogger($logger); + + $result = $repairer->repair(); + + expect($logger->hasDebugRecords())->toBeTrue(); + expect($result)->toBe('{"key": "value"}'); + }); });