Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions evaluate_gsm8k.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ def parse_numeric(s: str):
return None

# Try fraction first
frac_match = re.search(r"(\d+)/(\d+)", s)
frac_match = re.search(r"(-?\d+)/(\d+)", s)
if frac_match:
try:
return float(Fraction(int(frac_match.group(1)), int(frac_match.group(2))))
except Exception:
pass

# Find decimals or integers
nums = re.findall(r"-?\d+\.?\d*", s)
nums = re.findall(r"-?\d*\.?\d+", s)
if not nums:
return None

Expand Down
57 changes: 57 additions & 0 deletions test_evaluate_gsm8k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import sys
from unittest.mock import MagicMock

# Mock out heavy dependencies that might be missing in sandbox.
# The stubs are registered in sys.modules *before* evaluate_gsm8k is imported
# further down, so any `import datasets` / `import inference` executed while
# that module loads resolves to a MagicMock instead of the real package.
# NOTE(review): presumably evaluate_gsm8k imports both names at module level;
# confirm against that file.
sys.modules['datasets'] = MagicMock()
sys.modules['inference'] = MagicMock()

import pytest
from evaluate_gsm8k import parse_numeric
Comment on lines +2 to +9

def test_parse_numeric_valid_integers():
    """Whole numbers (positive, negative and zero) come back as floats."""
    expectations = {
        "42": 42.0,
        "-42": -42.0,
        "0": 0.0,
    }
    for text, value in expectations.items():
        assert parse_numeric(text) == value
assert parse_numeric("0") == 0.0

def test_parse_numeric_valid_decimals():
    """Decimal strings parse correctly, with or without a leading digit."""
    expectations = {
        "3.14": 3.14,
        "-0.5": -0.5,
        # No digit before the decimal point.
        ".5": 0.5,
        "-.5": -0.5,
    }
    for text, value in expectations.items():
        assert parse_numeric(text) == value

def test_parse_numeric_valid_fractions():
    """Simple `a/b` fractions (as seen in GSM8K answers) become their float value."""
    expectations = {
        "1/2": 0.5,
        # A leading minus sign on the numerator is honoured.
        "-1/2": -0.5,
        "3/4": 0.75,
    }
    for text, value in expectations.items():
        assert parse_numeric(text) == value

def test_parse_numeric_invalid_fractions():
    """A zero denominator must not raise; the plain-number fallback is used."""
    # The fraction branch fails on "1/0", so the parser falls back to scanning
    # for plain numbers. That scan sees "1" and "0", and this test expects the
    # last one to win, giving 0.0.
    parsed = parse_numeric("1/0")
    assert parsed == 0.0

def test_parse_numeric_with_text():
    """Numbers embedded in surrounding prose are still extracted."""
    sentence_cases = (
        ("The answer is 42.", 42.0),
        ("My fraction is 3/4 and that's it.", 0.75),
    )
    for sentence, value in sentence_cases:
        assert parse_numeric(sentence) == value

def test_parse_numeric_none_or_empty():
    """None, empty, whitespace-only and digit-free inputs all yield None."""
    for unparseable in (None, "", " ", "No numbers here!"):
        assert parse_numeric(unparseable) is None

def test_parse_numeric_multiple_numbers():
    """Several candidates: last plain number wins, but the first fraction wins."""
    # Plain numbers: the final numeric token is the one returned.
    from_plain_numbers = parse_numeric("First 10 then 20.")
    assert from_plain_numbers == 20.0

    # Fractions are located with re.search, which stops at the first match.
    from_fractions = parse_numeric("I have 1/2 and also 3/4.")
    assert from_fractions == 0.5

def test_parse_numeric_invalid_types():
    """Non-string arguments are rejected by returning None rather than raising."""
    for not_a_string in ([], {}, 123):
        assert parse_numeric(not_a_string) is None
Loading