From a35f46630589c906730d38146d72def501042067 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 18 Mar 2026 00:10:37 +0000
Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=AA=20Add=20test=20coverage=20and=20fi?=
 =?UTF-8?q?x=20parsing=20bugs=20in=20`parse=5Fnumeric`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: dhanush342 <187305764+dhanush342@users.noreply.github.com>
---
Reviewer notes (commentary only, not part of the commit message):
 * evaluate_gsm8k.py: the fraction pattern now accepts an optional leading
   minus sign on the numerator, and the number pattern now requires digits
   after the optional decimal point instead of before it, so ".5" and "-.5"
   are matched as whole tokens.
 * test_evaluate_gsm8k.py: new pytest module for parse_numeric. It replaces
   the `datasets` and `inference` entries in sys.modules with MagicMock
   objects before importing evaluate_gsm8k.

 evaluate_gsm8k.py      |  4 +--
 test_evaluate_gsm8k.py | 57 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 test_evaluate_gsm8k.py

diff --git a/evaluate_gsm8k.py b/evaluate_gsm8k.py
index 3826a32..3a90718 100644
--- a/evaluate_gsm8k.py
+++ b/evaluate_gsm8k.py
@@ -18,7 +18,7 @@ def parse_numeric(s: str):
         return None
 
     # Try fraction first
-    frac_match = re.search(r"(\d+)/(\d+)", s)
+    frac_match = re.search(r"(-?\d+)/(\d+)", s)
     if frac_match:
         try:
             return float(Fraction(int(frac_match.group(1)), int(frac_match.group(2))))
@@ -26,7 +26,7 @@ def parse_numeric(s: str):
             pass
 
     # Find decimals or integers
-    nums = re.findall(r"-?\d+\.?\d*", s)
+    nums = re.findall(r"-?\d*\.?\d+", s)
     if not nums:
         return None
 
diff --git a/test_evaluate_gsm8k.py b/test_evaluate_gsm8k.py
new file mode 100644
index 0000000..345f25f
--- /dev/null
+++ b/test_evaluate_gsm8k.py
@@ -0,0 +1,57 @@
+import sys
+from unittest.mock import MagicMock
+
+# Mock out heavy dependencies that might be missing in sandbox
+sys.modules['datasets'] = MagicMock()
+sys.modules['inference'] = MagicMock()
+
+import pytest
+from evaluate_gsm8k import parse_numeric
+
+def test_parse_numeric_valid_integers():
+    """Test parsing of valid positive and negative integers."""
+    assert parse_numeric("42") == 42.0
+    assert parse_numeric("-42") == -42.0
+    assert parse_numeric("0") == 0.0
+
+def test_parse_numeric_valid_decimals():
+    """Test parsing of valid positive and negative decimal numbers."""
+    assert parse_numeric("3.14") == 3.14
+    assert parse_numeric("-0.5") == -0.5
+    assert parse_numeric(".5") == 0.5
+    assert parse_numeric("-.5") == -0.5
+
+def test_parse_numeric_valid_fractions():
+    """Test parsing of fractions, which are expected in GSM8K answers."""
+    assert parse_numeric("1/2") == 0.5
+    assert parse_numeric("-1/2") == -0.5
+    assert parse_numeric("3/4") == 0.75
+
+def test_parse_numeric_invalid_fractions():
+    """Test fractions with zero in the denominator, which should gracefully fall back."""
+    # "1/0" has "1" and "0" as number matches. The last number matched is "0", so it returns 0.0.
+    assert parse_numeric("1/0") == 0.0
+
+def test_parse_numeric_with_text():
+    """Test extracting numbers from strings containing text."""
+    assert parse_numeric("The answer is 42.") == 42.0
+    assert parse_numeric("My fraction is 3/4 and that's it.") == 0.75
+
+def test_parse_numeric_none_or_empty():
+    """Test None, empty, and whitespace strings."""
+    assert parse_numeric(None) is None
+    assert parse_numeric("") is None
+    assert parse_numeric(" ") is None
+    assert parse_numeric("No numbers here!") is None
+
+def test_parse_numeric_multiple_numbers():
+    """Test strings with multiple numbers. Should prefer the last numeric token."""
+    assert parse_numeric("First 10 then 20.") == 20.0
+    # re.search finds the first fraction.
+    assert parse_numeric("I have 1/2 and also 3/4.") == 0.5
+
+def test_parse_numeric_invalid_types():
+    """Test behavior with non-string types."""
+    assert parse_numeric([]) is None
+    assert parse_numeric({}) is None
+    assert parse_numeric(123) is None