From 8de04bcf61de43a13f100986b6f1ff055ef1eeb8 Mon Sep 17 00:00:00 2001 From: rexmhall09 <70306565+rexmhall09@users.noreply.github.com> Date: Tue, 10 Mar 2026 12:32:00 -0700 Subject: [PATCH 1/3] Use safe ratio helper in regularity Add _safe_round_ratio to avoid division-by-zero when computing proportion fields in regularity(). Replace direct round(divisions) with the helper for pattern, full_proportion and word ratios so they return 0.0 if the denominator is zero. Extend tests to cover cases that produce zeroed outputs (different min_refs and sound_classes), ensuring the function returns stable 0.0 proportions instead of raising errors or producing NaN. --- src/lingrex/regularity.py | 10 +++++++--- tests/test_regularity.py | 6 ++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/lingrex/regularity.py b/src/lingrex/regularity.py index 7bc01bd..0154bba 100644 --- a/src/lingrex/regularity.py +++ b/src/lingrex/regularity.py @@ -6,6 +6,10 @@ from lingpy import log +def _safe_round_ratio(part, whole): + return round((part / whole), 2) if whole else 0.0 + + def regularity(wordlist, threshold=3, ref="cogid", min_refs=3, word_threshold=0.75, sound_classes="cv"): """ @@ -96,13 +100,13 @@ def regularity(wordlist, threshold=3, ref="cogid", min_refs=3, regular_patterns, patterns - regular_patterns, patterns, - round((regular_patterns / patterns), 2), + _safe_round_ratio(regular_patterns, patterns), regular_proportion, full_proportion - regular_proportion, full_proportion, - round((regular_proportion / full_proportion), 2), + _safe_round_ratio(regular_proportion, full_proportion), regular_words, irregular_words, regular_words + irregular_words, - round((regular_words / (regular_words + irregular_words)), 2), + _safe_round_ratio(regular_words, regular_words + irregular_words), ) diff --git a/tests/test_regularity.py b/tests/test_regularity.py index b27375e..9776871 100644 --- a/tests/test_regularity.py +++ b/tests/test_regularity.py @@ -33,3 +33,9 @@ def 
test_regularity(): sound_classes="cv") assert output == (2, 5, 7, 0.29, 4, 5, 9, 0.44, 3, 4, 7, 0.43) + assert regularity( + test_alg, threshold=2, word_threshold=0.5, sound_classes="cv", min_refs=5 + ) == (2, 5, 7, 0.29, 4, 5, 9, 0.44, 0, 0, 0, 0.0) + assert regularity( + test_alg, threshold=2, word_threshold=0.5, sound_classes="T" + ) == (0, 0, 0, 0.0, 0, 0, 0, 0.0, 0, 0, 0, 0.0) From 40f1cef7eef788c54a6bcb86312b3212226de891 Mon Sep 17 00:00:00 2001 From: rexmhall09 <70306565+rexmhall09@users.noreply.github.com> Date: Wed, 11 Mar 2026 12:37:01 -0700 Subject: [PATCH 2/3] Warn when regularity finds no patterns or refs Add runtime warnings in regularity() for edge cases where no patterns are found or no cognate sets meet min_refs, clarifying that regularity proportions are set to 0.0. Import warnings in the module and update tests to assert the appropriate RuntimeWarning messages (use pytest.warns and import warns). --- src/lingrex/regularity.py | 23 +++++++++++++++++++++++ tests/test_regularity.py | 16 +++++++++------- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/src/lingrex/regularity.py b/src/lingrex/regularity.py index 0154bba..ae37c54 100644 --- a/src/lingrex/regularity.py +++ b/src/lingrex/regularity.py @@ -2,6 +2,7 @@ Calculate regularity metrics on dataset. 
""" import statistics +import warnings from lingpy import log @@ -96,6 +97,28 @@ def regularity(wordlist, threshold=3, ref="cogid", min_refs=3, else: irregular_words += len(set(msa["taxa"])) + if not patterns and not (regular_words + irregular_words): + warnings.warn( + "No patterns found for sound_classes={0!r}; regularity proportions " + "are set to 0.0.".format(sound_classes), + RuntimeWarning, + stacklevel=2, + ) + elif not patterns: + warnings.warn( + "No patterns found for sound_classes={0!r}; pattern regularity " + "proportions are set to 0.0.".format(sound_classes), + RuntimeWarning, + stacklevel=2, + ) + elif not (regular_words + irregular_words): + warnings.warn( + "No cognate sets meet min_refs={0}; word regularity is set " + "to 0.0.".format(min_refs), + RuntimeWarning, + stacklevel=2, + ) + return ( regular_patterns, patterns - regular_patterns, diff --git a/tests/test_regularity.py b/tests/test_regularity.py index 9776871..45cc3a4 100644 --- a/tests/test_regularity.py +++ b/tests/test_regularity.py @@ -1,4 +1,4 @@ -from pytest import raises +from pytest import raises, warns from lingpy import Wordlist, Alignments from lingrex.copar import CoPaR from lingrex.util import add_structure @@ -33,9 +33,11 @@ def test_regularity(): sound_classes="cv") assert output == (2, 5, 7, 0.29, 4, 5, 9, 0.44, 3, 4, 7, 0.43) - assert regularity( - test_alg, threshold=2, word_threshold=0.5, sound_classes="cv", min_refs=5 - ) == (2, 5, 7, 0.29, 4, 5, 9, 0.44, 0, 0, 0, 0.0) - assert regularity( - test_alg, threshold=2, word_threshold=0.5, sound_classes="T" - ) == (0, 0, 0, 0.0, 0, 0, 0, 0.0, 0, 0, 0, 0.0) + with warns(RuntimeWarning, match=r"No cognate sets meet min_refs=5"): + assert regularity( + test_alg, threshold=2, word_threshold=0.5, sound_classes="cv", min_refs=5 + ) == (2, 5, 7, 0.29, 4, 5, 9, 0.44, 0, 0, 0, 0.0) + with warns(RuntimeWarning, match=r"No patterns found for sound_classes='T'"): + assert regularity( + test_alg, threshold=2, word_threshold=0.5, 
sound_classes="T" + ) == (0, 0, 0, 0.0, 0, 0, 0, 0.0, 0, 0, 0, 0.0) From 9de59361fe7f900f771e277894017e6f4d90af87 Mon Sep 17 00:00:00 2001 From: rexmhall09 <70306565+rexmhall09@users.noreply.github.com> Date: Wed, 11 Mar 2026 14:15:31 -0700 Subject: [PATCH 3/3] Convert regularity warnings to ValueError Stop issuing runtime warnings in regularity and instead raise ValueError for three no-data edge cases: when no patterns are detected, when no eligible alignment sites are found (full_proportion == 0), and when no words meet the min_refs threshold. Remove the _safe_round_ratio helper and the warnings import, and replace its uses with inline round calls. Update tests to expect exceptions (raises) and remove warns usage accordingly. --- src/lingrex/regularity.py | 38 +++++++++++++------------------------- tests/test_regularity.py | 20 +++++++++++++------- 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/src/lingrex/regularity.py b/src/lingrex/regularity.py index ae37c54..8b487a2 100644 --- a/src/lingrex/regularity.py +++ b/src/lingrex/regularity.py @@ -2,14 +2,10 @@ Calculate regularity metrics on dataset. """ import statistics -import warnings from lingpy import log -def _safe_round_ratio(part, whole): - return round((part / whole), 2) if whole else 0.0 - def regularity(wordlist, threshold=3, ref="cogid", min_refs=3, word_threshold=0.75, sound_classes="cv"): @@ -97,39 +93,31 @@ def regularity(wordlist, threshold=3, ref="cogid", min_refs=3, else: irregular_words += len(set(msa["taxa"])) - if not patterns and not (regular_words + irregular_words): - warnings.warn( - "No patterns found for sound_classes={0!r}; regularity proportions " - "are set to 0.0.".format(sound_classes), - RuntimeWarning, - stacklevel=2, + if patterns == 0: + raise ValueError( + "Cannot compute regularity: no patterns were detected in the data. " + "Check sound_classes or input data." 
) - elif not patterns: - warnings.warn( - "No patterns found for sound_classes={0!r}; pattern regularity " - "proportions are set to 0.0.".format(sound_classes), - RuntimeWarning, - stacklevel=2, + if full_proportion == 0: + raise ValueError( + "Cannot compute regularity: no eligible alignment sites were found." ) - elif not (regular_words + irregular_words): - warnings.warn( - "No cognate sets meet min_refs={0}; word regularity is set " - "to 0.0.".format(min_refs), - RuntimeWarning, - stacklevel=2, + if (regular_words + irregular_words) == 0: + raise ValueError( + "Cannot compute regularity: no words satisfy the min_refs threshold." ) return ( regular_patterns, patterns - regular_patterns, patterns, - _safe_round_ratio(regular_patterns, patterns), + round((regular_patterns / patterns), 2), regular_proportion, full_proportion - regular_proportion, full_proportion, - _safe_round_ratio(regular_proportion, full_proportion), + round((regular_proportion / full_proportion), 2), regular_words, irregular_words, regular_words + irregular_words, - _safe_round_ratio(regular_words, regular_words + irregular_words), + round((regular_words / (regular_words + irregular_words)), 2), ) diff --git a/tests/test_regularity.py b/tests/test_regularity.py index 45cc3a4..786be50 100644 --- a/tests/test_regularity.py +++ b/tests/test_regularity.py @@ -1,4 +1,4 @@ -from pytest import raises, warns +from pytest import raises from lingpy import Wordlist, Alignments from lingrex.copar import CoPaR from lingrex.util import add_structure @@ -33,11 +33,17 @@ def test_regularity(): sound_classes="cv") assert output == (2, 5, 7, 0.29, 4, 5, 9, 0.44, 3, 4, 7, 0.43) - with warns(RuntimeWarning, match=r"No cognate sets meet min_refs=5"): - assert regularity( + with raises( + ValueError, + match=r"Cannot compute regularity: no words satisfy the min_refs threshold\.", + ): + regularity( test_alg, threshold=2, word_threshold=0.5, sound_classes="cv", min_refs=5 - ) == (2, 5, 7, 0.29, 4, 5, 9, 0.44, 0, 0, 
0, 0.0) - with warns(RuntimeWarning, match=r"No patterns found for sound_classes='T'"): - assert regularity( + ) + with raises( + ValueError, + match=r"Cannot compute regularity: no patterns were detected in the data\.", + ): + regularity( test_alg, threshold=2, word_threshold=0.5, sound_classes="T" - ) == (0, 0, 0, 0.0, 0, 0, 0, 0.0, 0, 0, 0, 0.0) + )