From 4d6ffb7c1d672cc3e295e5ca6f06530e09d285d5 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Fri, 1 May 2026 08:42:28 +0100 Subject: [PATCH 1/5] Add ValueError tests for nighit (PR 1377) PR #1377 adds guardrails to nighit(). This PR adds tests for those guardrails. Signed-off-by: Arthit Suriyawongkul --- tests/core/test_morpheme.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/core/test_morpheme.py b/tests/core/test_morpheme.py index 9e81b2b28..6a20b728d 100644 --- a/tests/core/test_morpheme.py +++ b/tests/core/test_morpheme.py @@ -10,15 +10,26 @@ class MorphemeTestCase(unittest.TestCase): def test_nighit(self): self.assertEqual(nighit("สํ", "คีต"), "สังคีต") + self.assertEqual( + nighit("สํ", "คีต "), "สังคีต" + ) # w2 has trailing space, should still work + self.assertEqual( + nighit("สํ ", "คีต"), "สังคีต" + ) # w1 has trailing space, should still work self.assertEqual(nighit("สํ", "จร"), "สัญจร") self.assertEqual(nighit("สํ", "ฐาน"), "สัณฐาน") self.assertEqual(nighit("สํ", "นิษฐาน"), "สันนิษฐาน") self.assertEqual(nighit("สํ", "ปทา"), "สัมปทา") self.assertEqual(nighit("สํ", "โยค"), "สังโยค") + self.assertEqual(nighit("", "คีต"), "คีต") # w1 is empty, should return w2 + self.assertEqual(nighit("สํ", ""), "สํ") # w2 is empty, should return w1 + with self.assertRaises(NotImplementedError): nighit("abc", "คีต") # w1 does not end with ํ and len > 2 with self.assertRaises(NotImplementedError): nighit("สํ", "มาร") # consonant ม is not in any supported group + with self.assertRaises(ValueError): + nighit("สํ", "123") # w2 does not contain any Thai consonant def test_is_native_thai(self): self.assertFalse(is_native_thai(None)) # type: ignore[arg-type] From 3813cfb07a1d6feff2381321f13edc8334884d85 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Fri, 1 May 2026 08:48:48 +0100 Subject: [PATCH 2/5] Add check_sara tests Signed-off-by: Arthit Suriyawongkul --- tests/core/test_khavee.py | 6 ++++++ 1 file changed, 6 insertions(+) diff 
--git a/tests/core/test_khavee.py b/tests/core/test_khavee.py index 0a89559e6..a3082058e 100644 --- a/tests/core/test_khavee.py +++ b/tests/core/test_khavee.py @@ -258,3 +258,9 @@ def test_เอือ_sara(self): def test_returns_string(self): self.assertIsInstance(self.kv.check_sara("เริง"), str) + + def test_empty_string_returns_empty(self): + self.assertEqual(self.kv.check_sara(""), "") + + def test_empty_string_after_removing_karun_returns_empty(self): + self.assertEqual(self.kv.check_sara("ก์"), "") From 4bd58e8a66f0a0fc8cb7ed4ec50c434515144a5a Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Fri, 1 May 2026 08:53:39 +0100 Subject: [PATCH 3/5] Add test for check_sara Signed-off-by: Arthit Suriyawongkul --- tests/core/test_khavee.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/core/test_khavee.py b/tests/core/test_khavee.py index a3082058e..b49ea6c2c 100644 --- a/tests/core/test_khavee.py +++ b/tests/core/test_khavee.py @@ -264,3 +264,8 @@ def test_empty_string_returns_empty(self): def test_empty_string_after_removing_karun_returns_empty(self): self.assertEqual(self.kv.check_sara("ก์"), "") + + def test_empty_string_after_removing_tone_marks_returns_empty(self): + self.assertEqual( + self.kv.check_sara("\u0e48"), "" + ) # The string contains only Thai Mai Ek tone mark From 427595d5e3d60fafff3bc3ef20082cbe9f64ee73 Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Fri, 1 May 2026 09:15:56 +0100 Subject: [PATCH 4/5] Update CHANGELOG.md Signed-off-by: Arthit Suriyawongkul --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 330d57cde..978055405 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,12 @@ and this project adheres to - Full release notes: - Commit history: +## [Unreleased] + +### Changed + +- Improve guardrails in `check_sara()` and `nighit()` + ## [5.3.4] - 2026-04-02 ### Fixed From 6ca49ce8fca039025d390697c63c395139b66d30 Mon Sep 17 00:00:00 2001 From: Arthit 
Suriyawongkul Date: Fri, 1 May 2026 10:04:25 +0100 Subject: [PATCH 5/5] Return empty string from check_sara() when no sara is found Signed-off-by: Arthit Suriyawongkul --- pythainlp/khavee/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythainlp/khavee/core.py b/pythainlp/khavee/core.py index 71a0dde2d..79c9673b6 100644 --- a/pythainlp/khavee/core.py +++ b/pythainlp/khavee/core.py @@ -217,7 +217,7 @@ def check_sara(self, word: str) -> str: sara.append("เอือ") if not sara: - return "Can't find Sara in this word" + return "" return sara[0]