diff --git a/pythainlp/khavee/core.py b/pythainlp/khavee/core.py index f940e8922..7e2f78f80 100644 --- a/pythainlp/khavee/core.py +++ b/pythainlp/khavee/core.py @@ -54,9 +54,15 @@ def check_sara(self, word: str) -> str: sara = [] countoa = 0 + if not word: + return "" + # In case of การันย์ if "์" in word[-1]: word = word[:-2] + # After removing the karun, the word may become empty (e.g. "ก์") + if not word: + return "" # In case of สระเดี่ยว for i in word: @@ -253,6 +259,9 @@ def check_marttra(self, word: str) -> str: word = self.handle_karun_sound_silence(word) word = remove_tonemark(word) + if not word: + return "" + # Check for ำ at the end (represents "am" sound, ends with m) if word[-1] == "ำ": return "กม" diff --git a/pythainlp/morpheme/word_formation.py b/pythainlp/morpheme/word_formation.py index 63453a25e..45817dc31 100644 --- a/pythainlp/morpheme/word_formation.py +++ b/pythainlp/morpheme/word_formation.py @@ -31,6 +31,14 @@ def nighit(w1: str, w2: str) -> str: assert nighit("สํ","ปทา")=="สัมปทา" assert nighit("สํ","โยค")=="สังโยค" """ + if not isinstance(w1, str) or not isinstance(w2, str): + raise TypeError("Both w1 and w2 must be strings.") + w1 = w1.strip() + w2 = w2.strip() + if not w1: + return w2 + if not w2: + return w1 if not str(w1).endswith("ํ") and len(w1) != 2: raise NotImplementedError(f"The function doesn't support {w1}.") list_w1 = list(w1) @@ -38,7 +46,13 @@ def nighit(w1: str, w2: str) -> str: newword = [] newword.append(list_w1[0]) newword.append("ั") - consonant_start = [i for i in list_w2 if i in set(thai_consonants)][0] + _consonants = set(thai_consonants) + consonants_in_w2 = [i for i in list_w2 if i in _consonants] + if not consonants_in_w2: + raise ValueError( + f"w2 {w2!r} contains no Thai consonants." + ) + consonant_start = consonants_in_w2[0] if consonant_start in ["ก", "ช", "ค", "ข", "ง"]: newword.append("ง") elif consonant_start in ["จ", "ฉ", "ช", "ฌ"]: