Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pythainlp/khavee/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,15 @@ def check_sara(self, word: str) -> str:
sara = []
countoa = 0

if not word:
return ""
Comment thread
bact marked this conversation as resolved.
Comment thread
bact marked this conversation as resolved.

# In case of karan (การันต์): the word ends with the thanthakhat mark "์"
if "์" in word[-1]:
word = word[:-2]
Comment thread
bact marked this conversation as resolved.
# After removing the karun, the word may become empty (e.g. "ก์")
if not word:
return ""

# In case of single vowels (สระเดี่ยว)
for i in word:
Expand Down Expand Up @@ -253,6 +259,9 @@ def check_marttra(self, word: str) -> str:
word = self.handle_karun_sound_silence(word)
word = remove_tonemark(word)

if not word:
return ""
Comment thread
bact marked this conversation as resolved.

# Check for ำ at the end (represents "am" sound, ends with m)
if word[-1] == "ำ":
return "กม"
Expand Down
16 changes: 15 additions & 1 deletion pythainlp/morpheme/word_formation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,28 @@ def nighit(w1: str, w2: str) -> str:
assert nighit("สํ","ปทา")=="สัมปทา"
assert nighit("สํ","โยค")=="สังโยค"
"""
if not isinstance(w1, str) or not isinstance(w2, str):
raise TypeError("Both w1 and w2 must be strings.")
w1 = w1.strip()
w2 = w2.strip()
if not w1:
return w2
if not w2:
return w1
if not str(w1).endswith("ํ") and len(w1) != 2:
raise NotImplementedError(f"The function doesn't support {w1}.")
list_w1 = list(w1)
list_w2 = list(w2)
newword = []
newword.append(list_w1[0])
newword.append("ั")
consonant_start = [i for i in list_w2 if i in set(thai_consonants)][0]
_consonants = set(thai_consonants)
consonants_in_w2 = [i for i in list_w2 if i in _consonants]
if not consonants_in_w2:
raise ValueError(
Comment thread
bact marked this conversation as resolved.
f"w2 {w2!r} contains no Thai consonants."
)
consonant_start = consonants_in_w2[0]
if consonant_start in ["ก", "ช", "ค", "ข", "ง"]:
newword.append("ง")
elif consonant_start in ["จ", "ฉ", "ช", "ฌ"]:
Expand Down
Loading