diff --git a/.changeset/heavy-clouds-walk.md b/.changeset/heavy-clouds-walk.md new file mode 100644 index 0000000..28a3b8d --- /dev/null +++ b/.changeset/heavy-clouds-walk.md @@ -0,0 +1,16 @@ +--- +"unicode-segmenter": patch +--- + +Fix GB9c rule; reset internal "InCB=Consonant" state properly. + +For example, given the following input: + +``` +# Malayalam KA + Virama + SPACE + VA +"ക് വ" +``` + +It will now correctly produce three separate segments. + +Thanks to @spaceemotion for reporting this issue. diff --git a/src/grapheme.js b/src/grapheme.js index f5e8a6a..d0a6ef4 100644 --- a/src/grapheme.js +++ b/src/grapheme.js @@ -150,6 +150,7 @@ export function* graphemeSegments(input) { // Reset segment state emoji = false; + consonant = false; riCount = 0; index = cursor; _catBegin = catAfter; diff --git a/test/grapheme.js b/test/grapheme.js index d4efaed..f91d4f2 100644 --- a/test/grapheme.js +++ b/test/grapheme.js @@ -307,6 +307,7 @@ test('counterexamples', async t => { ' जा', ' କା', ' ଶ୍ୟା', + 'ക് വ', ]; for (let counter of counterExamples) {