diff --git a/external.go b/external.go new file mode 100644 index 0000000..6be874c --- /dev/null +++ b/external.go @@ -0,0 +1,42 @@ +package displaywidth + +import "sync" + +var ( + externalMu sync.RWMutex + externalWidths map[string]int +) + +// SetExternalWidths installs a map of grapheme-cluster-string → cell-width +// that overrides the library's built-in width tables. This is for callers +// that have probed the terminal directly (e.g. via CSI 6n cursor position +// reports) and want String/Bytes/Rune — and consequently downstream +// libraries like charmbracelet/x/ansi and charm.land/lipgloss — to use +// the probed widths instead of the spec-derived ones. +// +// Lookup precedence per grapheme cluster: +// 1. The external map, if non-nil and the cluster is present. Printable ASCII is served by fast paths and does not consult it (see TestExternalWidthsDoesNotAffectASCII). +// 2. The library's trie + VS16/skin-tone heuristics. +// +// The map is treated as immutable post-install. Callers must replace +// the map by calling SetExternalWidths again with a new instance rather +// than mutating the existing one. Pass nil to clear. +func SetExternalWidths(m map[string]int) { + externalMu.Lock() + defer externalMu.Unlock() + externalWidths = m +} + +// externalLookup returns (width, true) if s is in the external widths +// map, or (0, false) otherwise. The map reference is copied under the read +// lock and indexed after unlocking, so installed maps must never be mutated. +func externalLookup[T ~string | []byte](s T) (int, bool) { + externalMu.RLock() + m := externalWidths + externalMu.RUnlock() + if m == nil { + return 0, false + } + w, ok := m[string(s)] + return w, ok +} diff --git a/external_test.go b/external_test.go new file mode 100644 index 0000000..e49f835 --- /dev/null +++ b/external_test.go @@ -0,0 +1,83 @@ +package displaywidth + +import "testing" + +func TestSetExternalWidths(t *testing.T) { + // Clear at end so other tests aren't affected. + defer SetExternalWidths(nil) + + // Baseline: no external map. 
+ SetExternalWidths(nil) + baseHeart := String("❤️") + baseThumbsup := String("👍") + if baseThumbsup == 0 { + t.Fatalf("baseline thumbsup should be > 0") + } + + // Install external override that contradicts the spec values. + SetExternalWidths(map[string]int{ + "❤️": 1, // spec/PR says 2 (VS16 promotion); override to 1 + "👍": 5, // spec says 2; override to 5 to verify override is used + }) + + if got := String("❤️"); got != 1 { + t.Errorf("with external override, String(❤️) = %d, want 1", got) + } + if got := String("👍"); got != 5 { + t.Errorf("with external override, String(👍) = %d, want 5", got) + } + + // Strings not in the override fall back to spec. + if got := String("🔥"); got != 2 { + t.Errorf("fallback String(🔥) = %d, want 2 (no override)", got) + } + + // Clearing returns to baseline. + SetExternalWidths(nil) + if got := String("❤️"); got != baseHeart { + t.Errorf("after clear, String(❤️) = %d, want baseline %d", got, baseHeart) + } + if got := String("👍"); got != baseThumbsup { + t.Errorf("after clear, String(👍) = %d, want baseline %d", got, baseThumbsup) + } +} + +func TestExternalWidthsAffectsBytes(t *testing.T) { + defer SetExternalWidths(nil) + + SetExternalWidths(map[string]int{"👍": 4}) + + if got := Bytes([]byte("👍")); got != 4 { + t.Errorf("Bytes(👍) with override = %d, want 4", got) + } +} + +func TestExternalWidthsDoesNotAffectASCII(t *testing.T) { + defer SetExternalWidths(nil) + + // Even if the override map says "a" is 99, ASCII fast paths should + // return the standard width — the override is checked only inside + // graphemeWidth, which doesn't run for printable ASCII. 
+ SetExternalWidths(map[string]int{"a": 99}) + + if got := String("a"); got != 1 { + t.Errorf("String(\"a\") = %d, want 1 (ASCII bypass)", got) + } + if got := String("hello"); got != 5 { + t.Errorf("String(\"hello\") = %d, want 5 (ASCII bypass)", got) + } +} + +func TestExternalWidthsMixedContent(t *testing.T) { + defer SetExternalWidths(nil) + + SetExternalWidths(map[string]int{ + "❤️": 1, + "👍": 4, + }) + + // "abc❤️def👍" → 3 + 1 + 3 + 4 = 11 + if got := String("abc❤️def👍"); got != 11 { + t.Errorf("mixed content = %d, want 11", got) + } +} diff --git a/width.go b/width.go index f6e0ab7..107b61c 100644 --- a/width.go +++ b/width.go @@ -168,18 +168,43 @@ func graphemeWidth[T ~string | []byte](s T, options Options) int { return 0 } + // External (probed) widths override the spec tables. No ASCII check + // happens here: printable ASCII is expected to be handled by caller + // fast paths before graphemeWidth runs, so it never reaches this lookup. + if w, ok := externalLookup(s); ok { + return w + } + p, sz := lookup(s) prop := property(p) - // Variation Selector 16 (VS16) requests emoji presentation - if prop != _Wide && sz > 0 && len(s) >= sz+3 { - vs := s[sz : sz+3] - if isVS16(vs) { - prop = _Wide + // Check remaining bytes in the grapheme cluster for modifiers that + // indicate emoji presentation (width 2). + // + // VS16 (U+FE0F) requests emoji presentation per Unicode TR51. + // Emoji modifiers (U+1F3FB–U+1F3FF, skin tones) form an + // emoji_modifier_sequence per UTS#51 ED-13, which is always + // rendered in emoji presentation. + // + // We scan the full cluster because these modifiers may not be + // immediately adjacent to the base character (e.g., in ZWJ + // sequences like ⛹🏻‍♂️ where VS16 is at the end). 
+ if prop != _Wide && sz > 0 && len(s) > sz { + for i := sz; i < len(s); i++ { + // VS16: U+FE0F, UTF-8 is EF B8 8F. 0xEF is never a continuation byte, so in valid UTF-8 this byte-wise scan only matches at a rune boundary. + if i+2 < len(s) && s[i] == 0xEF && s[i+1] == 0xB8 && s[i+2] == 0x8F { + prop = _Wide + break + } + // Emoji modifier (skin tone): U+1F3FB–U+1F3FF + // UTF-8 is F0 9F 8F BB through F0 9F 8F BF (0xF0 is likewise always a lead byte) + if i+3 < len(s) && s[i] == 0xF0 && s[i+1] == 0x9F && s[i+2] == 0x8F && s[i+3] >= 0xBB && s[i+3] <= 0xBF { + prop = _Wide + break + } } - // VS15 (0x8E) requests text presentation but does not affect width, - // in my reading of Unicode TR51. Falls through to return the base - // character's property. + // VS15 (U+FE0E) requests text presentation but does not affect + // width, in my reading of Unicode TR51. } if options.EastAsianWidth && prop == _East_Asian_Ambiguous { diff --git a/width_test.go b/width_test.go index bcf5a9d..b4dd49d 100644 --- a/width_test.go +++ b/width_test.go @@ -60,6 +60,26 @@ func TestStringWidth(t *testing.T) { {"keycap 1️⃣", "1️⃣", defaultOptions, 2}, // Keycap sequence: 1 + VS16 + U+20E3 (always width 2) {"keycap #️⃣", "#️⃣", defaultOptions, 2}, // Keycap sequence: # + VS16 + U+20E3 (always width 2) + // Bug fix: VS16 not adjacent to first rune in ZWJ + skin-tone + gender sequences. + // VS16 appears at the end, many bytes after the first rune. + {"⛹🏻‍♂️ bouncing ball m tone1", "\u26F9\U0001F3FB\u200D\u2642\uFE0F", defaultOptions, 2}, + {"🕵🏻‍♂️ detective m tone1", "\U0001F575\U0001F3FB\u200D\u2642\uFE0F", defaultOptions, 2}, + {"🏌🏻‍♀️ golfing f tone1", "\U0001F3CC\U0001F3FB\u200D\u2640\uFE0F", defaultOptions, 2}, + {"🏋🏿‍♂️ weights m tone5", "\U0001F3CB\U0001F3FF\u200D\u2642\uFE0F", defaultOptions, 2}, + {"⛹🏾‍♀️ bouncing ball f tone4", "\u26F9\U0001F3FE\u200D\u2640\uFE0F", defaultOptions, 2}, + {"🕵🏿‍♀️ detective f tone5", "\U0001F575\U0001F3FF\u200D\u2640\uFE0F", defaultOptions, 2}, + + // Bug fix: skin-tone modifier on text-default Extended_Pictographic base. 
+ // These form emoji_modifier_sequences (UTS#51 ED-13) and should be width 2. + {"🕵🏻 detective skin", "\U0001F575\U0001F3FB", defaultOptions, 2}, + {"☝🏽 point up skin", "\u261D\U0001F3FD", defaultOptions, 2}, + {"✌🏾 victory skin", "\u270C\U0001F3FE", defaultOptions, 2}, + {"✍🏿 writing hand skin", "\u270D\U0001F3FF", defaultOptions, 2}, + {"🖐🏻 hand splayed skin", "\U0001F590\U0001F3FB", defaultOptions, 2}, + {"⛹🏼 bouncing ball skin", "\u26F9\U0001F3FC", defaultOptions, 2}, + {"🏌🏽 golfing skin", "\U0001F3CC\U0001F3FD", defaultOptions, 2}, + {"🏋🏾 weights skin", "\U0001F3CB\U0001F3FE", defaultOptions, 2}, + // Flags (regional indicator pairs form a single grapheme, always width 2 per TR51) {"flag US", "🇺🇸", defaultOptions, 2}, {"flag JP", "🇯🇵", defaultOptions, 2},