From 943e0aa12f0251055eecd21a34987ee1321db4f4 Mon Sep 17 00:00:00 2001 From: fluffur Date: Sun, 21 Jun 2026 11:36:00 +0500 Subject: [PATCH 1/3] feat: add original message formatting helpers with tests --- formatting.go | 395 +++++++++++++++++++++++++++++++++++++++++++++ formatting_test.go | 231 ++++++++++++++++++++++++++ 2 files changed, 626 insertions(+) create mode 100644 formatting.go create mode 100644 formatting_test.go diff --git a/formatting.go b/formatting.go new file mode 100644 index 0000000..1f9221a --- /dev/null +++ b/formatting.go @@ -0,0 +1,395 @@ +package botapi + +import ( + "html" + "slices" + "strconv" + "strings" + "unicode" + "unicode/utf16" +) + +var mdMap = map[MessageEntityType]string{ + EntityBold: "*", + EntityItalic: "_", + EntityCode: "`", + EntityPre: "```", +} + +var mdV2Map = map[MessageEntityType]string{ + EntityBold: "*", + EntityItalic: "_", + EntityCode: "`", + EntityPre: "```", + EntityUnderline: "__", + EntityStrikethrough: "~", + EntitySpoiler: "||", + EntityBlockquote: ">", + EntityExpandableBlockquote: "**>", +} + +var htmlMap = map[MessageEntityType]string{ + EntityBold: "b", + EntityItalic: "i", + EntityCode: "code", + EntityPre: "pre", + EntityUnderline: "u", + EntityStrikethrough: "s", + EntitySpoiler: "span class=\"tg-spoiler\"", + EntityBlockquote: "blockquote", + EntityExpandableBlockquote: "blockquote expandable", +} + +// TextAndEntities gets message or caption text and entities +func (m *Message) TextAndEntities() (string, []MessageEntity) { + if m.Text != "" { + return m.Text, m.Entities + } + return m.Caption, m.CaptionEntities +} + +// OriginalMD gets the original markdown formatting of a message text. +func (m *Message) OriginalMD() string { + return getOrigMsgMD(utf16.Encode([]rune(m.Text)), m.Entities) +} + +// OriginalMDV2 gets the original markdownV2 formatting of a message text. +func (m *Message) OriginalMDV2() string { + return getOrigMsgMDV2(utf16.Encode([]rune(m.Text)), m.Entities) +} + +// OriginalHTML gets the original HTML formatting of a message text. +func (m *Message) OriginalHTML() string { + return getOrigMsgHTML(utf16.Encode([]rune(m.Text)), m.Entities) +} + +// OriginalCaptionMD gets the original markdown formatting of a message caption. +func (m *Message) OriginalCaptionMD() string { + return getOrigMsgMD(utf16.Encode([]rune(m.Caption)), m.CaptionEntities) +} + +// OriginalCaptionMDV2 gets the original markdownV2 formatting of a message caption. +func (m *Message) OriginalCaptionMDV2() string { + return getOrigMsgMDV2(utf16.Encode([]rune(m.Caption)), m.CaptionEntities) +} + +// OriginalCaptionHTML gets the original HTML formatting of a message caption. +func (m *Message) OriginalCaptionHTML() string { + return getOrigMsgHTML(utf16.Encode([]rune(m.Caption)), m.CaptionEntities) +} + +// OriginalTextMD gets the original markdown formatting of a message text or caption. +func (m *Message) OriginalTextMD() string { + text, ents := m.TextAndEntities() + return getOrigMsgMD(utf16.Encode([]rune(text)), ents) +} + +// OriginalTextMDV2 gets the original markdownV2 formatting of a message text or caption. +func (m *Message) OriginalTextMDV2() string { + text, ents := m.TextAndEntities() + return getOrigMsgMDV2(utf16.Encode([]rune(text)), ents) +} + +// OriginalTextHTML gets the original HTML formatting of a message text caption. +func (m *Message) OriginalTextHTML() string { + text, ents := m.TextAndEntities() + return getOrigMsgHTML(utf16.Encode([]rune(text)), ents) +} + +// Does not support nesting. only look at upper entities. +func getOrigMsgMD(utf16Data []uint16, ents []MessageEntity) string { + out := strings.Builder{} + prev := 0 + + for _, ent := range getUpperEntities(ents) { + newPrev := ent.Offset + ent.Length + prevText := string(utf16.Decode(utf16Data[prev:ent.Offset])) + + text := utf16.Decode(utf16Data[ent.Offset:newPrev]) + pre, cleanCntnt, post := splitEdgeWhitespace(string(text), ent) + cleanCntntRune := []rune(cleanCntnt) + + switch ent.Type { + case EntityBold, EntityItalic, EntityCode: + out.WriteString(prevText + pre + mdMap[ent.Type] + escapeContainedMDV1(cleanCntntRune, []rune(mdMap[ent.Type])) + mdMap[ent.Type] + post) + case EntityPre: + if ent.Language == "" { + out.WriteString(prevText + pre + mdMap[ent.Type] + + escapeContainedMDV1(cleanCntntRune, []rune(mdMap[ent.Type])) + mdMap[ent.Type] + post) + } else { + out.WriteString(prevText + pre + mdMap[ent.Type] + + ent.Language + "\n" + escapeContainedMDV1(cleanCntntRune, []rune(mdMap[ent.Type])) + mdMap[ent.Type] + post) + } + case EntityTextMention: + out.WriteString(prevText + pre + "[" + escapeContainedMDV1(cleanCntntRune, []rune("[]()")) + "](tg://user?id=" + + strconv.FormatInt(ent.User.ID, 10) + ")" + post) + case EntityTextLink: + out.WriteString(prevText + pre + "[" + escapeContainedMDV1(cleanCntntRune, []rune("[]()")) + "](" + ent.URL + ")" + post) + default: + continue + } + + prev = newPrev + } + + out.WriteString(string(utf16.Decode(utf16Data[prev:]))) + + return out.String() +} + +func getOrigMsgHTML(utf16Data []uint16, ents []MessageEntity) string { + if len(ents) == 0 { + return html.EscapeString(string(utf16.Decode(utf16Data))) + } + + bd := strings.Builder{} + prev := 0 + + for _, e := range getUpperEntities(ents) { + data, end := fillNestedHTML(utf16Data, e, prev, getChildEntities(e, ents)) + bd.WriteString(data) + + prev = end + } + + bd.WriteString(html.EscapeString(string(utf16.Decode(utf16Data[prev:])))) + + return bd.String() +} + +func getOrigMsgMDV2(utf16Data []uint16, ents []MessageEntity) (origMsg string) { + if len(ents) == 0 { + return string(utf16.Decode(utf16Data)) + } + + bd := strings.Builder{} + prev := 0 + + for _, e := range getUpperEntities(ents) { + data, end := fillNestedMarkdownV2(utf16Data, e, prev, getChildEntities(e, ents)) + bd.WriteString(data) + + prev = end + } + + bd.WriteString(string(utf16.Decode(utf16Data[prev:]))) + + return bd.String() +} + +func fillNestedHTML(data []uint16, ent MessageEntity, start int, entities []MessageEntity) (finalHTML string, entEnd int) { + entEnd = ent.Offset + ent.Length + if len(entities) == 0 || entEnd < entities[0].Offset { + // no nesting; just return straight away and move to next. + return writeFinalHTML(data, ent, start, html.EscapeString(string(utf16.Decode(data[ent.Offset:entEnd])))), entEnd + } + + subPrev := ent.Offset + subEnd := ent.Offset + bd := strings.Builder{} + + for _, e := range getUpperEntities(entities) { + if e.Offset < subEnd || e == ent { + continue + } + + if e.Offset >= entEnd { + break + } + + out, end := fillNestedHTML(data, e, subPrev, getChildEntities(e, entities)) + bd.WriteString(out) + + subPrev = end + } + + bd.WriteString(html.EscapeString(string(utf16.Decode(data[subPrev:entEnd])))) + + return writeFinalHTML(data, ent, start, bd.String()), entEnd +} + +func fillNestedMarkdownV2( + data []uint16, + ent MessageEntity, + start int, + entities []MessageEntity, +) (finalMD string, entEnd int) { + entEnd = ent.Offset + ent.Length + if len(entities) == 0 || entEnd < entities[0].Offset { + // no nesting; just return straight away and move to next. + return writeFinalMarkdownV2(data, ent, start, string(utf16.Decode(data[ent.Offset:entEnd]))), entEnd + } + + subPrev := ent.Offset + subEnd := ent.Offset + bd := strings.Builder{} + + for _, e := range getUpperEntities(entities) { + if e.Offset < subEnd || e == ent { + continue + } + + if e.Offset >= entEnd { + break + } + + out, end := fillNestedMarkdownV2(data, e, subPrev, getChildEntities(e, entities)) + bd.WriteString(out) + + subPrev = end + } + + bd.WriteString(string(utf16.Decode(data[subPrev:entEnd]))) + + return writeFinalMarkdownV2(data, ent, start, bd.String()), entEnd +} + +func writeFinalHTML(data []uint16, ent MessageEntity, start int, cntnt string) string { + prevText := html.EscapeString(string(utf16.Decode(data[start:ent.Offset]))) + switch ent.Type { + case EntityBold, EntityItalic, EntityCode, EntityUnderline, EntityStrikethrough, EntitySpoiler: + return prevText + "<" + htmlMap[ent.Type] + ">" + cntnt + "" + case EntityPre: + if ent.Language == "" { + return prevText + "
" + cntnt + "
" + } + + return prevText + `
` + cntnt + "
" + case EntityCustomEmoji: + return prevText + `` + cntnt + "" + case EntityDateTime: + if ent.DateTimeFormat != "" { + return prevText + `` + cntnt + "" + } + + return prevText + `` + cntnt + "" + case EntityTextMention: + return prevText + `` + cntnt + "" + case EntityTextLink: + return prevText + `` + cntnt + "" + case EntityBlockquote: + return prevText + `
` + cntnt + "
" + case EntityExpandableBlockquote: + return prevText + `
` + cntnt + "
" + default: + return prevText + cntnt + } +} + +// closeHTMLTag makes sure to generate the correct HTML closing tag for a given opening tag. +func closeHTMLTag(s string) string { + if !strings.HasPrefix(s, "span") { + return s + } + + return "span" +} + +func writeFinalMarkdownV2(data []uint16, ent MessageEntity, start int, cntnt string) string { + prevText := string(utf16.Decode(data[start:ent.Offset])) + pre, cleanCntnt, post := splitEdgeWhitespace(cntnt, ent) + + switch ent.Type { + case EntityBold, EntityItalic, EntityCode, EntityUnderline, EntityStrikethrough, EntitySpoiler: + return prevText + pre + mdV2Map[ent.Type] + cleanCntnt + mdV2Map[ent.Type] + post + case EntityPre: + if ent.Language == "" { + return prevText + pre + "```\n" + cleanCntnt + "```" + post + } + + return prevText + pre + "```" + ent.Language + "\n" + cleanCntnt + "```" + post + case EntityCustomEmoji: + return prevText + pre + "![" + cleanCntnt + "](tg://emoji?id=" + ent.CustomEmojiID + ")" + post + case EntityDateTime: + if ent.DateTimeFormat != "" { + return prevText + pre + "![" + cleanCntnt + "](tg://time?unix=" + + strconv.Itoa(ent.UnixTime) + "&format=" + ent.DateTimeFormat + ")" + post + } + + return prevText + pre + "![" + cleanCntnt + "](tg://time?unix=" + strconv.Itoa(ent.UnixTime) + ")" + post + case EntityTextMention: + return prevText + pre + "[" + cleanCntnt + "](tg://user?id=" + strconv.FormatInt(ent.User.ID, 10) + ")" + post + case EntityTextLink: + return prevText + pre + "[" + cleanCntnt + "](" + ent.URL + ")" + post + case EntityBlockquote: + return prevText + pre + ">" + strings.Join(strings.Split(cleanCntnt, "\n"), "\n>") + post + case EntityExpandableBlockquote: + return prevText + pre + "**>" + strings.Join(strings.Split(cleanCntnt, "\n"), "\n>") + "||" + post + default: + return prevText + cntnt + } +} + +func getUpperEntities(ents []MessageEntity) []MessageEntity { + prev := 0 + uppers := make([]MessageEntity, 0, len(ents)) + + for _, e := range ents { + if e.Offset < prev { + continue + } + + uppers = append(uppers, e) + prev = e.Offset + e.Length + } + + return uppers +} + +func getChildEntities(ent MessageEntity, ents []MessageEntity) []MessageEntity { + end := ent.Offset + ent.Length + children := make([]MessageEntity, 0, len(ents)) + + for _, e := range ents { + if e.Offset < ent.Offset || e == ent { + continue + } + + if e.Offset >= end { + break + } + + children = append(children, e) + } + + return children +} + +func splitEdgeWhitespace(text string, ent MessageEntity) (pre, cntnt, post string) { + keepNewLines := ent.Type == EntityPre + + bd := strings.Builder{} + rText := []rune(text) + + for i := 0; i < len(rText) && unicode.IsSpace(rText[i]) && (!keepNewLines || rText[i] != '\n'); i++ { + bd.WriteRune(rText[i]) + } + + pre = bd.String() + + text = strings.TrimPrefix(text, pre) + + bd.Reset() + + for i := len(rText) - 1; i >= 0 && unicode.IsSpace(rText[i]); i-- { + bd.WriteRune(rText[i]) + } + + post = bd.String() + + return pre, strings.TrimSuffix(text, post), post +} + +func escapeContainedMDV1(data, mdType []rune) string { + out := strings.Builder{} + + for _, x := range data { + if slices.Contains(mdType, x) { + out.WriteRune('\\') + } + + out.WriteRune(x) + } + + return out.String() +} diff --git a/formatting_test.go b/formatting_test.go new file mode 100644 index 0000000..7fc609f --- /dev/null +++ b/formatting_test.go @@ -0,0 +1,231 @@ +package botapi + +import "testing" + +func TestMessage_OriginalMD(t *testing.T) { + msg := &Message{ + Text: "hello world", + Entities: []MessageEntity{ + { + Type: EntityBold, + Offset: 6, + Length: 5, + }, + }, + } + + got := msg.OriginalMD() + want := "hello *world*" + + if got != want { + t.Fatalf("OriginalMD() = %q, want %q", got, want) + } +} + +func TestMessage_OriginalMDV2(t *testing.T) { + msg := &Message{ + Text: "hello world", + Entities: []MessageEntity{ + { + Type: EntityUnderline, + Offset: 6, + Length: 5, + }, + }, + } + + got := msg.OriginalMDV2() + want := "hello __world__" + + if got != want { + t.Fatalf("OriginalMDV2() = %q, want %q", got, want) + } +} + +func TestMessage_OriginalHTML(t *testing.T) { + msg := &Message{ + Text: "hello world", + Entities: []MessageEntity{ + { + Type: EntityItalic, + Offset: 6, + Length: 5, + }, + }, + } + + got := msg.OriginalHTML() + want := "hello world" + + if got != want { + t.Fatalf("OriginalHTML() = %q, want %q", got, want) + } +} + +func TestMessage_OriginalHTML_EscapesText(t *testing.T) { + msg := &Message{ + Text: "hello", + } + + got := msg.OriginalHTML() + want := "<b>hello</b>" + + if got != want { + t.Fatalf("OriginalHTML() = %q, want %q", got, want) + } +} + +func TestMessage_OriginalMD_TextLink(t *testing.T) { + msg := &Message{ + Text: "OpenAI", + Entities: []MessageEntity{ + { + Type: EntityTextLink, + Offset: 0, + Length: 6, + URL: "https://openai.com", + }, + }, + } + + got := msg.OriginalMD() + want := "[OpenAI](https://openai.com)" + + if got != want { + t.Fatalf("OriginalMD() = %q, want %q", got, want) + } +} + +func TestMessage_OriginalMD_TextMention(t *testing.T) { + msg := &Message{ + Text: "John", + Entities: []MessageEntity{ + { + Type: EntityTextMention, + Offset: 0, + Length: 4, + User: &User{ + ID: 12345, + }, + }, + }, + } + + got := msg.OriginalMD() + want := "[John](tg://user?id=12345)" + + if got != want { + t.Fatalf("OriginalMD() = %q, want %q", got, want) + } +} + +func TestMessage_OriginalMD_PreWithLanguage(t *testing.T) { + msg := &Message{ + Text: "fmt.Println()", + Entities: []MessageEntity{ + { + Type: EntityPre, + Offset: 0, + Length: 13, + Language: "go", + }, + }, + } + + got := msg.OriginalMD() + want := "```go\nfmt.Println()```" + + if got != want { + t.Fatalf("OriginalMD() = %q, want %q", got, want) + } +} + +func TestMessage_OriginalMD_EscapeCodeMarker(t *testing.T) { + msg := &Message{ + Text: "`test`", + Entities: []MessageEntity{ + { + Type: EntityCode, + Offset: 0, + Length: 6, + }, + }, + } + + got := msg.OriginalMD() + want := "`\\`test\\``" + + if got != want { + t.Fatalf("OriginalMD() = %q, want %q", got, want) + } +} + +func TestMessage_OriginalMDV2_NestedEntities(t *testing.T) { + msg := &Message{ + Text: "hello world", + Entities: []MessageEntity{ + { + Type: EntityBold, + Offset: 0, + Length: 11, + }, + { + Type: EntityItalic, + Offset: 6, + Length: 5, + }, + }, + } + + got := msg.OriginalMDV2() + want := "*hello _world_*" + + if got != want { + t.Fatalf("OriginalMDV2() = %q, want %q", got, want) + } +} + +func TestMessage_OriginalHTML_NestedEntities(t *testing.T) { + msg := &Message{ + Text: "hello world", + Entities: []MessageEntity{ + { + Type: EntityBold, + Offset: 0, + Length: 11, + }, + { + Type: EntityItalic, + Offset: 6, + Length: 5, + }, + }, + } + + got := msg.OriginalHTML() + want := "hello world" + + if got != want { + t.Fatalf("OriginalHTML() = %q, want %q", got, want) + } +} + +func TestMessage_OriginalTextMD_UsesCaptionWhenTextEmpty(t *testing.T) { + msg := &Message{ + Caption: "caption", + CaptionEntities: []MessageEntity{ + { + Type: EntityBold, + Offset: 0, + Length: 7, + }, + }, + } + + got := msg.OriginalTextMD() + want := "*caption*" + + if got != want { + t.Fatalf("OriginalTextMD() = %q, want %q", got, want) + } +} From 556618664c0adcbe91664714f08bed67cac65cf2 Mon Sep 17 00:00:00 2001 From: fluffur Date: Sun, 21 Jun 2026 11:41:51 +0500 Subject: [PATCH 2/3] style(formatting): fix indentation --- formatting.go | 1 + 1 file changed, 1 insertion(+) diff --git a/formatting.go b/formatting.go index 1f9221a..0912dfb 100644 --- a/formatting.go +++ b/formatting.go @@ -45,6 +45,7 @@ func (m *Message) TextAndEntities() (string, []MessageEntity) { if m.Text != "" { return m.Text, m.Entities } + return m.Caption, m.CaptionEntities } From f1e69298e51c1b1c0bd45870e85ffba521e802e8 Mon Sep 17 00:00:00 2001 From: fluffur Date: Sun, 21 Jun 2026 11:50:21 +0500 Subject: [PATCH 3/3] test(formatting): cover massage formatting with tests --- formatting_test.go | 125 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 1 deletion(-) diff --git a/formatting_test.go b/formatting_test.go index 7fc609f..e84ca64 100644 --- a/formatting_test.go +++ b/formatting_test.go @@ -1,6 +1,9 @@ package botapi -import "testing" +import ( + "context" + "testing" +) func TestMessage_OriginalMD(t *testing.T) { msg := &Message{ @@ -229,3 +232,123 @@ func TestMessage_OriginalTextMD_UsesCaptionWhenTextEmpty(t *testing.T) { t.Fatalf("OriginalTextMD() = %q, want %q", got, want) } } + +func TestMessage_OriginalCaptionMD(t *testing.T) { + msg := &Message{ + Caption: "caption", + CaptionEntities: []MessageEntity{ + { + Type: EntityItalic, + Offset: 0, + Length: 7, + }, + }, + } + + if got := msg.OriginalCaptionMD(); got != "_caption_" { + t.Fatalf("got %q", got) + } +} + +func TestMessage_OriginalCaptionMDV2(t *testing.T) { + msg := &Message{ + Caption: "caption", + CaptionEntities: []MessageEntity{ + { + Type: EntityUnderline, + Offset: 0, + Length: 7, + }, + }, + } + + if got := msg.OriginalCaptionMDV2(); got != "__caption__" { + t.Fatalf("got %q", got) + } +} + +func TestMessage_OriginalCaptionHTML(t *testing.T) { + msg := &Message{ + Caption: "caption", + CaptionEntities: []MessageEntity{ + { + Type: EntityBold, + Offset: 0, + Length: 7, + }, + }, + } + + if got := msg.OriginalCaptionHTML(); got != "caption" { + t.Fatalf("got %q", got) + } +} + +func TestMessage_OriginalMDV2_Blockquote(t *testing.T) { + msg := &Message{ + Text: "line1\nline2", + Entities: []MessageEntity{ + { + Type: EntityBlockquote, + Offset: 0, + Length: 11, + }, + }, + } + + want := ">line1\n>line2" + + if got := msg.OriginalMDV2(); got != want { + t.Fatalf("got %q want %q", got, want) + } +} + +func TestMessage_OriginalMDV2_Spoiler(t *testing.T) { + msg := &Message{ + Text: "secret", + Entities: []MessageEntity{ + { + Type: EntitySpoiler, + Offset: 0, + Length: 6, + }, + }, + } + + if got := msg.OriginalMDV2(); got != "||secret||" { + t.Fatalf("got %q", got) + } +} + +func TestMessage_OriginalHTML_CustomEmoji(t *testing.T) { + msg := &Message{ + Text: "🙂", + Entities: []MessageEntity{ + { + Type: EntityCustomEmoji, + Offset: 0, + Length: 2, + CustomEmojiID: "123", + }, + }, + } + + want := `🙂` + + if got := msg.OriginalHTML(); got != want { + t.Fatalf("got %q want %q", got, want) + } +} + +func TestAnswerInlineQuery_NilResult(t *testing.T) { + b := newMockBot(newMockInvoker()) + + err := b.AnswerInlineQuery( + context.Background(), + "123", + []InlineQueryResult{nil}, + ) + if err == nil { + t.Fatal("expected error") + } +}