From 810eb02cc298433eaa372f4e1ab6b9c6c9e70e9a Mon Sep 17 00:00:00 2001 From: David Zhang Date: Fri, 13 Mar 2026 02:35:08 +0000 Subject: [PATCH] feat(telegram): convert entities to markdown in parsed messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Telegram delivers formatting (bold, italic, links, code, etc.) as separate entity objects alongside plain text. Previously, parseTelegramMessage only used the raw text, losing all entity information — most critically, text_link entity URLs were dropped entirely. This adds applyTelegramEntities() which reconstructs markdown syntax from entities before storing the message text. Supported entity types: text_link, bold, italic, code, pre, and strikethrough. Other entity types (url, mention, bot_command) are already present in the text and left unchanged. Also adds the missing `url` and `language` fields to TelegramMessageEntity. --- packages/adapter-telegram/src/index.test.ts | 122 ++++++++++++++++++++ packages/adapter-telegram/src/index.ts | 90 ++++++++++++++- packages/adapter-telegram/src/types.ts | 2 + 3 files changed, 212 insertions(+), 2 deletions(-) diff --git a/packages/adapter-telegram/src/index.test.ts b/packages/adapter-telegram/src/index.test.ts index a0d64168..902ad06e 100644 --- a/packages/adapter-telegram/src/index.test.ts +++ b/packages/adapter-telegram/src/index.test.ts @@ -9,6 +9,7 @@ import type { ChatInstance, Logger } from "chat"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { encodeTelegramCallbackData } from "./cards"; import { + applyTelegramEntities, createTelegramAdapter, TelegramAdapter, type TelegramMessage, @@ -1803,3 +1804,124 @@ describe("TelegramAdapter", () => { expect(sendMessageBody.text).toBe("forum topic message"); }); }); + +describe("applyTelegramEntities", () => { + it("returns text unchanged when no entities", () => { + expect(applyTelegramEntities("hello world", [])).toBe("hello world"); + }); + + 
it("converts text_link entities to markdown links", () => { + const result = applyTelegramEntities("Visit our website for details", [ + { type: "text_link", offset: 10, length: 7, url: "https://example.com" }, + ]); + expect(result).toBe("Visit our [website](https://example.com) for details"); + }); + + it("converts bold entities to markdown bold", () => { + const result = applyTelegramEntities("hello world", [ + { type: "bold", offset: 6, length: 5 }, + ]); + expect(result).toBe("hello **world**"); + }); + + it("converts italic entities to markdown italic", () => { + const result = applyTelegramEntities("hello world", [ + { type: "italic", offset: 0, length: 5 }, + ]); + expect(result).toBe("*hello* world"); + }); + + it("converts code entities to inline code", () => { + const result = applyTelegramEntities("use the console.log function", [ + { type: "code", offset: 8, length: 11 }, + ]); + expect(result).toBe("use the `console.log` function"); + }); + + it("converts pre entities to code blocks", () => { + const result = applyTelegramEntities("const x = 1", [ + { type: "pre", offset: 0, length: 11 }, + ]); + expect(result).toBe("```\nconst x = 1\n```"); + }); + + it("converts pre entities with language", () => { + const result = applyTelegramEntities("const x = 1", [ + { type: "pre", offset: 0, length: 11, language: "typescript" }, + ]); + expect(result).toBe("```typescript\nconst x = 1\n```"); + }); + + it("converts strikethrough entities", () => { + const result = applyTelegramEntities("old text here", [ + { type: "strikethrough", offset: 0, length: 8 }, + ]); + expect(result).toBe("~~old text~~ here"); + }); + + it("leaves url entities unchanged (already in text)", () => { + const result = applyTelegramEntities("check https://example.com out", [ + { type: "url", offset: 6, length: 19 }, + ]); + expect(result).toBe("check https://example.com out"); + }); + + it("leaves mention entities unchanged", () => { + const result = applyTelegramEntities("hey @user check 
this", [ + { type: "mention", offset: 4, length: 5 }, + ]); + expect(result).toBe("hey @user check this"); + }); + + it("handles multiple non-overlapping entities", () => { + const result = applyTelegramEntities("hello world foo", [ + { type: "bold", offset: 0, length: 5 }, + { type: "italic", offset: 6, length: 5 }, + ]); + expect(result).toBe("**hello** *world* foo"); + }); + + it("handles text_link with special markdown chars in text", () => { + const result = applyTelegramEntities("click [here]", [ + { type: "text_link", offset: 6, length: 6, url: "https://example.com" }, + ]); + expect(result).toBe("click [\\[here\\]](https://example.com)"); + }); + + it("preserves parseMessage text with entities", async () => { + mockFetch.mockResolvedValueOnce( + telegramOk({ + id: 999, + is_bot: true, + first_name: "Bot", + username: "mybot", + }) + ); + + const adapter = createTelegramAdapter({ + botToken: "token", + mode: "webhook", + logger: mockLogger, + userName: "mybot", + }); + + await adapter.initialize(createMockChat()); + + const messageWithLink = sampleMessage({ + text: "Visit our website for details", + entities: [ + { + type: "text_link", + offset: 10, + length: 7, + url: "https://example.com", + }, + ], + }); + + const parsed = adapter.parseMessage(messageWithLink); + expect(parsed.text).toBe( + "Visit our [website](https://example.com) for details" + ); + }); +}); diff --git a/packages/adapter-telegram/src/index.ts b/packages/adapter-telegram/src/index.ts index ee2cdd22..5b0f3418 100644 --- a/packages/adapter-telegram/src/index.ts +++ b/packages/adapter-telegram/src/index.ts @@ -101,6 +101,90 @@ interface ResolvedTelegramLongPollingConfig { type TelegramRuntimeMode = "webhook" | "polling"; +/** + * Escape markdown special characters inside entity text so wrapping + * with markdown syntax doesn't break parsing. 
/**
 * Escape markdown special characters inside entity text so wrapping
 * with markdown syntax doesn't break parsing.
 *
 * Backslash-escapes `[`, `]`, `(`, `)` and `\`.
 */
const escapeMarkdownInEntity = (text: string): string =>
  text.replace(/([[\]()\\])/g, "\\$1");

/**
 * Markdown wrapper for a single Telegram entity.
 *
 * @returns `[open, close]` marker strings, or `undefined` when the entity
 *   needs no markup (unsupported type, or a `text_link` without a `url`).
 */
function telegramEntityMarkers(
  entity: TelegramMessageEntity
): readonly [string, string] | undefined {
  switch (entity.type) {
    case "text_link":
      // A link without a target cannot be rendered; leave the text plain.
      return entity.url ? ["[", `](${entity.url})`] : undefined;
    case "bold":
      return ["**", "**"];
    case "italic":
      return ["*", "*"];
    case "code":
      return ["`", "`"];
    case "pre":
      return [`\`\`\`${entity.language ?? ""}\n`, "\n```"];
    case "strikethrough":
      return ["~~", "~~"];
    default:
      // url, mention, bot_command, etc. are already present in the text as-is
      return undefined;
  }
}

/**
 * Convert Telegram message entities to markdown.
 *
 * Telegram delivers formatting as separate entity objects alongside plain text.
 * This function reconstructs markdown so that links, bold, italic, code, etc.
 * are preserved when the text is later parsed as markdown.
 *
 * Entities use UTF-16 offsets, which match JavaScript's native string indexing.
 *
 * The output is assembled in a single left-to-right pass over the ORIGINAL
 * text, inserting markers at entity boundaries. Offsets are therefore never
 * applied to an already-modified string, which keeps nested entities (for
 * example italic inside bold, or bold inside a text_link) well-formed.
 *
 * @param text - Plain message text (or caption) as sent by Telegram.
 * @param entities - Entities describing formatting of `text`. Supported types:
 *   `text_link`, `bold`, `italic`, `code`, `pre`, `strikethrough`. Other types
 *   are ignored. Spans are clamped to the text; empty or out-of-range spans
 *   are skipped. Partially overlapping (non-nested) spans are emitted as-is
 *   and may not produce balanced markdown.
 * @returns The text with markdown markers inserted. Characters `[ ] ( ) \`
 *   that fall inside a `text_link` span are backslash-escaped.
 */
export function applyTelegramEntities(
  text: string,
  entities: TelegramMessageEntity[]
): string {
  if (entities.length === 0) {
    return text;
  }

  interface MarkupSpan {
    close: string;
    end: number;
    isLink: boolean;
    open: string;
    order: number;
    start: number;
  }

  const spans: MarkupSpan[] = [];
  for (const [order, entity] of entities.entries()) {
    const markers = telegramEntityMarkers(entity);
    if (markers === undefined) {
      continue;
    }
    const start = Math.max(0, entity.offset);
    const end = Math.min(text.length, entity.offset + entity.length);
    // Written as a negated `>` so NaN offsets are rejected too.
    if (!(end > start)) {
      continue;
    }
    spans.push({
      close: markers[1],
      end,
      isLink: entity.type === "text_link",
      open: markers[0],
      order,
      start,
    });
  }

  if (spans.length === 0) {
    return text;
  }

  // Outer spans first: earlier start, then later end, then input order.
  spans.sort(
    (a, b) => a.start - b.start || b.end - a.end || a.order - b.order
  );

  // Openers at one position are emitted outer -> inner (sorted order);
  // closers at one position are emitted inner -> outer (reverse order).
  const openersAt = new Map<number, MarkupSpan[]>();
  const closersAt = new Map<number, MarkupSpan[]>();
  for (const span of spans) {
    const bucket = openersAt.get(span.start);
    if (bucket) {
      bucket.push(span);
    } else {
      openersAt.set(span.start, [span]);
    }
  }
  for (const span of [...spans].reverse()) {
    const bucket = closersAt.get(span.end);
    if (bucket) {
      bucket.push(span);
    } else {
      closersAt.set(span.end, [span]);
    }
  }

  const boundaries = [
    ...new Set(spans.flatMap((span) => [span.start, span.end])),
  ].sort((a, b) => a - b);

  let result = "";
  let cursor = 0;
  let linkDepth = 0;

  for (const position of boundaries) {
    const chunk = text.slice(cursor, position);
    // Only raw text inside a link label is escaped; inserted markers are not.
    result += linkDepth > 0 ? escapeMarkdownInEntity(chunk) : chunk;

    // Close before opening so adjacent spans don't interleave.
    for (const span of closersAt.get(position) ?? []) {
      result += span.close;
      if (span.isLink) {
        linkDepth -= 1;
      }
    }
    for (const span of openersAt.get(position) ?? []) {
      result += span.open;
      if (span.isLink) {
        linkDepth += 1;
      }
    }
    cursor = position;
  }

  return result + text.slice(cursor);
}