From 810eb02cc298433eaa372f4e1ab6b9c6c9e70e9a Mon Sep 17 00:00:00 2001
From: David Zhang <dzz0615@gmail.com>
Date: Fri, 13 Mar 2026 02:35:08 +0000
Subject: [PATCH] feat(telegram): convert entities to markdown in parsed
 messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Telegram delivers formatting (bold, italic, links, code, etc.) as separate
entity objects alongside plain text. Previously, parseTelegramMessage only
used the raw text, losing all entity information — most critically, text_link
entity URLs were dropped entirely.

This adds applyTelegramEntities() which reconstructs markdown syntax from
entities before storing the message text. Supported entity types: text_link,
bold, italic, code, pre, and strikethrough. Other entity types (url, mention,
bot_command) are already present in the text and left unchanged.

Also adds the missing `url` and `language` fields to TelegramMessageEntity.
---
 packages/adapter-telegram/src/index.test.ts | 122 ++++++++++++++++++++
 packages/adapter-telegram/src/index.ts      |  90 ++++++++++++++-
 packages/adapter-telegram/src/types.ts      |   2 +
 3 files changed, 212 insertions(+), 2 deletions(-)

diff --git a/packages/adapter-telegram/src/index.test.ts b/packages/adapter-telegram/src/index.test.ts
index a0d64168..902ad06e 100644
--- a/packages/adapter-telegram/src/index.test.ts
+++ b/packages/adapter-telegram/src/index.test.ts
@@ -9,6 +9,7 @@ import type { ChatInstance, Logger } from "chat";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import { encodeTelegramCallbackData } from "./cards";
 import {
+  applyTelegramEntities,
   createTelegramAdapter,
   TelegramAdapter,
   type TelegramMessage,
@@ -1803,3 +1804,124 @@ describe("TelegramAdapter", () => {
     expect(sendMessageBody.text).toBe("forum topic message");
   });
 });
+
+describe("applyTelegramEntities", () => {
+  it("returns text unchanged when no entities", () => {
+    expect(applyTelegramEntities("hello world", [])).toBe("hello world");
+  });
+
+  it("converts text_link entities to markdown links", () => {
+    const result = applyTelegramEntities("Visit our website for details", [
+      { type: "text_link", offset: 10, length: 7, url: "https://example.com" }, // range covers "website"
+    ]);
+    expect(result).toBe("Visit our [website](https://example.com) for details");
+  });
+
+  it("converts bold entities to markdown bold", () => {
+    const result = applyTelegramEntities("hello world", [
+      { type: "bold", offset: 6, length: 5 }, // range covers "world"
+    ]);
+    expect(result).toBe("hello **world**");
+  });
+
+  it("converts italic entities to markdown italic", () => {
+    const result = applyTelegramEntities("hello world", [
+      { type: "italic", offset: 0, length: 5 }, // range covers "hello"
+    ]);
+    expect(result).toBe("*hello* world");
+  });
+
+  it("converts code entities to inline code", () => {
+    const result = applyTelegramEntities("use the console.log function", [
+      { type: "code", offset: 8, length: 11 }, // range covers "console.log"
+    ]);
+    expect(result).toBe("use the `console.log` function");
+  });
+
+  it("converts pre entities to code blocks", () => {
+    const result = applyTelegramEntities("const x = 1", [
+      { type: "pre", offset: 0, length: 11 }, // range covers the whole string
+    ]);
+    expect(result).toBe("```\nconst x = 1\n```");
+  });
+
+  it("converts pre entities with language", () => {
+    const result = applyTelegramEntities("const x = 1", [
+      { type: "pre", offset: 0, length: 11, language: "typescript" }, // language becomes the fence info string
+    ]);
+    expect(result).toBe("```typescript\nconst x = 1\n```");
+  });
+
+  it("converts strikethrough entities", () => {
+    const result = applyTelegramEntities("old text here", [
+      { type: "strikethrough", offset: 0, length: 8 }, // range covers "old text"
+    ]);
+    expect(result).toBe("~~old text~~ here");
+  });
+
+  it("leaves url entities unchanged (already in text)", () => {
+    const result = applyTelegramEntities("check https://example.com out", [
+      { type: "url", offset: 6, length: 19 }, // range covers "https://example.com"
+    ]);
+    expect(result).toBe("check https://example.com out");
+  });
+
+  it("leaves mention entities unchanged", () => {
+    const result = applyTelegramEntities("hey @user check this", [
+      { type: "mention", offset: 4, length: 5 }, // range covers "@user"
+    ]);
+    expect(result).toBe("hey @user check this");
+  });
+
+  it("handles multiple non-overlapping entities", () => {
+    const result = applyTelegramEntities("hello world foo", [
+      { type: "bold", offset: 0, length: 5 }, // range covers "hello"
+      { type: "italic", offset: 6, length: 5 }, // range covers "world"
+    ]);
+    expect(result).toBe("**hello** *world* foo");
+  });
+
+  it("handles text_link with special markdown chars in text", () => {
+    const result = applyTelegramEntities("click [here]", [
+      { type: "text_link", offset: 6, length: 6, url: "https://example.com" }, // range covers "[here]", brackets included
+    ]);
+    expect(result).toBe("click [\\[here\\]](https://example.com)"); // label brackets are backslash-escaped
+  });
+
+  it("preserves parseMessage text with entities", async () => {
+    mockFetch.mockResolvedValueOnce(
+      telegramOk({
+        id: 999,
+        is_bot: true,
+        first_name: "Bot",
+        username: "mybot",
+      })
+    );
+
+    const adapter = createTelegramAdapter({
+      botToken: "token",
+      mode: "webhook",
+      logger: mockLogger,
+      userName: "mybot",
+    });
+
+    await adapter.initialize(createMockChat());
+
+    const messageWithLink = sampleMessage({
+      text: "Visit our website for details",
+      entities: [
+        {
+          type: "text_link",
+          offset: 10, // offset 10, length 7 covers "website"
+          length: 7,
+          url: "https://example.com",
+        },
+      ],
+    });
+
+    const parsed = adapter.parseMessage(messageWithLink);
+    expect(parsed.text).toBe(
+      "Visit our [website](https://example.com) for details"
+    );
+  });
+});
diff --git a/packages/adapter-telegram/src/index.ts b/packages/adapter-telegram/src/index.ts
index ee2cdd22..5b0f3418 100644
--- a/packages/adapter-telegram/src/index.ts
+++ b/packages/adapter-telegram/src/index.ts
@@ -101,6 +101,90 @@ interface ResolvedTelegramLongPollingConfig {
 
 type TelegramRuntimeMode = "webhook" | "polling";
 
+/**
+ * Escape markdown special characters inside entity text so wrapping
+ * with markdown syntax doesn't break parsing.
+ */
+const escapeMarkdownInEntity = (text: string): string =>
+  text.replace(/([[\]()\\])/g, "\\$1");
+
+/**
+ * Convert Telegram message entities to markdown.
+ *
+ * Telegram delivers formatting as separate entity objects alongside plain text.
+ * This function reconstructs markdown so that links, bold, italic, code, etc.
+ * are preserved when the text is later parsed as markdown.
+ *
+ * Markers are placed by original-text offsets in one left-to-right pass, so
+ * nested entities (e.g. bold inside a link) never shift each other's ranges.
+ *
+ * Entities use UTF-16 offsets, which match JavaScript's native string indexing.
+ */
+export function applyTelegramEntities(
+  text: string,
+  entities: TelegramMessageEntity[]
+): string {
+  if (entities.length === 0) {
+    return text;
+  }
+
+  // Markdown to insert at each original offset (closers go before openers)
+  const openers = new Map<number, string>();
+  const closers = new Map<number, string>();
+  // Outermost first: openers are appended outside-in, closers are prepended
+  const ordered = [...entities].sort(
+    (a, b) => a.offset - b.offset || b.length - a.length
+  );
+
+  for (const entity of ordered) {
+    const start = Math.max(entity.offset, 0);
+    const end = Math.min(entity.offset + entity.length, text.length);
+    let markers: [string, string] | undefined;
+    switch (entity.type) {
+      case "text_link":
+        markers = entity.url ? ["[", `](${entity.url})`] : undefined;
+        break;
+      case "bold":
+        markers = ["**", "**"];
+        break;
+      case "italic":
+        markers = ["*", "*"];
+        break;
+      case "code":
+        markers = ["`", "`"];
+        break;
+      case "pre":
+        markers = [`\`\`\`${entity.language ?? ""}\n`, "\n```"];
+        break;
+      case "strikethrough":
+        markers = ["~~", "~~"];
+        break;
+      default:
+        // url, mention, bot_command, etc. are already present in the text as-is
+        break;
+    }
+    if (markers !== undefined && start < end) {
+      openers.set(start, (openers.get(start) ?? "") + markers[0]);
+      closers.set(end, markers[1] + (closers.get(end) ?? ""));
+    }
+  }
+
+  const cuts = [
+    ...new Set([0, text.length, ...openers.keys(), ...closers.keys()]),
+  ].sort((a, b) => a - b);
+  const links = ordered.filter((e) => e.type === "text_link" && e.url);
+  const inLink = (pos: number): boolean =>
+    links.some((e) => pos >= e.offset && pos < e.offset + e.length);
+
+  return cuts
+    .map((pos, i) => {
+      const segment = text.slice(pos, cuts[i + 1] ?? pos);
+      const body = inLink(pos) ? escapeMarkdownInEntity(segment) : segment;
+      return (closers.get(pos) ?? "") + (openers.get(pos) ?? "") + body;
+    })
+    .join("");
+}
+
 export class TelegramAdapter
   implements Adapter<TelegramThreadId, TelegramRawMessage>
 {
@@ -936,7 +1020,9 @@ export class TelegramAdapter
     raw: TelegramMessage,
     threadId: string
   ): Message<TelegramRawMessage> {
-    const text = raw.text ?? raw.caption ?? "";
+    const plainText = raw.text ?? raw.caption ?? "";
+    const entities = raw.entities ?? raw.caption_entities ?? [];
+    const text = applyTelegramEntities(plainText, entities);
     let author: TelegramMessageAuthor;
 
     if (raw.from) {
@@ -971,7 +1057,7 @@ export class TelegramAdapter
             : undefined,
       },
       attachments: this.extractAttachments(raw),
-      isMention: this.isBotMentioned(raw, text),
+      isMention: this.isBotMentioned(raw, plainText),
     });
 
     return message;
diff --git a/packages/adapter-telegram/src/types.ts b/packages/adapter-telegram/src/types.ts
index b313fa9e..250c8aeb 100644
--- a/packages/adapter-telegram/src/types.ts
+++ b/packages/adapter-telegram/src/types.ts
@@ -99,9 +99,11 @@ export interface TelegramChat {
  * @see https://core.telegram.org/bots/api#messageentity
  */
 export interface TelegramMessageEntity {
+  language?: string; // code-fence language; read by applyTelegramEntities for "pre" entities
   length: number;
   offset: number;
   type: string;
+  url?: string; // link target; read by applyTelegramEntities for "text_link" entities
   user?: TelegramUser;
 }