diff --git a/src/lib/bridge/adapters/telegram-adapter.ts b/src/lib/bridge/adapters/telegram-adapter.ts index 173220b50..309a44d4e 100644 --- a/src/lib/bridge/adapters/telegram-adapter.ts +++ b/src/lib/bridge/adapters/telegram-adapter.ts @@ -18,8 +18,10 @@ import { BaseChannelAdapter, registerAdapterFactory } from '../channel-adapter'; import { callTelegramApi, sendMessageDraft } from './telegram-utils'; import { isImageEnabled, + isDocumentEnabled, downloadPhoto, downloadDocumentImage, + downloadDocument, isSupportedImageMime, inferMimeType, } from './telegram-media'; @@ -552,6 +554,7 @@ export class TelegramAdapter extends BaseChannelAdapter { const hasPhoto = m.photo && m.photo.length > 0; const hasDocImage = m.document && this.isDocumentImage(m.document); const hasMedia = hasPhoto || hasDocImage; + const hasNonImageDocument = !!m.document && !hasDocImage; // Unified text extraction: text for regular messages, caption for media messages const messageText = m.text ?? m.caption ?? ''; @@ -565,9 +568,17 @@ export class TelegramAdapter extends BaseChannelAdapter { // Single image message — process immediately await this.processSingleImageMessage(update, chatId, userId, displayName); } + } else if (hasNonImageDocument && isDocumentEnabled()) { + if (m.media_group_id) { + // Mixed-media album entry — buffer with the rest of the group + this.bufferMediaGroup(m.media_group_id, update, chatId, userId, displayName); + } else { + // Single non-image document (PDF/DOCX/TXT/etc.) — download and enqueue + await this.processSingleDocumentMessage(update, chatId, userId, displayName); + } } else if (messageText) { // Text/caption message (covers: pure text, image_enabled=false + caption, - // unsupported document + caption) + // document_enabled=false + caption) const msg: InboundMessage = { messageId: String(m.message_id), address: { @@ -712,6 +723,80 @@ export class TelegramAdapter extends BaseChannelAdapter { this.enqueue(msg); } + /** + * Process a single non-image document message (no media_group_id). + * Downloads the file, attaches it with original name + MIME, and enqueues + * for the normal session-locked turn. Sends rejection notifications to + * Telegram on download failure or size overflow. + */ + private async processSingleDocumentMessage( + update: TelegramUpdate, + chatId: string, + userId: string, + displayName: string, + ): Promise { + const m = update.message!; + const token = this.botToken; + const address = { channelType: 'telegram' as const, chatId, userId, displayName }; + + if (!token || !m.document) { + this.markUpdateProcessed(update.update_id); + return; + } + + const attachments: FileAttachment[] = []; + const rejections: MediaDownloadResult[] = []; + + const result = await downloadDocument(token, m.document, String(m.message_id)); + if (result.attachment) { + attachments.push(result.attachment); + } else if (result.rejected) { + rejections.push(result); + } + + // Send rejection notification directly to user + if (rejections.length > 0) { + const notice = rejections.map(r => r.rejectedMessage || 'File processing failed').join('\n'); + this.send({ address, text: notice, parseMode: 'plain' }).catch(() => {}); + } + + const text = m.caption || m.text || ''; + const hasContent = attachments.length > 0 || text.trim(); + + if (!hasContent) { + // Download failed and no caption — mark processed without enqueue + this.markUpdateProcessed(update.update_id); + return; + } + + const summary = attachments.length > 0 + ? `[file: ${attachments[0].name}] ${text.slice(0, 150)}` + : text.slice(0, 200); + + // Audit log + try { + insertAuditLog({ + channelType: 'telegram', + chatId, + direction: 'inbound', + messageId: String(m.message_id), + summary, + }); + } catch { /* best effort */ } + + const msg: InboundMessage = { + messageId: String(m.message_id), + address, + text, + timestamp: m.date * 1000, + raw: update, + updateId: update.update_id, + attachments: attachments.length > 0 ? attachments : undefined, + }; + + this.enqueue(msg); + } + /** * Buffer a media group update for debounced processing. * Resets the 500ms timer on each new update in the same group. @@ -776,7 +861,10 @@ export class TelegramAdapter extends BaseChannelAdapter { let firstMessageId = ''; let firstDate = 0; - // Download all images in the group + // Download all media items in the group (photos, image-docs, and non-image documents). + // Tracks counts separately so the audit summary can report them distinctly. + let imageCount = 0; + let fileCount = 0; for (const update of entry.updates) { const m = update.message!; if (!firstMessageId) { @@ -792,6 +880,7 @@ export class TelegramAdapter extends BaseChannelAdapter { const result = await downloadPhoto(token, m.photo, String(m.message_id)); if (result.attachment) { attachments.push(result.attachment); + imageCount++; } else if (result.rejected && result.rejected !== 'unsupported_type') { rejections.push(result); } @@ -799,18 +888,30 @@ export class TelegramAdapter extends BaseChannelAdapter { const result = await downloadDocumentImage(token, m.document, String(m.message_id)); if (result.attachment) { attachments.push(result.attachment); + imageCount++; + } else if (result.rejected && result.rejected !== 'unsupported_type') { + rejections.push(result); + } + } else if (m.document) { + // Non-image document in a mixed-media album — only include if document + // attachments are enabled (mirrors the single-message dispatch gate). + if (!isDocumentEnabled()) continue; + const result = await downloadDocument(token, m.document, String(m.message_id)); + if (result.attachment) { + attachments.push(result.attachment); + fileCount++; } else if (result.rejected && result.rejected !== 'unsupported_type') { rejections.push(result); } } } - // Send rejection notification if any images failed + // Send rejection notification if any items failed if (rejections.length > 0) { - const reasons = rejections.map(r => r.rejectedMessage || 'Image processing failed').join('\n'); + const reasons = rejections.map(r => r.rejectedMessage || 'Attachment processing failed').join('\n'); const notice = rejections.length === 1 ? reasons - : `${rejections.length} image(s) failed:\n${reasons}`; + : `${rejections.length} attachment(s) failed:\n${reasons}`; this.send({ address, text: notice, parseMode: 'plain' }).catch(() => {}); } @@ -827,7 +928,7 @@ export class TelegramAdapter extends BaseChannelAdapter { } const summary = attachments.length > 0 - ? `[Album: ${attachments.length} image(s)] ${text.slice(0, 150)}` + ? `[Album: ${formatAlbumSummary(imageCount, fileCount)}] ${text.slice(0, 150)}` : text.slice(0, 200); try { @@ -862,5 +963,13 @@ export class TelegramAdapter extends BaseChannelAdapter { } } +/** Build the audit-summary parts string for a mixed-media album. */ +function formatAlbumSummary(imageCount: number, fileCount: number): string { + const parts: string[] = []; + if (imageCount > 0) parts.push(`${imageCount} image(s)`); + if (fileCount > 0) parts.push(`${fileCount} file(s)`); + return parts.join(', '); +} + // Self-register so bridge-manager can create TelegramAdapter via the registry. registerAdapterFactory('telegram', () => new TelegramAdapter()); diff --git a/src/lib/bridge/adapters/telegram-media.ts b/src/lib/bridge/adapters/telegram-media.ts index f8efda8de..15810c50a 100644 --- a/src/lib/bridge/adapters/telegram-media.ts +++ b/src/lib/bridge/adapters/telegram-media.ts @@ -1,9 +1,10 @@ /** - * Telegram Media — download and process images from Telegram messages. + * Telegram Media — download and process images and documents from Telegram messages. * * Handles photo[] size selection, file download via Bot API, base64 conversion, - * and document-type image validation. Produces FileAttachment objects that plug - * directly into the existing streamClaude vision pipeline. + * image validation, and arbitrary document attachments. Produces FileAttachment + * objects that plug directly into the streamClaude pipeline (vision for images, + * Read-tool path for non-image files via claude-client.ts). */ import type { FileAttachment } from '@/types'; @@ -17,6 +18,9 @@ const OPTIMAL_LONG_EDGE = 1568; /** Default max image size in bytes (20 MB). */ const DEFAULT_MAX_IMAGE_SIZE = 20 * 1024 * 1024; +/** Default max non-image file size in bytes (20 MB — Telegram Bot API ceiling). */ +const DEFAULT_MAX_FILE_SIZE = 20 * 1024 * 1024; + /** Max retry attempts for download. */ const MAX_RETRIES = 3; @@ -70,6 +74,15 @@ export function isImageEnabled(): boolean { return setting !== 'false'; } +/** + * Check whether the Telegram non-image document feature is enabled. + */ +export function isDocumentEnabled(): boolean { + const setting = getSetting('bridge_telegram_document_enabled'); + // Default to true if not explicitly set to 'false' + return setting !== 'false'; +} + /** * Get the configured max image size in bytes. */ @@ -82,6 +95,18 @@ function getMaxImageSize(): number { return DEFAULT_MAX_IMAGE_SIZE; } +/** + * Get the configured max non-image file size in bytes. + */ +function getMaxFileSize(): number { + const setting = getSetting('bridge_telegram_max_document_size'); + if (setting) { + const parsed = parseInt(setting, 10); + if (!isNaN(parsed) && parsed > 0) return parsed; + } + return DEFAULT_MAX_FILE_SIZE; +} + /** * Check if a MIME type is a supported image format. */ @@ -151,7 +176,12 @@ export async function downloadPhoto( messageId: string, ): Promise { const selected = selectOptimalPhoto(photos); - return downloadFileById(botToken, selected.file_id, messageId); + return downloadFileById(botToken, selected.file_id, messageId, { + maxSize: getMaxImageSize(), + mimeFallback: 'image/jpeg', + nameFallback: `image_${messageId}`, + kind: 'image', + }); } /** @@ -176,15 +206,68 @@ export async function downloadDocumentImage( return { attachment: null, rejected: 'too_large', - rejectedMessage: formatSizeError(doc.file_size, maxSize), + rejectedMessage: formatSizeError(doc.file_size, maxSize, 'image'), + }; + } + + return downloadFileById(botToken, doc.file_id, messageId, { + maxSize, + mimeOverride: mime, + nameOverride: doc.file_name, + mimeFallback: 'image/jpeg', + nameFallback: `image_${messageId}`, + kind: 'image', + }); +} + +/** + * Download a non-image document from Telegram. + * + * Preserves the user-supplied filename and Telegram-reported MIME so Claude + * can recognize the file format when reading via its Read tool. Pre-checks + * file_size against the max limit before initiating download. + */ +export async function downloadDocument( + botToken: string, + doc: TelegramDocument, + messageId: string, +): Promise { + const maxSize = getMaxFileSize(); + if (doc.file_size && doc.file_size > maxSize) { + return { + attachment: null, + rejected: 'too_large', + rejectedMessage: formatSizeError(doc.file_size, maxSize, 'file'), }; } - return downloadFileById(botToken, doc.file_id, messageId); + return downloadFileById(botToken, doc.file_id, messageId, { + maxSize, + mimeOverride: doc.mime_type, + nameOverride: doc.file_name, + mimeFallback: 'application/octet-stream', + nameFallback: `file_${messageId}`, + kind: 'file', + }); } // ── Internal ───────────────────────────────────────────────── +interface DownloadFileOptions { + /** Maximum allowed size in bytes. */ + maxSize: number; + /** Pre-known MIME (e.g. from TelegramDocument.mime_type). Skips inference. */ + mimeOverride?: string; + /** Pre-known filename (e.g. from TelegramDocument.file_name). Skips path basename. */ + nameOverride?: string; + /** MIME used when neither override nor inferMimeType yields a value. */ + mimeFallback: string; + /** Filename used when neither override nor path basename yields a value. */ + nameFallback: string; + /** Used to format the size-exceeded error message with appropriate hint. */ + kind: 'image' | 'file'; +} + /** * Download a file by its Telegram file_id. * Calls getFile → download URL → binary → base64 FileAttachment. @@ -194,8 +277,15 @@ async function downloadFileById( botToken: string, fileId: string, messageId: string, + opts: DownloadFileOptions, ): Promise { - const maxSize = getMaxImageSize(); + const { maxSize, mimeOverride, nameOverride, mimeFallback, nameFallback, kind } = opts; + const downloadFailMessage = kind === 'image' + ? 'Failed to download image from Telegram.' + : 'Failed to download file from Telegram.'; + const retriesExhaustedMessage = kind === 'image' + ? 'Image download failed after retries.' + : 'File download failed after retries.'; for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) { try { @@ -224,7 +314,7 @@ async function downloadFileById( // Pre-check size from API response if (fileSize && fileSize > maxSize) { console.warn(`[telegram-media] File too large: ${fileSize} bytes (max ${maxSize})`); - return { attachment: null, rejected: 'too_large', rejectedMessage: formatSizeError(fileSize, maxSize) }; + return { attachment: null, rejected: 'too_large', rejectedMessage: formatSizeError(fileSize, maxSize, kind) }; } // Step 2: Download the file @@ -239,29 +329,31 @@ async function downloadFileById( await sleep(1000 * Math.pow(2, attempt - 1)); continue; } - return { attachment: null, rejected: 'download_failed', rejectedMessage: 'Failed to download image from Telegram.' }; + return { attachment: null, rejected: 'download_failed', rejectedMessage: downloadFailMessage }; } // Check Content-Length header const contentLength = downloadRes.headers.get('content-length'); if (contentLength && parseInt(contentLength, 10) > maxSize) { console.warn(`[telegram-media] Content-Length exceeds max: ${contentLength}`); - return { attachment: null, rejected: 'too_large', rejectedMessage: formatSizeError(parseInt(contentLength, 10), maxSize) }; + return { attachment: null, rejected: 'too_large', rejectedMessage: formatSizeError(parseInt(contentLength, 10), maxSize, kind) }; } // Step 3: Read buffer and validate actual size const buffer = Buffer.from(await downloadRes.arrayBuffer()); if (buffer.length > maxSize) { console.warn(`[telegram-media] Downloaded buffer too large: ${buffer.length} bytes`); - return { attachment: null, rejected: 'too_large', rejectedMessage: formatSizeError(buffer.length, maxSize) }; + return { attachment: null, rejected: 'too_large', rejectedMessage: formatSizeError(buffer.length, maxSize, kind) }; } - // Step 4: Determine MIME type - const mime = inferMimeType(filePath) || 'image/jpeg'; + // Step 4: Determine MIME and filename — prefer overrides (Telegram-reported metadata), + // then infer from path, then fall back. This preserves original document filenames + // and accurate MIMEs (e.g. application/pdf) so Claude can read them correctly. + const mime = mimeOverride || inferMimeType(filePath) || mimeFallback; + const fileName = nameOverride || filePath.split('/').pop() || nameFallback; // Step 5: Convert to base64 and build FileAttachment const base64 = buffer.toString('base64'); - const fileName = filePath.split('/').pop() || `image_${messageId}`; return { attachment: { @@ -280,18 +372,21 @@ async function downloadFileById( await sleep(1000 * Math.pow(2, attempt - 1)); continue; } - return { attachment: null, rejected: 'download_failed', rejectedMessage: 'Image download failed after retries.' }; + return { attachment: null, rejected: 'download_failed', rejectedMessage: retriesExhaustedMessage }; } } - return { attachment: null, rejected: 'download_failed', rejectedMessage: 'Image download failed after retries.' }; + return { attachment: null, rejected: 'download_failed', rejectedMessage: retriesExhaustedMessage }; } /** Format a human-readable size-exceeded error message. */ -function formatSizeError(actualBytes: number, limitBytes: number): string { +function formatSizeError(actualBytes: number, limitBytes: number, kind: 'image' | 'file'): string { const actualMB = (actualBytes / (1024 * 1024)).toFixed(1); const limitMB = (limitBytes / (1024 * 1024)).toFixed(0); - return `Image too large (${actualMB} MB, limit ${limitMB} MB). Please send as a photo instead of a file.`; + if (kind === 'image') { + return `Image too large (${actualMB} MB, limit ${limitMB} MB). Please send as a photo instead of a file.`; + } + return `File too large (${actualMB} MB, limit ${limitMB} MB).`; } function sleep(ms: number): Promise {