From 3c07a73bda27e698d7adf4a84b87d06116b786d0 Mon Sep 17 00:00:00 2001 From: P3il4 <42489293+P3il4@users.noreply.github.com> Date: Fri, 27 Mar 2026 19:07:41 +0300 Subject: [PATCH 1/2] support video inputs in .chat --- .../src/services/ai/utils/OpenAIUtil.js | 18 ++++++++----- src/puter-js/src/lib/utils.js | 11 +++++++- src/puter-js/src/modules/AI.js | 25 ++++++++++--------- src/puter-js/types/modules/ai.d.ts | 2 +- 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/backend/src/services/ai/utils/OpenAIUtil.js b/src/backend/src/services/ai/utils/OpenAIUtil.js index f517401ef2..bab8f537f5 100644 --- a/src/backend/src/services/ai/utils/OpenAIUtil.js +++ b/src/backend/src/services/ai/utils/OpenAIUtil.js @@ -13,9 +13,12 @@ export const process_input_messages = async (messages) => { const content = msg.content; for ( const o of content ) { - if ( ! o['image_url'] ) continue; - if ( o.type ) continue; - o.type = 'image_url'; + if ( o['image_url'] && !o.type ) { + o.type = 'image_url'; + } + if ( o['video_url'] && !o.type ) { + o.type = 'video_url'; + } } // coerce tool calls @@ -93,9 +96,12 @@ export const process_input_messages_responses_api = async (messages) => { const content = msg.content; for ( const o of content ) { - if ( ! 
o['image_url'] ) continue; - if ( o.type ) continue; - o.type = 'image_url'; + if ( o['image_url'] && !o.type ) { + o.type = 'image_url'; + } + if ( o['video_url'] && !o.type ) { + o.type = 'video_url'; + } } // coerce tool calls diff --git a/src/puter-js/src/lib/utils.js b/src/puter-js/src/lib/utils.js index 3ee6a0f0a9..0bdfc63752 100644 --- a/src/puter-js/src/lib/utils.js +++ b/src/puter-js/src/lib/utils.js @@ -632,6 +632,15 @@ function arrayBufferToDataUri (arrayBuffer) { }); } +const VIDEO_EXTENSIONS = ['mp4', 'webm', 'mov', 'mpeg', 'avi', 'mkv', 'm4v', 'ogv']; + +const isVideoInput = (url) => { + if ( typeof url !== 'string' ) return false; + if ( url.startsWith('data:video/') ) return true; + const ext = url.split('?')[0].split('#')[0].split('.').pop()?.toLowerCase(); + return VIDEO_EXTENSIONS.includes(ext); +}; + export { - arrayBufferToDataUri, blob_to_url, blobToDataUri, driverCall, handle_error, handle_resp, initXhr, make_driver_method, parseResponse, setupXhrEventHandlers, uuidv4, + arrayBufferToDataUri, blob_to_url, blobToDataUri, driverCall, handle_error, handle_resp, initXhr, isVideoInput, make_driver_method, parseResponse, setupXhrEventHandlers, uuidv4, }; diff --git a/src/puter-js/src/modules/AI.js b/src/puter-js/src/modules/AI.js index f5eadcf062..352e4579a1 100644 --- a/src/puter-js/src/modules/AI.js +++ b/src/puter-js/src/modules/AI.js @@ -647,36 +647,37 @@ class AI { requestParams = { messages: [{ content: args[0] }] }; } - // ai.chat(prompt, imageURL/File) - // ai.chat(prompt, imageURL/File, testMode) + // ai.chat(prompt, mediaURL/File) + // ai.chat(prompt, mediaURL/File, testMode) else if ( typeof args[0] === 'string' && (typeof args[1] === 'string' || args[1] instanceof File) ) { - // if imageURL is a File, transform it to a data URI + // if mediaURL is a File, transform it to a data URI if ( args[1] instanceof File ) { args[1] = await utils.blobToDataUri(args[1]); } - // parse args[1] as an image_url object + const mediaBlock = 
utils.isVideoInput(args[1]) + ? { video_url: { url: args[1] } } + : { image_url: { url: args[1] } }; + requestParams = { vision: true, messages: [ { content: [ args[0], - { - image_url: { - url: args[1], - }, - }, + mediaBlock, ], }, ], }; } - // chat(prompt, [imageURLs]) + // chat(prompt, [mediaURLs]) else if ( typeof args[0] === 'string' && Array.isArray(args[1]) ) { - // parse args[1] as an array of image_url objects for ( let i = 0; i < args[1].length; i++ ) { - args[1][i] = { image_url: { url: args[1][i] } }; + const url = args[1][i]; + args[1][i] = utils.isVideoInput(url) + ? { video_url: { url } } + : { image_url: { url } }; } requestParams = { vision: true, diff --git a/src/puter-js/types/modules/ai.d.ts b/src/puter-js/types/modules/ai.d.ts index e97d0995a2..c11953163b 100644 --- a/src/puter-js/types/modules/ai.d.ts +++ b/src/puter-js/types/modules/ai.d.ts @@ -1,4 +1,4 @@ -export type AIMessageContent = string | { image_url?: { url: string } } | Record<string, unknown>; +export type AIMessageContent = string | { image_url?: { url: string } } | { video_url?: { url: string } } | Record<string, unknown>; export interface ChatMessage { role?: string; From a70a93b5a3b3df441e7fc59fa815e5028558f0c1 Mon Sep 17 00:00:00 2001 From: P3il4 <42489293+P3il4@users.noreply.github.com> Date: Fri, 27 Mar 2026 20:03:23 +0300 Subject: [PATCH 2/2] update docs for .chat --- src/docs/src/AI/chat.md | 31 ++++++++++++++----- src/docs/src/examples.js | 6 ++++ .../examples/ai-video-analysis.html | 16 ++++++++++ 3 files changed, 46 insertions(+), 7 deletions(-) create mode 100644 src/docs/src/playground/examples/ai-video-analysis.html diff --git a/src/docs/src/AI/chat.md b/src/docs/src/AI/chat.md index 8a1f863777..0e637aa520 100755 --- a/src/docs/src/AI/chat.md +++ b/src/docs/src/AI/chat.md @@ -1,6 +1,6 @@ --- title: puter.ai.chat() -description: Chat with AI models, analyze images, and perform function calls using 500+ models from OpenAI, Anthropic, Google, and more. 
+description: Chat with AI models, analyze images and videos, and perform function calls using 500+ models from OpenAI, Anthropic, Google, and more. platforms: [websites, apps, nodejs, workers] --- @@ -12,8 +12,8 @@ Given a prompt returns the completion that best matches the prompt. puter.ai.chat(prompt) puter.ai.chat(prompt, options = {}) puter.ai.chat(prompt, testMode = false, options = {}) -puter.ai.chat(prompt, image, testMode = false, options = {}) -puter.ai.chat(prompt, [imageURLArray], testMode = false, options = {}) +puter.ai.chat(prompt, media, testMode = false, options = {}) +puter.ai.chat(prompt, [mediaURLArray], testMode = false, options = {}) puter.ai.chat([messages], testMode = false, options = {}) ``` @@ -39,13 +39,13 @@ An object containing the following properties: A boolean indicating whether you want to use the test API. Defaults to `false`. This is useful for testing your code without using up API credits. -#### `image` (String | File) +#### `media` (String | File) -A string containing the URL or Puter path of the image, or a `File` object containing the image you want to provide as context for the completion. +A string containing the URL or Puter path of an image or video, or a `File` object containing the media you want to provide as context for the completion. -#### `imageURLArray` (Array) +#### `mediaURLArray` (Array) -An array of strings containing the URLs of images you want to provide as context for the completion. +An array of strings containing the URLs of images or videos you want to provide as context for the completion. 
#### `messages` (Array) @@ -194,6 +194,23 @@ List of OpenAI models that support the web search can be found in their [API com ``` +Video Analysis + +```html;ai-video-analysis + + + + + + +``` + Stream the response ```html;ai-chat-stream diff --git a/src/docs/src/examples.js b/src/docs/src/examples.js index c40317b236..f9a7442bbb 100644 --- a/src/docs/src/examples.js +++ b/src/docs/src/examples.js @@ -55,6 +55,12 @@ const examples = [ slug: 'ai-gpt-vision', source: '/playground/examples/ai-gpt-vision.html', }, + { + title: 'Video Analysis', + description: 'Analyze videos with AI using Puter.js. Run and modify this video analysis example instantly in your browser.', + slug: 'ai-video-analysis', + source: '/playground/examples/ai-video-analysis.html', + }, { title: 'Stream the response', description: 'Stream AI chat responses in real-time with Puter.js. Run and experiment with this streaming example in the playground.', diff --git a/src/docs/src/playground/examples/ai-video-analysis.html b/src/docs/src/playground/examples/ai-video-analysis.html new file mode 100644 index 0000000000..32ec871af0 --- /dev/null +++ b/src/docs/src/playground/examples/ai-video-analysis.html @@ -0,0 +1,16 @@ + + + + + +