Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions src/backend/src/services/ai/utils/OpenAIUtil.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@ export const process_input_messages = async (messages) => {
const content = msg.content;

for ( const o of content ) {
if ( ! o['image_url'] ) continue;
if ( o.type ) continue;
o.type = 'image_url';
if ( o['image_url'] && !o.type ) {
o.type = 'image_url';
}
if ( o['video_url'] && !o.type ) {
o.type = 'video_url';
}
}

// coerce tool calls
Expand Down Expand Up @@ -93,9 +96,12 @@ export const process_input_messages_responses_api = async (messages) => {
const content = msg.content;

for ( const o of content ) {
if ( ! o['image_url'] ) continue;
if ( o.type ) continue;
o.type = 'image_url';
if ( o['image_url'] && !o.type ) {
o.type = 'image_url';
}
if ( o['video_url'] && !o.type ) {
o.type = 'video_url';
}
}

// coerce tool calls
Expand Down
31 changes: 24 additions & 7 deletions src/docs/src/AI/chat.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: puter.ai.chat()
description: Chat with AI models, analyze images, and perform function calls using 500+ models from OpenAI, Anthropic, Google, and more.
description: Chat with AI models, analyze images and videos, and perform function calls using 500+ models from OpenAI, Anthropic, Google, and more.
platforms: [websites, apps, nodejs, workers]
---

Expand All @@ -12,8 +12,8 @@ Given a prompt returns the completion that best matches the prompt.
puter.ai.chat(prompt)
puter.ai.chat(prompt, options = {})
puter.ai.chat(prompt, testMode = false, options = {})
puter.ai.chat(prompt, image, testMode = false, options = {})
puter.ai.chat(prompt, [imageURLArray], testMode = false, options = {})
puter.ai.chat(prompt, media, testMode = false, options = {})
puter.ai.chat(prompt, [mediaURLArray], testMode = false, options = {})
puter.ai.chat([messages], testMode = false, options = {})
```

Expand All @@ -39,13 +39,13 @@ An object containing the following properties:

A boolean indicating whether you want to use the test API. Defaults to `false`. This is useful for testing your code without using up API credits.

#### `image` (String | File)
#### `media` (String | File)

A string containing the URL or Puter path of the image, or a `File` object containing the image you want to provide as context for the completion.
A string containing the URL or Puter path of an image or video, or a `File` object containing the media you want to provide as context for the completion.

#### `imageURLArray` (Array)
#### `mediaURLArray` (Array)

An array of strings containing the URLs of images you want to provide as context for the completion.
An array of strings containing the URLs of images or videos you want to provide as context for the completion.

#### `messages` (Array)

Expand Down Expand Up @@ -194,6 +194,23 @@ List of OpenAI models that support the web search can be found in their [API com
</html>
```

<strong class="example-title">Video Analysis</strong>

```html;ai-video-analysis
<html>
<body>
<script src="https://js.puter.com/v2/"></script>
<script>
puter.ai
.chat(`What do you see?`, `https://assets.puter.site/puppy.mp4`, {
model: "reka/reka-edge",
})
.then(puter.print);
</script>
</body>
</html>
```

<strong class="example-title">Stream the response</strong>

```html;ai-chat-stream
Expand Down
6 changes: 6 additions & 0 deletions src/docs/src/examples.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ const examples = [
slug: 'ai-gpt-vision',
source: '/playground/examples/ai-gpt-vision.html',
},
{
title: 'Video Analysis',
description: 'Analyze videos with AI using Puter.js. Run and modify this video analysis example instantly in your browser.',
slug: 'ai-video-analysis',
source: '/playground/examples/ai-video-analysis.html',
},
{
title: 'Stream the response',
description: 'Stream AI chat responses in real-time with Puter.js. Run and experiment with this streaming example in the playground.',
Expand Down
16 changes: 16 additions & 0 deletions src/docs/src/playground/examples/ai-video-analysis.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<html>
<body>
<script src="https://js.puter.com/v2/"></script>
<script>
// Print a placeholder first; the chat call below is asynchronous.
puter.print(`Loading...`);

// Video analysis with Reka Edge: send a text prompt together with a video
// URL, select the model through the options object, and print the result
// once the returned promise resolves.
puter.ai
.chat(`What do you see?`, `https://assets.puter.site/puppy.mp4`, {
model: "reka/reka-edge",
})
.then(puter.print);
</script>
</body>
</html>
11 changes: 10 additions & 1 deletion src/puter-js/src/lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,15 @@ function arrayBufferToDataUri (arrayBuffer) {
});
}

// File extensions (lowercase, without the dot) that are treated as video.
const VIDEO_EXTENSIONS = ['mp4', 'webm', 'mov', 'mpeg', 'avi', 'mkv', 'm4v', 'ogv'];

/**
 * Decide whether a media reference should be sent as a video rather than an image.
 *
 * A reference counts as video when it is either a `data:` URI whose media type
 * starts with `video/`, or a URL/path whose final path segment ends in one of
 * `VIDEO_EXTENSIONS` (query string and fragment are ignored, case-insensitive).
 *
 * @param {*} url - Candidate URL, path, or data URI. Non-strings are never video.
 * @returns {boolean} `true` when the input looks like a video, otherwise `false`.
 */
const isVideoInput = (url) => {
    if ( typeof url !== 'string' ) return false;

    // URI schemes and MIME types are case-insensitive, so `DATA:Video/mp4` is
    // just as valid as `data:video/mp4`.
    if ( /^data:/i.test(url) ) {
        // For data URIs the media type is authoritative; never sniff an
        // "extension" out of the payload (e.g. `data:text/plain,clip.mp4`).
        return /^data:video\//i.test(url);
    }

    // Drop the query string / fragment, then look only at the last path
    // segment so dots in host names or directories are not mistaken for an
    // extension separator.
    const path = url.split(/[?#]/, 1)[0];
    const fileName = path.slice(path.lastIndexOf('/') + 1);

    // No dot means no extension: a file literally named `mp4` is not a video.
    const dotIndex = fileName.lastIndexOf('.');
    if ( dotIndex === -1 ) return false;

    return VIDEO_EXTENSIONS.includes(fileName.slice(dotIndex + 1).toLowerCase());
};

export {
arrayBufferToDataUri, blob_to_url, blobToDataUri, driverCall, handle_error, handle_resp, initXhr, make_driver_method, parseResponse, setupXhrEventHandlers, uuidv4,
arrayBufferToDataUri, blob_to_url, blobToDataUri, driverCall, handle_error, handle_resp, initXhr, isVideoInput, make_driver_method, parseResponse, setupXhrEventHandlers, uuidv4,
};
25 changes: 13 additions & 12 deletions src/puter-js/src/modules/AI.js
Original file line number Diff line number Diff line change
Expand Up @@ -647,36 +647,37 @@ class AI {
requestParams = { messages: [{ content: args[0] }] };
}

// ai.chat(prompt, imageURL/File)
// ai.chat(prompt, imageURL/File, testMode)
// ai.chat(prompt, mediaURL/File)
// ai.chat(prompt, mediaURL/File, testMode)
else if ( typeof args[0] === 'string' && (typeof args[1] === 'string' || args[1] instanceof File) ) {
// if imageURL is a File, transform it to a data URI
// if mediaURL is a File, transform it to a data URI
if ( args[1] instanceof File ) {
args[1] = await utils.blobToDataUri(args[1]);
}

// parse args[1] as an image_url object
const mediaBlock = utils.isVideoInput(args[1])
? { video_url: { url: args[1] } }
: { image_url: { url: args[1] } };

requestParams = {
vision: true,
messages: [
{
content: [
args[0],
{
image_url: {
url: args[1],
},
},
mediaBlock,
],
},
],
};
}
// chat(prompt, [imageURLs])
// chat(prompt, [mediaURLs])
else if ( typeof args[0] === 'string' && Array.isArray(args[1]) ) {
// parse args[1] as an array of image_url objects
for ( let i = 0; i < args[1].length; i++ ) {
args[1][i] = { image_url: { url: args[1][i] } };
const url = args[1][i];
args[1][i] = utils.isVideoInput(url)
? { video_url: { url } }
: { image_url: { url } };
}
requestParams = {
vision: true,
Expand Down
2 changes: 1 addition & 1 deletion src/puter-js/types/modules/ai.d.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export type AIMessageContent = string | { image_url?: { url: string } } | Record<string, unknown>;
export type AIMessageContent = string | { image_url?: { url: string } } | { video_url?: { url: string } } | Record<string, unknown>;

export interface ChatMessage {
role?: string;
Expand Down
Loading