Skip to content
Open
16 changes: 8 additions & 8 deletions .cursor/commands/qa.md
Original file line number Diff line number Diff line change
Expand Up @@ -270,25 +270,25 @@ Once all deployments are complete, present the human with these invoke commands
kernel invoke ts-basic get-page-title --payload '{"url": "https://www.google.com"}'
kernel invoke ts-captcha-solver test-captcha-solver
kernel invoke ts-stagehand teamsize-task --payload '{"company": "Kernel"}'
kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}'
kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}'
kernel invoke ts-magnitude mag-url-extract --payload '{"url": "https://en.wikipedia.org/wiki/Special:Random"}'
kernel invoke ts-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'
kernel invoke ts-gemini-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}'
kernel invoke ts-gemini-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}'
kernel invoke ts-claude-agent-sdk agent-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 3 stories"}'
kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}'
kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}'
kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}'
kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}'

# Python apps
kernel invoke python-basic get-page-title --payload '{"url": "https://www.google.com"}'
kernel invoke python-captcha-solver test-captcha-solver
kernel invoke python-bu bu-task --payload '{"task": "Compare the price of gpt-4o and DeepSeek-V3"}'
kernel invoke python-anthropic-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}'
kernel invoke python-anthropic-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}'
kernel invoke python-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'
kernel invoke python-openagi-cua openagi-default-task -p '{"instruction": "Navigate to https://agiopen.org and click the What is Computer Use? button"}'
kernel invoke py-claude-agent-sdk agent-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 3 stories"}'
kernel invoke python-gemini-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}'
kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}'
kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}'
kernel invoke python-gemini-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}'
kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}'
kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}'
```

## Step 7: Automated Runtime Testing (Optional)
Expand Down
5 changes: 3 additions & 2 deletions cmd/browsers.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ func getAvailableViewports() []string {
"1440x900@25",
"1024x768@60",
"1200x800@60",
"1280x800@60",
}
}

Expand Down Expand Up @@ -2069,7 +2070,7 @@ func init() {
browsersUpdateCmd.Flags().String("profile-id", "", "Profile ID to load into the browser session (mutually exclusive with --profile-name)")
browsersUpdateCmd.Flags().String("profile-name", "", "Profile name to load into the browser session (mutually exclusive with --profile-id)")
browsersUpdateCmd.Flags().Bool("save-changes", false, "If set, save changes back to the profile when the session ends")
browsersUpdateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60")
browsersUpdateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60, 1280x800@60")

browsersCmd.AddCommand(browsersListCmd)
browsersCmd.AddCommand(browsersCreateCmd)
Expand Down Expand Up @@ -2304,7 +2305,7 @@ func init() {
browsersCreateCmd.Flags().Bool("save-changes", false, "If set, save changes back to the profile when the session ends")
browsersCreateCmd.Flags().String("proxy-id", "", "Proxy ID to use for the browser session")
browsersCreateCmd.Flags().StringSlice("extension", []string{}, "Extension IDs or names to load (repeatable; may be passed multiple times or comma-separated)")
browsersCreateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60")
browsersCreateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60, 1280x800@60")
browsersCreateCmd.Flags().Bool("viewport-interactive", false, "Interactively select viewport size from list")
browsersCreateCmd.Flags().String("pool-id", "", "Browser pool ID to acquire from (mutually exclusive with --pool-name)")
browsersCreateCmd.Flags().String("pool-name", "", "Browser pool name to acquire from (mutually exclusive with --pool-id)")
Expand Down
4 changes: 2 additions & 2 deletions cmd/browsers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ func TestBrowsersDelete_Failure(t *testing.T) {
assert.True(t, strings.Contains(errMsg, "right failed") || strings.Contains(errMsg, "left failed"), "expected error message to contain either 'right failed' or 'left failed', got: %s", errMsg)
}


func TestBrowsersView_ByID_PrintsURL(t *testing.T) {
// Capture both pterm output and raw stdout
setupStdoutCapture(t)
Expand Down Expand Up @@ -1147,12 +1146,13 @@ func TestParseViewport_InvalidFormats(t *testing.T) {

func TestGetAvailableViewports_ReturnsExpectedOptions(t *testing.T) {
viewports := getAvailableViewports()
assert.Len(t, viewports, 6)
assert.Len(t, viewports, 7)
assert.Contains(t, viewports, "2560x1440@10")
assert.Contains(t, viewports, "1920x1080@25")
assert.Contains(t, viewports, "1920x1200@25")
assert.Contains(t, viewports, "1440x900@25")
assert.Contains(t, viewports, "1200x800@60")
assert.Contains(t, viewports, "1280x800@60")
assert.Contains(t, viewports, "1024x768@60")
}

Expand Down
4 changes: 3 additions & 1 deletion pkg/templates/python/anthropic-computer-use/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ async def sampling_loop(
tool_version: ToolVersion = "computer_use_20250124",
thinking_budget: int | None = None,
token_efficient_tools_beta: bool = False,
viewport_width: int = 1280,
viewport_height: int = 800,
):
"""
Agentic sampling loop for the assistant/tool interaction of computer use.
Expand All @@ -99,7 +101,7 @@ async def sampling_loop(
tool_group = TOOL_GROUPS_BY_VERSION[tool_version]
tool_collection = ToolCollection(
*(
ToolCls(kernel=kernel, session_id=session_id) if ToolCls.__name__.startswith("ComputerTool") else ToolCls()
ToolCls(kernel=kernel, session_id=session_id, width=viewport_width, height=viewport_height) if ToolCls.__name__.startswith("ComputerTool") else ToolCls()
for ToolCls in tool_group.tools
)
)
Expand Down
8 changes: 5 additions & 3 deletions pkg/templates/python/anthropic-computer-use/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class KernelBrowserSession:
stealth: bool = True
timeout_seconds: int = 300

viewport_width: int = 1280
viewport_height: int = 800

# Replay recording options
record_replay: bool = False
replay_grace_period: float = 5.0 # Seconds to wait before stopping replay
Expand All @@ -52,9 +55,8 @@ async def __aenter__(self) -> "KernelBrowserSession":
stealth=self.stealth,
timeout_seconds=self.timeout_seconds,
viewport={
"width": 1024,
"height": 768,
"refresh_rate": 60,
"width": self.viewport_width,
"height": self.viewport_height,
},
)

Expand Down
6 changes: 3 additions & 3 deletions pkg/templates/python/anthropic-computer-use/tools/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,6 @@ class BaseComputerTool:
"""

name: Literal["computer"] = "computer"
width: int = 1024
height: int = 768
display_num: int | None = None

# Kernel client and session
Expand All @@ -127,10 +125,12 @@ def options(self) -> ComputerToolOptions:
"display_number": self.display_num,
}

def __init__(self, kernel: Kernel | None = None, session_id: str | None = None):
def __init__(self, kernel: Kernel | None = None, session_id: str | None = None, width: int = 1280, height: int = 800):
super().__init__()
self.kernel = kernel
self.session_id = session_id
self.width = width
self.height = height

def validate_coordinates(self, coordinate: tuple[int, int] | list[int] | None = None) -> tuple[int, int] | None:
"""Validate that coordinates are non-negative integers and convert lists to tuples if needed."""
Expand Down
4 changes: 2 additions & 2 deletions pkg/templates/python/yutori-computer-use/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view

## Viewport Configuration

Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 25Hz**, which this template uses by default.
Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy.

> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. The slight width difference (1200 vs 1280) should have minimal impact on accuracy.
> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions.

See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations.

Expand Down
4 changes: 2 additions & 2 deletions pkg/templates/python/yutori-computer-use/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ async def screenshot(self) -> ToolResult:

async def sampling_loop(
*,
model: str = "n1-preview-2025-11",
model: str = "n1-latest",
task: str,
api_key: str,
kernel: Kernel,
session_id: str,
cdp_ws_url: Optional[str] = None,
max_tokens: int = 4096,
max_iterations: int = 50,
viewport_width: int = 1200,
viewport_width: int = 1280,
viewport_height: int = 800,
mode: BrowserMode = "computer_use",
) -> dict[str, Any]:
Expand Down
2 changes: 1 addition & 1 deletion pkg/templates/python/yutori-computer-use/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ async def cua_task(
print("Kernel browser live view url:", session.live_view_url)

loop_result = await sampling_loop(
model="n1-preview-2025-11",
model="n1-latest",
task=payload["query"],
api_key=str(api_key),
kernel=session.kernel,
Expand Down
3 changes: 1 addition & 2 deletions pkg/templates/python/yutori-computer-use/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class KernelBrowserSession:
stealth: bool = True
timeout_seconds: int = 300

viewport_width: int = 1200
viewport_width: int = 1280
viewport_height: int = 800

# Replay recording options
Expand All @@ -56,7 +56,6 @@ async def __aenter__(self) -> "KernelBrowserSession":
viewport={
"width": self.viewport_width,
"height": self.viewport_height,
"refresh_rate": 25,
},
)

Expand Down
2 changes: 1 addition & 1 deletion pkg/templates/python/yutori-computer-use/tools/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class N1Action(TypedDict, total=False):


class ComputerTool:
def __init__(self, kernel: Kernel, session_id: str, width: int = 1200, height: int = 800):
def __init__(self, kernel: Kernel, session_id: str, width: int = 1280, height: int = 800):
self.kernel = kernel
self.session_id = session_id
self.width = width
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@


class PlaywrightComputerTool:
def __init__(self, cdp_ws_url: str, width: int = 1200, height: int = 800):
def __init__(self, cdp_ws_url: str, width: int = 1280, height: int = 800):
self.cdp_ws_url = cdp_ws_url
self.width = width
self.height = height
Expand Down
6 changes: 5 additions & 1 deletion pkg/templates/typescript/anthropic-computer-use/loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ export async function samplingLoop({
tokenEfficientToolsBeta = false,
kernel,
sessionId,
viewportWidth = 1280,
viewportHeight = 800,
}: {
model: string;
systemPromptSuffix?: string;
Expand All @@ -69,10 +71,12 @@ export async function samplingLoop({
tokenEfficientToolsBeta?: boolean;
kernel: Kernel;
sessionId: string;
viewportWidth?: number;
viewportHeight?: number;
}): Promise<BetaMessageParam[]> {
const selectedVersion = toolVersion || DEFAULT_TOOL_VERSION;
const toolGroup = TOOL_GROUPS_BY_VERSION[selectedVersion];
const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(kernel, sessionId)));
const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(kernel, sessionId, viewportWidth, viewportHeight)));

const system: BetaTextBlock = {
type: 'text',
Expand Down
23 changes: 20 additions & 3 deletions pkg/templates/typescript/anthropic-computer-use/session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,28 @@ export interface SessionOptions {
recordReplay?: boolean;
/** Grace period in seconds before stopping replay */
replayGracePeriod?: number;
/** Viewport width */
viewportWidth?: number;
/** Viewport height */
viewportHeight?: number;
}

/**
 * Plain-data summary of a browser session, as produced by the `info`
 * accessor on `KernelBrowserSession`.
 */
export interface SessionInfo {
/** Identifier of the browser session. */
sessionId: string;
/** Live view URL; the `info` accessor substitutes an empty string when none is set. */
liveViewUrl: string;
/** Replay identifier; left undefined when no replay id is set. */
replayId?: string;
/** Replay view URL; left undefined when no replay URL is set. */
replayViewUrl?: string;
/** Viewport width configured for the session (presumably pixels — confirm against the Kernel viewport docs). */
viewportWidth: number;
/** Viewport height configured for the session (presumably pixels — confirm against the Kernel viewport docs). */
viewportHeight: number;
}

/**
 * Default value for every `SessionOptions` field.
 *
 * Typed as `Required<SessionOptions>` so the compiler rejects this object if
 * any option is left without a default.
 * NOTE(review): presumably merged with caller-supplied options in the
 * session constructor — that code is not visible here; confirm.
 */
const DEFAULT_OPTIONS: Required<SessionOptions> = {
stealth: true,
timeoutSeconds: 300,
recordReplay: false,
// Seconds to wait before stopping the replay.
replayGracePeriod: 5.0,
// 1280x800 default viewport.
viewportWidth: 1280,
viewportHeight: 800,
};

/**
Expand Down Expand Up @@ -76,12 +84,22 @@ export class KernelBrowserSession {
return this._replayViewUrl;
}

/** Viewport width taken from this session's options. */
get viewportWidth(): number {
  const { viewportWidth } = this.options;
  return viewportWidth;
}

/** Viewport height taken from this session's options. */
get viewportHeight(): number {
  const { viewportHeight } = this.options;
  return viewportHeight;
}

/**
 * Builds a `SessionInfo` snapshot of this session.
 *
 * Falsy live-view URLs become an empty string; falsy replay id / replay URL
 * values become `undefined`. Viewport dimensions come from the session
 * options.
 */
get info(): SessionInfo {
  // Read in the same order as the fields appear in the result.
  const sessionId = this.sessionId;
  const liveViewUrl = this._liveViewUrl || '';
  const replayId = this._replayId || undefined;
  const replayViewUrl = this._replayViewUrl || undefined;
  const viewportWidth = this.options.viewportWidth;
  const viewportHeight = this.options.viewportHeight;

  return {
    sessionId,
    liveViewUrl,
    replayId,
    replayViewUrl,
    viewportWidth,
    viewportHeight,
  };
}

Expand All @@ -94,9 +112,8 @@ export class KernelBrowserSession {
stealth: this.options.stealth,
timeout_seconds: this.options.timeoutSeconds,
viewport: {
width: 1024,
height: 768,
refresh_rate: 60,
width: this.options.viewportWidth,
height: this.options.viewportHeight,
},
});

Expand Down
18 changes: 11 additions & 7 deletions pkg/templates/typescript/anthropic-computer-use/tools/computer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ export class ComputerTool implements BaseAnthropicTool {
protected sessionId: string;
protected _screenshotDelay = 2.0;
protected version: '20241022' | '20250124';
protected width: number;
protected height: number;

private lastMousePosition: [number, number] = [0, 0];

Expand Down Expand Up @@ -39,10 +41,12 @@ export class ComputerTool implements BaseAnthropicTool {
Action.WAIT,
]);

constructor(kernel: Kernel, sessionId: string, version: '20241022' | '20250124' = '20250124') {
constructor(kernel: Kernel, sessionId: string, version: '20241022' | '20250124' = '20250124', width = 1280, height = 800) {
this.kernel = kernel;
this.sessionId = sessionId;
this.version = version;
this.width = width;
this.height = height;
}

get apiType(): 'computer_20241022' | 'computer_20250124' {
Expand All @@ -53,8 +57,8 @@ export class ComputerTool implements BaseAnthropicTool {
const params = {
name: this.name,
type: this.apiType,
display_width_px: 1024,
display_height_px: 768,
display_width_px: this.width,
display_height_px: this.height,
display_number: null,
};
return params;
Expand Down Expand Up @@ -380,13 +384,13 @@ export class ComputerTool implements BaseAnthropicTool {

// For backward compatibility
export class ComputerTool20241022 extends ComputerTool {
constructor(kernel: Kernel, sessionId: string) {
super(kernel, sessionId, '20241022');
constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) {
super(kernel, sessionId, '20241022', width, height);
}
}

export class ComputerTool20250124 extends ComputerTool {
constructor(kernel: Kernel, sessionId: string) {
super(kernel, sessionId, '20250124');
constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) {
super(kernel, sessionId, '20250124', width, height);
}
}
Loading