From 32ed4b5c9c876136a743f57d36ec00ca101bba74 Mon Sep 17 00:00:00 2001 From: Tanmay Sardesai Date: Tue, 27 Jan 2026 23:51:16 -0800 Subject: [PATCH 01/13] feat: add 1280x800 viewport support and update Yutori templates default - Add 1280x800@60 viewport option to browser create/update commands - Update Yutori computer-use templates (TypeScript & Python) to use 1280x800 as default viewport - Update documentation and help text to reflect new viewport option --- cmd/browsers.go | 5 +++-- cmd/browsers_test.go | 2 +- pkg/templates/python/yutori-computer-use/README.md | 4 ++-- pkg/templates/python/yutori-computer-use/session.py | 2 +- pkg/templates/python/yutori-computer-use/tools/computer.py | 2 +- .../python/yutori-computer-use/tools/playwright_computer.py | 2 +- pkg/templates/typescript/yutori-computer-use/README.md | 4 ++-- pkg/templates/typescript/yutori-computer-use/loop.ts | 5 ++--- pkg/templates/typescript/yutori-computer-use/session.ts | 2 +- .../typescript/yutori-computer-use/tools/computer.ts | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cmd/browsers.go b/cmd/browsers.go index 7555d91..904de0f 100644 --- a/cmd/browsers.go +++ b/cmd/browsers.go @@ -126,6 +126,7 @@ func getAvailableViewports() []string { "1440x900@25", "1024x768@60", "1200x800@60", + "1280x800@60", } } @@ -2069,7 +2070,7 @@ func init() { browsersUpdateCmd.Flags().String("profile-id", "", "Profile ID to load into the browser session (mutually exclusive with --profile-name)") browsersUpdateCmd.Flags().String("profile-name", "", "Profile name to load into the browser session (mutually exclusive with --profile-id)") browsersUpdateCmd.Flags().Bool("save-changes", false, "If set, save changes back to the profile when the session ends") - browsersUpdateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). 
Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60") + browsersUpdateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60, 1280x800@60") browsersCmd.AddCommand(browsersListCmd) browsersCmd.AddCommand(browsersCreateCmd) @@ -2304,7 +2305,7 @@ func init() { browsersCreateCmd.Flags().Bool("save-changes", false, "If set, save changes back to the profile when the session ends") browsersCreateCmd.Flags().String("proxy-id", "", "Proxy ID to use for the browser session") browsersCreateCmd.Flags().StringSlice("extension", []string{}, "Extension IDs or names to load (repeatable; may be passed multiple times or comma-separated)") - browsersCreateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60") + browsersCreateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). 
Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60, 1280x800@60") browsersCreateCmd.Flags().Bool("viewport-interactive", false, "Interactively select viewport size from list") browsersCreateCmd.Flags().String("pool-id", "", "Browser pool ID to acquire from (mutually exclusive with --pool-name)") browsersCreateCmd.Flags().String("pool-name", "", "Browser pool name to acquire from (mutually exclusive with --pool-id)") diff --git a/cmd/browsers_test.go b/cmd/browsers_test.go index 447b6bd..eb3fa00 100644 --- a/cmd/browsers_test.go +++ b/cmd/browsers_test.go @@ -280,7 +280,6 @@ func TestBrowsersDelete_Failure(t *testing.T) { assert.True(t, strings.Contains(errMsg, "right failed") || strings.Contains(errMsg, "left failed"), "expected error message to contain either 'right failed' or 'left failed', got: %s", errMsg) } - func TestBrowsersView_ByID_PrintsURL(t *testing.T) { // Capture both pterm output and raw stdout setupStdoutCapture(t) @@ -1153,6 +1152,7 @@ func TestGetAvailableViewports_ReturnsExpectedOptions(t *testing.T) { assert.Contains(t, viewports, "1920x1200@25") assert.Contains(t, viewports, "1440x900@25") assert.Contains(t, viewports, "1200x800@60") + assert.Contains(t, viewports, "1280x800@60") assert.Contains(t, viewports, "1024x768@60") } diff --git a/pkg/templates/python/yutori-computer-use/README.md b/pkg/templates/python/yutori-computer-use/README.md index 2f8ec2f..2b52fd0 100644 --- a/pkg/templates/python/yutori-computer-use/README.md +++ b/pkg/templates/python/yutori-computer-use/README.md @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view ## Viewport Configuration -Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 25Hz**, which this template uses by default. +Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. 
-> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. The slight width difference (1200 vs 1280) should have minimal impact on accuracy. +> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations. diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori-computer-use/session.py index f4f2d01..bffa17b 100644 --- a/pkg/templates/python/yutori-computer-use/session.py +++ b/pkg/templates/python/yutori-computer-use/session.py @@ -32,7 +32,7 @@ class KernelBrowserSession: stealth: bool = True timeout_seconds: int = 300 - viewport_width: int = 1200 + viewport_width: int = 1280 viewport_height: int = 800 # Replay recording options diff --git a/pkg/templates/python/yutori-computer-use/tools/computer.py b/pkg/templates/python/yutori-computer-use/tools/computer.py index 4460161..e72f191 100644 --- a/pkg/templates/python/yutori-computer-use/tools/computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/computer.py @@ -91,7 +91,7 @@ class N1Action(TypedDict, total=False): class ComputerTool: - def __init__(self, kernel: Kernel, session_id: str, width: int = 1200, height: int = 800): + def __init__(self, kernel: Kernel, session_id: str, width: int = 1280, height: int = 800): self.kernel = kernel self.session_id = session_id self.width = width diff --git a/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py b/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py index df98628..c0b8839 100644 --- a/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py @@ -38,7 +38,7 @@ class PlaywrightComputerTool: - def __init__(self, cdp_ws_url: str, width: int = 
1200, height: int = 800): + def __init__(self, cdp_ws_url: str, width: int = 1280, height: int = 800): self.cdp_ws_url = cdp_ws_url self.width = width self.height = height diff --git a/pkg/templates/typescript/yutori-computer-use/README.md b/pkg/templates/typescript/yutori-computer-use/README.md index 625c94d..60bcd25 100644 --- a/pkg/templates/typescript/yutori-computer-use/README.md +++ b/pkg/templates/typescript/yutori-computer-use/README.md @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view ## Viewport Configuration -Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 25Hz**, which this template uses by default. +Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. -> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. The slight width difference (1200 vs 1280) should have minimal impact on accuracy. +> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations. 
diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori-computer-use/loop.ts index 351aa9c..c53fd3c 100644 --- a/pkg/templates/typescript/yutori-computer-use/loop.ts +++ b/pkg/templates/typescript/yutori-computer-use/loop.ts @@ -53,7 +53,7 @@ interface SamplingLoopOptions { cdpWsUrl?: string; maxTokens?: number; maxIterations?: number; - /** Viewport width for coordinate scaling (default: 1200, closest to Yutori's 1280 recommendation) */ + /** Viewport width for coordinate scaling (default: 1280 per Yutori recommendation) */ viewportWidth?: number; /** Viewport height for coordinate scaling (default: 800 per Yutori recommendation) */ viewportHeight?: number; @@ -80,8 +80,7 @@ export async function samplingLoop({ cdpWsUrl, maxTokens = 4096, maxIterations = 50, - // Default viewport: 1200x800 (closest Kernel-supported size to Yutori's recommended 1280x800) - viewportWidth = 1200, + viewportWidth = 1280, viewportHeight = 800, mode = 'computer_use', }: SamplingLoopOptions): Promise { diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index 3a3c567..24b1b9b 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -16,7 +16,7 @@ export interface SessionOptions { recordReplay?: boolean; /** Grace period in seconds before stopping replay */ replayGracePeriod?: number; - /** Viewport width (default: 1200, closest to Yutori's 1280 recommendation) */ + /** Viewport width (default: 1280 per Yutori recommendation) */ viewportWidth?: number; /** Viewport height (default: 800 per Yutori recommendation) */ viewportHeight?: number; diff --git a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts index 46fd76e..e9cdaf3 100644 --- a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts +++ 
b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts @@ -98,7 +98,7 @@ export class ComputerTool { private width: number; private height: number; - constructor(kernel: Kernel, sessionId: string, width = 1200, height = 800) { + constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) { this.kernel = kernel; this.sessionId = sessionId; this.width = width; From a17c0b57bfaa3b8ab26248f1bf168a03e1b0ee95 Mon Sep 17 00:00:00 2001 From: Tanmay Sardesai Date: Tue, 27 Jan 2026 23:59:09 -0800 Subject: [PATCH 02/13] test: update viewport count expectation to 7 --- cmd/browsers_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/browsers_test.go b/cmd/browsers_test.go index eb3fa00..49d770d 100644 --- a/cmd/browsers_test.go +++ b/cmd/browsers_test.go @@ -1146,7 +1146,7 @@ func TestParseViewport_InvalidFormats(t *testing.T) { func TestGetAvailableViewports_ReturnsExpectedOptions(t *testing.T) { viewports := getAvailableViewports() - assert.Len(t, viewports, 6) + assert.Len(t, viewports, 7) assert.Contains(t, viewports, "2560x1440@10") assert.Contains(t, viewports, "1920x1080@25") assert.Contains(t, viewports, "1920x1200@25") From 99d10e29b610fc42b708fb851b754038d948bbaf Mon Sep 17 00:00:00 2001 From: Tanmay Sardesai Date: Wed, 28 Jan 2026 00:12:31 -0800 Subject: [PATCH 03/13] fix: apply 1280x800 viewport default to remaining Yutori template files --- pkg/templates/python/yutori-computer-use/loop.py | 2 +- pkg/templates/typescript/yutori-computer-use/session.ts | 2 +- .../typescript/yutori-computer-use/tools/playwright-computer.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/templates/python/yutori-computer-use/loop.py b/pkg/templates/python/yutori-computer-use/loop.py index 236d4ad..ef5748c 100644 --- a/pkg/templates/python/yutori-computer-use/loop.py +++ b/pkg/templates/python/yutori-computer-use/loop.py @@ -49,7 +49,7 @@ async def sampling_loop( cdp_ws_url: Optional[str] = None, max_tokens: int = 4096, max_iterations: int = 50, - 
viewport_width: int = 1200, + viewport_width: int = 1280, viewport_height: int = 800, mode: BrowserMode = "computer_use", ) -> dict[str, Any]: diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index 24b1b9b..0621aad 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -37,7 +37,7 @@ const DEFAULT_OPTIONS: Required = { timeoutSeconds: 300, recordReplay: false, replayGracePeriod: 5.0, - viewportWidth: 1200, + viewportWidth: 1280, viewportHeight: 800, }; diff --git a/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts b/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts index d6ce229..3062c44 100644 --- a/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts +++ b/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts @@ -36,7 +36,7 @@ export class PlaywrightComputerTool { private context: BrowserContext | null = null; private page: Page | null = null; - constructor(cdpWsUrl: string, width = 1200, height = 800) { + constructor(cdpWsUrl: string, width = 1280, height = 800) { this.cdpWsUrl = cdpWsUrl; this.width = width; this.height = height; From b582c8193cdd6f5b6db6b145f23410193b344fd2 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Wed, 28 Jan 2026 10:25:33 -0500 Subject: [PATCH 04/13] refactor: remove refresh_rate from viewport settings in TypeScript and Python templates - Removed the `refresh_rate` property from the viewport configuration in both TypeScript and Python templates for the Anthropic and Yutori computer use sessions. - This change simplifies the viewport settings and aligns with the current requirements. 
--- pkg/templates/python/anthropic-computer-use/session.py | 1 - pkg/templates/python/yutori-computer-use/session.py | 1 - pkg/templates/typescript/anthropic-computer-use/session.ts | 1 - pkg/templates/typescript/yutori-computer-use/session.ts | 1 - 4 files changed, 4 deletions(-) diff --git a/pkg/templates/python/anthropic-computer-use/session.py b/pkg/templates/python/anthropic-computer-use/session.py index 3227b28..e01ab4b 100644 --- a/pkg/templates/python/anthropic-computer-use/session.py +++ b/pkg/templates/python/anthropic-computer-use/session.py @@ -54,7 +54,6 @@ async def __aenter__(self) -> "KernelBrowserSession": viewport={ "width": 1024, "height": 768, - "refresh_rate": 60, }, ) diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori-computer-use/session.py index bffa17b..42dc017 100644 --- a/pkg/templates/python/yutori-computer-use/session.py +++ b/pkg/templates/python/yutori-computer-use/session.py @@ -56,7 +56,6 @@ async def __aenter__(self) -> "KernelBrowserSession": viewport={ "width": self.viewport_width, "height": self.viewport_height, - "refresh_rate": 25, }, ) diff --git a/pkg/templates/typescript/anthropic-computer-use/session.ts b/pkg/templates/typescript/anthropic-computer-use/session.ts index 06e30a6..96bbdbb 100644 --- a/pkg/templates/typescript/anthropic-computer-use/session.ts +++ b/pkg/templates/typescript/anthropic-computer-use/session.ts @@ -96,7 +96,6 @@ export class KernelBrowserSession { viewport: { width: 1024, height: 768, - refresh_rate: 60, }, }); diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index 0621aad..e106461 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -117,7 +117,6 @@ export class KernelBrowserSession { viewport: { width: this.options.viewportWidth, height: this.options.viewportHeight, - refresh_rate: 25, }, }); 
From 152c9615883e00a3db1f13eb0063c3f252071eec Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Wed, 28 Jan 2026 10:44:14 -0500 Subject: [PATCH 05/13] Remove default viewport size comments from options Updated comments in SamplingLoopOptions and SessionOptions to remove references to default viewport width and height values, clarifying that these fields are for coordinate scaling and viewport size without specifying defaults. --- pkg/templates/typescript/yutori-computer-use/loop.ts | 4 ++-- pkg/templates/typescript/yutori-computer-use/session.ts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori-computer-use/loop.ts index c53fd3c..34cdc23 100644 --- a/pkg/templates/typescript/yutori-computer-use/loop.ts +++ b/pkg/templates/typescript/yutori-computer-use/loop.ts @@ -53,9 +53,9 @@ interface SamplingLoopOptions { cdpWsUrl?: string; maxTokens?: number; maxIterations?: number; - /** Viewport width for coordinate scaling (default: 1280 per Yutori recommendation) */ + /** Viewport width for coordinate scaling */ viewportWidth?: number; - /** Viewport height for coordinate scaling (default: 800 per Yutori recommendation) */ + /** Viewport height for coordinate scaling */ viewportHeight?: number; /** * Browser interaction mode: diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index e106461..d3324f0 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -16,9 +16,9 @@ export interface SessionOptions { recordReplay?: boolean; /** Grace period in seconds before stopping replay */ replayGracePeriod?: number; - /** Viewport width (default: 1280 per Yutori recommendation) */ + /** Viewport width */ viewportWidth?: number; - /** Viewport height (default: 800 per Yutori recommendation) */ + /** Viewport height */ 
viewportHeight?: number; } From 5c060e2f6709d6d426f393ccddbca1d3fa81dfed Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Wed, 28 Jan 2026 10:45:57 -0500 Subject: [PATCH 06/13] Add configurable viewport sizes + new defaults for Anthropic Templates Introduces viewportWidth and viewportHeight parameters to both Python and TypeScript anthropic templates, allowing the viewport size to be set when initializing sessions and tools. Updates default values to 1280x800 and ensures these values are used throughout session creation and tool instantiation. --- .../python/anthropic-computer-use/loop.py | 4 +++- .../python/anthropic-computer-use/session.py | 7 ++++-- .../anthropic-computer-use/tools/computer.py | 6 ++--- .../typescript/anthropic-computer-use/loop.ts | 6 ++++- .../anthropic-computer-use/session.ts | 22 +++++++++++++++++-- .../anthropic-computer-use/tools/computer.ts | 18 +++++++++------ 6 files changed, 47 insertions(+), 16 deletions(-) diff --git a/pkg/templates/python/anthropic-computer-use/loop.py b/pkg/templates/python/anthropic-computer-use/loop.py index 4062088..afee6f6 100644 --- a/pkg/templates/python/anthropic-computer-use/loop.py +++ b/pkg/templates/python/anthropic-computer-use/loop.py @@ -78,6 +78,8 @@ async def sampling_loop( tool_version: ToolVersion = "computer_use_20250124", thinking_budget: int | None = None, token_efficient_tools_beta: bool = False, + viewport_width: int = 1280, + viewport_height: int = 800, ): """ Agentic sampling loop for the assistant/tool interaction of computer use. 
@@ -99,7 +101,7 @@ async def sampling_loop( tool_group = TOOL_GROUPS_BY_VERSION[tool_version] tool_collection = ToolCollection( *( - ToolCls(kernel=kernel, session_id=session_id) if ToolCls.__name__.startswith("ComputerTool") else ToolCls() + ToolCls(kernel=kernel, session_id=session_id, width=viewport_width, height=viewport_height) if ToolCls.__name__.startswith("ComputerTool") else ToolCls() for ToolCls in tool_group.tools ) ) diff --git a/pkg/templates/python/anthropic-computer-use/session.py b/pkg/templates/python/anthropic-computer-use/session.py index e01ab4b..4718dbd 100644 --- a/pkg/templates/python/anthropic-computer-use/session.py +++ b/pkg/templates/python/anthropic-computer-use/session.py @@ -32,6 +32,9 @@ class KernelBrowserSession: stealth: bool = True timeout_seconds: int = 300 + viewport_width: int = 1280 + viewport_height: int = 800 + # Replay recording options record_replay: bool = False replay_grace_period: float = 5.0 # Seconds to wait before stopping replay @@ -52,8 +55,8 @@ async def __aenter__(self) -> "KernelBrowserSession": stealth=self.stealth, timeout_seconds=self.timeout_seconds, viewport={ - "width": 1024, - "height": 768, + "width": self.viewport_width, + "height": self.viewport_height, }, ) diff --git a/pkg/templates/python/anthropic-computer-use/tools/computer.py b/pkg/templates/python/anthropic-computer-use/tools/computer.py index 654a289..d4a46d7 100644 --- a/pkg/templates/python/anthropic-computer-use/tools/computer.py +++ b/pkg/templates/python/anthropic-computer-use/tools/computer.py @@ -107,8 +107,6 @@ class BaseComputerTool: """ name: Literal["computer"] = "computer" - width: int = 1024 - height: int = 768 display_num: int | None = None # Kernel client and session @@ -127,10 +125,12 @@ def options(self) -> ComputerToolOptions: "display_number": self.display_num, } - def __init__(self, kernel: Kernel | None = None, session_id: str | None = None): + def __init__(self, kernel: Kernel | None = None, session_id: str | None = None, 
width: int = 1280, height: int = 800): super().__init__() self.kernel = kernel self.session_id = session_id + self.width = width + self.height = height def validate_coordinates(self, coordinate: tuple[int, int] | list[int] | None = None) -> tuple[int, int] | None: """Validate that coordinates are non-negative integers and convert lists to tuples if needed.""" diff --git a/pkg/templates/typescript/anthropic-computer-use/loop.ts b/pkg/templates/typescript/anthropic-computer-use/loop.ts index fa775d9..cc209d1 100644 --- a/pkg/templates/typescript/anthropic-computer-use/loop.ts +++ b/pkg/templates/typescript/anthropic-computer-use/loop.ts @@ -57,6 +57,8 @@ export async function samplingLoop({ tokenEfficientToolsBeta = false, kernel, sessionId, + viewportWidth = 1280, + viewportHeight = 800, }: { model: string; systemPromptSuffix?: string; @@ -69,10 +71,12 @@ export async function samplingLoop({ tokenEfficientToolsBeta?: boolean; kernel: Kernel; sessionId: string; + viewportWidth?: number; + viewportHeight?: number; }): Promise { const selectedVersion = toolVersion || DEFAULT_TOOL_VERSION; const toolGroup = TOOL_GROUPS_BY_VERSION[selectedVersion]; - const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(kernel, sessionId))); + const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(kernel, sessionId, viewportWidth, viewportHeight))); const system: BetaTextBlock = { type: 'text', diff --git a/pkg/templates/typescript/anthropic-computer-use/session.ts b/pkg/templates/typescript/anthropic-computer-use/session.ts index 96bbdbb..fed3dd4 100644 --- a/pkg/templates/typescript/anthropic-computer-use/session.ts +++ b/pkg/templates/typescript/anthropic-computer-use/session.ts @@ -16,6 +16,10 @@ export interface SessionOptions { recordReplay?: boolean; /** Grace period in seconds before stopping replay */ 
replayGracePeriod?: number; + /** Viewport width */ + viewportWidth?: number; + /** Viewport height */ + viewportHeight?: number; } export interface SessionInfo { @@ -23,6 +27,8 @@ export interface SessionInfo { liveViewUrl: string; replayId?: string; replayViewUrl?: string; + viewportWidth: number; + viewportHeight: number; } const DEFAULT_OPTIONS: Required = { @@ -30,6 +36,8 @@ const DEFAULT_OPTIONS: Required = { timeoutSeconds: 300, recordReplay: false, replayGracePeriod: 5.0, + viewportWidth: 1280, + viewportHeight: 800, }; /** @@ -76,12 +84,22 @@ export class KernelBrowserSession { return this._replayViewUrl; } + get viewportWidth(): number { + return this.options.viewportWidth; + } + + get viewportHeight(): number { + return this.options.viewportHeight; + } + get info(): SessionInfo { return { sessionId: this.sessionId, liveViewUrl: this._liveViewUrl || '', replayId: this._replayId || undefined, replayViewUrl: this._replayViewUrl || undefined, + viewportWidth: this.options.viewportWidth, + viewportHeight: this.options.viewportHeight, }; } @@ -94,8 +112,8 @@ export class KernelBrowserSession { stealth: this.options.stealth, timeout_seconds: this.options.timeoutSeconds, viewport: { - width: 1024, - height: 768, + width: this.options.viewportWidth, + height: this.options.viewportHeight, }, }); diff --git a/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts b/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts index 8e415ad..580ea23 100644 --- a/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts +++ b/pkg/templates/typescript/anthropic-computer-use/tools/computer.ts @@ -12,6 +12,8 @@ export class ComputerTool implements BaseAnthropicTool { protected sessionId: string; protected _screenshotDelay = 2.0; protected version: '20241022' | '20250124'; + protected width: number; + protected height: number; private lastMousePosition: [number, number] = [0, 0]; @@ -39,10 +41,12 @@ export class ComputerTool implements 
BaseAnthropicTool { Action.WAIT, ]); - constructor(kernel: Kernel, sessionId: string, version: '20241022' | '20250124' = '20250124') { + constructor(kernel: Kernel, sessionId: string, version: '20241022' | '20250124' = '20250124', width = 1280, height = 800) { this.kernel = kernel; this.sessionId = sessionId; this.version = version; + this.width = width; + this.height = height; } get apiType(): 'computer_20241022' | 'computer_20250124' { @@ -53,8 +57,8 @@ export class ComputerTool implements BaseAnthropicTool { const params = { name: this.name, type: this.apiType, - display_width_px: 1024, - display_height_px: 768, + display_width_px: this.width, + display_height_px: this.height, display_number: null, }; return params; @@ -380,13 +384,13 @@ export class ComputerTool implements BaseAnthropicTool { // For backward compatibility export class ComputerTool20241022 extends ComputerTool { - constructor(kernel: Kernel, sessionId: string) { - super(kernel, sessionId, '20241022'); + constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) { + super(kernel, sessionId, '20241022', width, height); } } export class ComputerTool20250124 extends ComputerTool { - constructor(kernel: Kernel, sessionId: string) { - super(kernel, sessionId, '20250124'); + constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) { + super(kernel, sessionId, '20250124', width, height); } } From be44da4daf15e132ae715d7eb3a44ad3381efd46 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Fri, 30 Jan 2026 17:22:08 -0500 Subject: [PATCH 07/13] fix: update model version in Yutori computer use template Changed the model parameter in the Yutori computer use template from "n1-preview-2025-11" to "n1-latest" to ensure the latest model is utilized for tasks. 
--- pkg/templates/python/yutori-computer-use/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/templates/python/yutori-computer-use/main.py b/pkg/templates/python/yutori-computer-use/main.py index d909c67..9163328 100644 --- a/pkg/templates/python/yutori-computer-use/main.py +++ b/pkg/templates/python/yutori-computer-use/main.py @@ -59,7 +59,7 @@ async def cua_task( print("Kernel browser live view url:", session.live_view_url) loop_result = await sampling_loop( - model="n1-preview-2025-11", + model="n1-latest", task=payload["query"], api_key=str(api_key), kernel=session.kernel, From 3df3a3b9ab014bf437ae9f2aab0f97fb78268bf8 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Fri, 30 Jan 2026 17:23:39 -0500 Subject: [PATCH 08/13] Update Yutori Template model used to latest --- pkg/templates/python/yutori-computer-use/loop.py | 2 +- pkg/templates/typescript/yutori-computer-use/index.ts | 2 +- pkg/templates/typescript/yutori-computer-use/loop.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/templates/python/yutori-computer-use/loop.py b/pkg/templates/python/yutori-computer-use/loop.py index ef5748c..aa7f6b2 100644 --- a/pkg/templates/python/yutori-computer-use/loop.py +++ b/pkg/templates/python/yutori-computer-use/loop.py @@ -41,7 +41,7 @@ async def screenshot(self) -> ToolResult: async def sampling_loop( *, - model: str = "n1-preview-2025-11", + model: str = "n1-latest", task: str, api_key: str, kernel: Kernel, diff --git a/pkg/templates/typescript/yutori-computer-use/index.ts b/pkg/templates/typescript/yutori-computer-use/index.ts index afe51ba..a4aee2f 100644 --- a/pkg/templates/typescript/yutori-computer-use/index.ts +++ b/pkg/templates/typescript/yutori-computer-use/index.ts @@ -50,7 +50,7 @@ app.action( // Run the sampling loop const mode = payload.mode ?? 
'computer_use'; const { finalAnswer, messages } = await samplingLoop({ - model: 'n1-preview-2025-11', + model: 'n1-latest', task: payload.query, apiKey: YUTORI_API_KEY, kernel, diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori-computer-use/loop.ts index 34cdc23..0fcfba9 100644 --- a/pkg/templates/typescript/yutori-computer-use/loop.ts +++ b/pkg/templates/typescript/yutori-computer-use/loop.ts @@ -72,7 +72,7 @@ interface SamplingLoopResult { } export async function samplingLoop({ - model = 'n1-preview-2025-11', + model = 'n1-latest', task, apiKey, kernel, From 871d1738cf18f9c81346eb5e590b942dbb7fbf59 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Fri, 30 Jan 2026 19:24:26 -0500 Subject: [PATCH 09/13] fix: update URLs in QA commands to use HTTPS Replaced http://magnitasks.com with https://www.magnitasks.com in the kernel invoke commands within the QA documentation to ensure secure connections. This includes updates for the Anthropic, Gemini, and Yutori tasks related to the Magnitasks website. --- .cursor/commands/qa.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.cursor/commands/qa.md b/.cursor/commands/qa.md index 63d7d04..7f5a540 100644 --- a/.cursor/commands/qa.md +++ b/.cursor/commands/qa.md @@ -270,25 +270,25 @@ Once all deployments are complete, present the human with these invoke commands kernel invoke ts-basic get-page-title --payload '{"url": "https://www.google.com"}' kernel invoke ts-captcha-solver test-captcha-solver kernel invoke ts-stagehand teamsize-task --payload '{"company": "Kernel"}' -kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. 
You are done successfully when the items are moved.", "record_replay": true}' +kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}' kernel invoke ts-magnitude mag-url-extract --payload '{"url": "https://en.wikipedia.org/wiki/Special:Random"}' kernel invoke ts-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}' -kernel invoke ts-gemini-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}' +kernel invoke ts-gemini-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}' kernel invoke ts-claude-agent-sdk agent-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 3 stories"}' -kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}' -kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. 
You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}' +kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}' +kernel invoke ts-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}' # Python apps kernel invoke python-basic get-page-title --payload '{"url": "https://www.google.com"}' kernel invoke python-captcha-solver test-captcha-solver kernel invoke python-bu bu-task --payload '{"task": "Compare the price of gpt-4o and DeepSeek-V3"}' -kernel invoke python-anthropic-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}' +kernel invoke python-anthropic-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. 
You are done successfully when the items are moved.", "record_replay": true}' kernel invoke python-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}' kernel invoke python-openagi-cua openagi-default-task -p '{"instruction": "Navigate to https://agiopen.org and click the What is Computer Use? button"}' kernel invoke py-claude-agent-sdk agent-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 3 stories"}' -kernel invoke python-gemini-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}' -kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}' -kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}' +kernel invoke python-gemini-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. 
You are done successfully when the items are moved.", "record_replay": true}' +kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "computer_use"}' +kernel invoke python-yutori-cua cua-task --payload '{"query": "Go to https://www.magnitasks.com, Click the Tasks option in the left-side bar, and drag the 5 items in the To Do and In Progress columns to the Done section of the Kanban board. You are done successfully when the items are dragged to Done. Do not click into the items.", "record_replay": true, "mode": "playwright"}' ``` ## Step 7: Automated Runtime Testing (Optional) From e9453f049c37a04e023f235918d64e88b679b872 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Sat, 31 Jan 2026 08:13:40 -0500 Subject: [PATCH 10/13] WIP: Yutori template debugging - revert to n1-preview-2025-11 - Reverted model from n1-latest to n1-preview-2025-11 - Reverted viewport to 1200x800 (API issues with 1280) - Reverted message format to observation role - These changes are pending Yutori API stability fixes --- pkg/templates/python/yutori-computer-use/loop.py | 3 ++- pkg/templates/python/yutori-computer-use/main.py | 2 +- pkg/templates/python/yutori-computer-use/session.py | 2 +- pkg/templates/typescript/yutori-computer-use/index.ts | 2 +- pkg/templates/typescript/yutori-computer-use/loop.ts | 5 +++-- pkg/templates/typescript/yutori-computer-use/session.ts | 2 +- 6 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pkg/templates/python/yutori-computer-use/loop.py b/pkg/templates/python/yutori-computer-use/loop.py index aa7f6b2..a2dfd9a 100644 --- a/pkg/templates/python/yutori-computer-use/loop.py +++ b/pkg/templates/python/yutori-computer-use/loop.py @@ -41,7 
+41,7 @@ async def screenshot(self) -> ToolResult: async def sampling_loop( *, - model: str = "n1-latest", + model: str = "n1-preview-2025-11", task: str, api_key: str, kernel: Kernel, @@ -86,6 +86,7 @@ async def sampling_loop( } ] + # Add initial screenshot as observation (n1's required format) if initial_screenshot.get("base64_image"): conversation_messages.append({ "role": "observation", diff --git a/pkg/templates/python/yutori-computer-use/main.py b/pkg/templates/python/yutori-computer-use/main.py index 9163328..d909c67 100644 --- a/pkg/templates/python/yutori-computer-use/main.py +++ b/pkg/templates/python/yutori-computer-use/main.py @@ -59,7 +59,7 @@ async def cua_task( print("Kernel browser live view url:", session.live_view_url) loop_result = await sampling_loop( - model="n1-latest", + model="n1-preview-2025-11", task=payload["query"], api_key=str(api_key), kernel=session.kernel, diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori-computer-use/session.py index 42dc017..1c449ec 100644 --- a/pkg/templates/python/yutori-computer-use/session.py +++ b/pkg/templates/python/yutori-computer-use/session.py @@ -32,7 +32,7 @@ class KernelBrowserSession: stealth: bool = True timeout_seconds: int = 300 - viewport_width: int = 1280 + viewport_width: int = 1200 viewport_height: int = 800 # Replay recording options diff --git a/pkg/templates/typescript/yutori-computer-use/index.ts b/pkg/templates/typescript/yutori-computer-use/index.ts index a4aee2f..afe51ba 100644 --- a/pkg/templates/typescript/yutori-computer-use/index.ts +++ b/pkg/templates/typescript/yutori-computer-use/index.ts @@ -50,7 +50,7 @@ app.action( // Run the sampling loop const mode = payload.mode ?? 
'computer_use'; const { finalAnswer, messages } = await samplingLoop({ - model: 'n1-latest', + model: 'n1-preview-2025-11', task: payload.query, apiKey: YUTORI_API_KEY, kernel, diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori-computer-use/loop.ts index 0fcfba9..a4b7374 100644 --- a/pkg/templates/typescript/yutori-computer-use/loop.ts +++ b/pkg/templates/typescript/yutori-computer-use/loop.ts @@ -72,7 +72,7 @@ interface SamplingLoopResult { } export async function samplingLoop({ - model = 'n1-latest', + model = 'n1-preview-2025-11', task, apiKey, kernel, @@ -80,7 +80,7 @@ export async function samplingLoop({ cdpWsUrl, maxTokens = 4096, maxIterations = 50, - viewportWidth = 1280, + viewportWidth = 1200, viewportHeight = 800, mode = 'computer_use', }: SamplingLoopOptions): Promise { @@ -118,6 +118,7 @@ export async function samplingLoop({ }, ]; + // Add initial screenshot as observation (n1's required format) if (initialScreenshot.base64Image) { conversationMessages.push({ role: 'observation', diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index d3324f0..644f822 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -37,7 +37,7 @@ const DEFAULT_OPTIONS: Required = { timeoutSeconds: 300, recordReplay: false, replayGracePeriod: 5.0, - viewportWidth: 1280, + viewportWidth: 1200, viewportHeight: 800, }; From 70423d5d18200b5365895d779cb6e3c27217ecf9 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Sat, 31 Jan 2026 08:14:04 -0500 Subject: [PATCH 11/13] Revert Yutori template changes pending API stability Reverting Yutori template modifications made in this branch. 
The Yutori API has stability issues with: - 1280x800 screenshots (500 errors) - n1-latest model with observation message format - Multi-turn conversations failing after 2-3 iterations Yutori work preserved in branch: tanmay/yutori-template-fixes This PR now focuses only on: - Adding 1280x800 viewport support to CLI - Updating Anthropic computer-use templates --- pkg/templates/python/yutori-computer-use/README.md | 4 ++-- pkg/templates/python/yutori-computer-use/loop.py | 3 +-- pkg/templates/python/yutori-computer-use/session.py | 1 + pkg/templates/python/yutori-computer-use/tools/computer.py | 2 +- .../python/yutori-computer-use/tools/playwright_computer.py | 2 +- pkg/templates/typescript/yutori-computer-use/README.md | 4 ++-- pkg/templates/typescript/yutori-computer-use/loop.ts | 6 +++--- pkg/templates/typescript/yutori-computer-use/session.ts | 5 +++-- .../typescript/yutori-computer-use/tools/computer.ts | 2 +- .../yutori-computer-use/tools/playwright-computer.ts | 2 +- 10 files changed, 16 insertions(+), 15 deletions(-) diff --git a/pkg/templates/python/yutori-computer-use/README.md b/pkg/templates/python/yutori-computer-use/README.md index 2b52fd0..2f8ec2f 100644 --- a/pkg/templates/python/yutori-computer-use/README.md +++ b/pkg/templates/python/yutori-computer-use/README.md @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view ## Viewport Configuration -Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. +Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 60Hz**, which this template uses by default. -> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. +> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. 
The slight width difference (1200 vs 1280) should have minimal impact on accuracy. See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations. diff --git a/pkg/templates/python/yutori-computer-use/loop.py b/pkg/templates/python/yutori-computer-use/loop.py index a2dfd9a..236d4ad 100644 --- a/pkg/templates/python/yutori-computer-use/loop.py +++ b/pkg/templates/python/yutori-computer-use/loop.py @@ -49,7 +49,7 @@ async def sampling_loop( cdp_ws_url: Optional[str] = None, max_tokens: int = 4096, max_iterations: int = 50, - viewport_width: int = 1280, + viewport_width: int = 1200, viewport_height: int = 800, mode: BrowserMode = "computer_use", ) -> dict[str, Any]: @@ -86,7 +86,6 @@ async def sampling_loop( } ] - # Add initial screenshot as observation (n1's required format) if initial_screenshot.get("base64_image"): conversation_messages.append({ "role": "observation", diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori-computer-use/session.py index 1c449ec..f4f2d01 100644 --- a/pkg/templates/python/yutori-computer-use/session.py +++ b/pkg/templates/python/yutori-computer-use/session.py @@ -56,6 +56,7 @@ async def __aenter__(self) -> "KernelBrowserSession": viewport={ "width": self.viewport_width, "height": self.viewport_height, + "refresh_rate": 25, }, ) diff --git a/pkg/templates/python/yutori-computer-use/tools/computer.py b/pkg/templates/python/yutori-computer-use/tools/computer.py index e72f191..4460161 100644 --- a/pkg/templates/python/yutori-computer-use/tools/computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/computer.py @@ -91,7 +91,7 @@ class N1Action(TypedDict, total=False): class ComputerTool: - def __init__(self, kernel: Kernel, session_id: str, width: int = 1280, height: int = 800): + def __init__(self, kernel: Kernel, session_id: str, width: int = 1200, height: int = 800): self.kernel = kernel self.session_id = session_id self.width = width 
diff --git a/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py b/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py index c0b8839..df98628 100644 --- a/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py +++ b/pkg/templates/python/yutori-computer-use/tools/playwright_computer.py @@ -38,7 +38,7 @@ class PlaywrightComputerTool: - def __init__(self, cdp_ws_url: str, width: int = 1280, height: int = 800): + def __init__(self, cdp_ws_url: str, width: int = 1200, height: int = 800): self.cdp_ws_url = cdp_ws_url self.width = width self.height = height diff --git a/pkg/templates/typescript/yutori-computer-use/README.md b/pkg/templates/typescript/yutori-computer-use/README.md index 60bcd25..625c94d 100644 --- a/pkg/templates/typescript/yutori-computer-use/README.md +++ b/pkg/templates/typescript/yutori-computer-use/README.md @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view ## Viewport Configuration -Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. +Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 60Hz**, which this template uses by default. -> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. +> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. The slight width difference (1200 vs 1280) should have minimal impact on accuracy. See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations. 
diff --git a/pkg/templates/typescript/yutori-computer-use/loop.ts b/pkg/templates/typescript/yutori-computer-use/loop.ts index a4b7374..351aa9c 100644 --- a/pkg/templates/typescript/yutori-computer-use/loop.ts +++ b/pkg/templates/typescript/yutori-computer-use/loop.ts @@ -53,9 +53,9 @@ interface SamplingLoopOptions { cdpWsUrl?: string; maxTokens?: number; maxIterations?: number; - /** Viewport width for coordinate scaling */ + /** Viewport width for coordinate scaling (default: 1200, closest to Yutori's 1280 recommendation) */ viewportWidth?: number; - /** Viewport height for coordinate scaling */ + /** Viewport height for coordinate scaling (default: 800 per Yutori recommendation) */ viewportHeight?: number; /** * Browser interaction mode: @@ -80,6 +80,7 @@ export async function samplingLoop({ cdpWsUrl, maxTokens = 4096, maxIterations = 50, + // Default viewport: 1200x800 (closest Kernel-supported size to Yutori's recommended 1280x800) viewportWidth = 1200, viewportHeight = 800, mode = 'computer_use', @@ -118,7 +119,6 @@ export async function samplingLoop({ }, ]; - // Add initial screenshot as observation (n1's required format) if (initialScreenshot.base64Image) { conversationMessages.push({ role: 'observation', diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index 644f822..3a3c567 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -16,9 +16,9 @@ export interface SessionOptions { recordReplay?: boolean; /** Grace period in seconds before stopping replay */ replayGracePeriod?: number; - /** Viewport width */ + /** Viewport width (default: 1200, closest to Yutori's 1280 recommendation) */ viewportWidth?: number; - /** Viewport height */ + /** Viewport height (default: 800 per Yutori recommendation) */ viewportHeight?: number; } @@ -117,6 +117,7 @@ export class KernelBrowserSession { viewport: { width: 
this.options.viewportWidth, height: this.options.viewportHeight, + refresh_rate: 25, }, }); diff --git a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts index e9cdaf3..46fd76e 100644 --- a/pkg/templates/typescript/yutori-computer-use/tools/computer.ts +++ b/pkg/templates/typescript/yutori-computer-use/tools/computer.ts @@ -98,7 +98,7 @@ export class ComputerTool { private width: number; private height: number; - constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) { + constructor(kernel: Kernel, sessionId: string, width = 1200, height = 800) { this.kernel = kernel; this.sessionId = sessionId; this.width = width; diff --git a/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts b/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts index 3062c44..d6ce229 100644 --- a/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts +++ b/pkg/templates/typescript/yutori-computer-use/tools/playwright-computer.ts @@ -36,7 +36,7 @@ export class PlaywrightComputerTool { private context: BrowserContext | null = null; private page: Page | null = null; - constructor(cdpWsUrl: string, width = 1280, height = 800) { + constructor(cdpWsUrl: string, width = 1200, height = 800) { this.cdpWsUrl = cdpWsUrl; this.width = width; this.height = height; From 510fadbb47fa9d9f06f0bb997d3c5d20fc28c462 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Sat, 31 Jan 2026 08:19:00 -0500 Subject: [PATCH 12/13] remove refresh rate for yutori template --- pkg/templates/python/yutori-computer-use/session.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/templates/python/yutori-computer-use/session.py b/pkg/templates/python/yutori-computer-use/session.py index f4f2d01..1c449ec 100644 --- a/pkg/templates/python/yutori-computer-use/session.py +++ b/pkg/templates/python/yutori-computer-use/session.py @@ -56,7 +56,6 @@ async def 
__aenter__(self) -> "KernelBrowserSession": viewport={ "width": self.viewport_width, "height": self.viewport_height, - "refresh_rate": 25, }, ) From 8e3233890500377e782da5511e267a602d2ea069 Mon Sep 17 00:00:00 2001 From: Daniel Prevoznik Date: Sat, 31 Jan 2026 08:38:21 -0500 Subject: [PATCH 13/13] Remove refresh_rate from TypeScript Yutori template viewport Aligns TypeScript template with Python template and other templates where refresh_rate was removed from viewport settings. --- pkg/templates/typescript/yutori-computer-use/session.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/templates/typescript/yutori-computer-use/session.ts b/pkg/templates/typescript/yutori-computer-use/session.ts index 3a3c567..2ba5969 100644 --- a/pkg/templates/typescript/yutori-computer-use/session.ts +++ b/pkg/templates/typescript/yutori-computer-use/session.ts @@ -117,7 +117,6 @@ export class KernelBrowserSession { viewport: { width: this.options.viewportWidth, height: this.options.viewportHeight, - refresh_rate: 25, }, });