From b520f006a3b538ebead87cd67dfbb5e359c7ffbf Mon Sep 17 00:00:00 2001 From: Scott Hanselman Date: Fri, 24 Apr 2026 12:17:26 -0700 Subject: [PATCH 1/2] feat: add camera.clip, location.get, and canvas local files Camera: - Add camera.clip video recording (MP4, configurable duration/audio) - Duration clamped to 60s max, uses MediaCapture with HD720p encoding - 4 new tests for args defaults, clamping, routing, error handling Location: - New LocationCapability with location.get command - Uses Windows.Devices.Geolocation with accuracy/timeout params - Returns lat/lon/accuracy/timestamp - Permission denial mapped to LOCATION_PERMISSION_REQUIRED - 9 new tests Canvas: - Local file serving via WebView2 SetVirtualHostNameToFolderMapping - Maps openclaw-canvas.local to %LOCALAPPDATA%/OpenClawTray/canvas/ - Auto-reload with FileSystemWatcher (500ms debounce) - Whitelisted in IsUrlSafe security check All 808 tests pass (686 shared + 122 tray). --- .../Capabilities/CameraCapability.cs | 61 ++++- .../Capabilities/LocationCapability.cs | 85 ++++++ .../Services/CameraCaptureService.cs | 68 +++++ .../Services/NodeService.cs | 57 ++++ .../Windows/CanvasWindow.xaml.cs | 76 +++++- .../OpenClaw.Shared.Tests/CapabilityTests.cs | 245 +++++++++++++++++- 6 files changed, 587 insertions(+), 5 deletions(-) create mode 100644 src/OpenClaw.Shared/Capabilities/LocationCapability.cs diff --git a/src/OpenClaw.Shared/Capabilities/CameraCapability.cs b/src/OpenClaw.Shared/Capabilities/CameraCapability.cs index 49abfc0a..8854332e 100644 --- a/src/OpenClaw.Shared/Capabilities/CameraCapability.cs +++ b/src/OpenClaw.Shared/Capabilities/CameraCapability.cs @@ -14,8 +14,8 @@ public class CameraCapability : NodeCapabilityBase private static readonly string[] _commands = new[] { "camera.list", - "camera.snap" - // Future: "camera.clip" (video) + "camera.snap", + "camera.clip" }; public override IReadOnlyList Commands => _commands; @@ -23,6 +23,7 @@ public class CameraCapability : NodeCapabilityBase // 
Events for platform-specific implementation public event Func>? ListRequested; public event Func>? SnapRequested; + public event Func>? ClipRequested; public CameraCapability(IOpenClawLogger logger) : base(logger) { @@ -34,6 +35,7 @@ public override async Task ExecuteAsync(NodeInvokeRequest re { "camera.list" => await HandleListAsync(request), "camera.snap" => await HandleSnapAsync(request), + "camera.clip" => await HandleClipAsync(request), _ => Error($"Unknown command: {request.Command}") }; } @@ -97,6 +99,45 @@ private async Task HandleSnapAsync(NodeInvokeRequest request return Error($"Snap failed: {ex.Message}"); } } + + private async Task HandleClipAsync(NodeInvokeRequest request) + { + var deviceId = GetStringArg(request.Args, "deviceId"); + var durationMs = Math.Clamp(GetIntArg(request.Args, "durationMs", 3000), 1, 60000); // clamp both ends: the capture service awaits Task.Delay(DurationMs), where -1 never completes and other negatives throw + var includeAudio = GetBoolArg(request.Args, "includeAudio", true); + var format = GetStringArg(request.Args, "format", "mp4") ?? "mp4"; + + Logger.Info($"camera.clip: deviceId={deviceId ?? "(default)"}, durationMs={durationMs}, includeAudio={includeAudio}, format={format}"); + + if (ClipRequested == null) + { + return Error("Camera clip not available"); + } + + try + { + var result = await ClipRequested(new CameraClipArgs + { + DeviceId = deviceId, + DurationMs = durationMs, + IncludeAudio = includeAudio, + Format = format + }); + + return Success(new + { + format = result.Format, + base64 = result.Base64, + durationMs = result.DurationMs, + hasAudio = result.HasAudio + }); + } + catch (Exception ex) + { + Logger.Error("Camera clip failed", ex); + return Error($"Clip failed: {ex.Message}"); + } + } } public class CameraInfo @@ -121,3 +162,19 @@ public class CameraSnapResult public int Height { get; set; } public string Base64 { get; set; } = ""; } + +public class CameraClipArgs +{ + public string? 
DeviceId { get; set; } + public int DurationMs { get; set; } = 3000; + public bool IncludeAudio { get; set; } = true; + public string Format { get; set; } = "mp4"; +} + +public class CameraClipResult +{ + public string Format { get; set; } = "mp4"; + public string Base64 { get; set; } = ""; + public int DurationMs { get; set; } + public bool HasAudio { get; set; } +} diff --git a/src/OpenClaw.Shared/Capabilities/LocationCapability.cs b/src/OpenClaw.Shared/Capabilities/LocationCapability.cs new file mode 100644 index 00000000..93b8309c --- /dev/null +++ b/src/OpenClaw.Shared/Capabilities/LocationCapability.cs @@ -0,0 +1,85 @@ +using System; +using System.Collections.Generic; +using System.Threading.Tasks; + +namespace OpenClaw.Shared.Capabilities; + +/// +/// Location capability using Windows.Devices.Geolocation +/// +public class LocationCapability : NodeCapabilityBase +{ + public override string Category => "location"; + + private static readonly string[] _commands = new[] { "location.get" }; + + public override IReadOnlyList Commands => _commands; + + public event Func>? GetRequested; + + public LocationCapability(IOpenClawLogger logger) : base(logger) + { + } + + public override async Task ExecuteAsync(NodeInvokeRequest request) + { + return request.Command switch + { + "location.get" => await HandleGetAsync(request), + _ => Error($"Unknown command: {request.Command}") + }; + } + + private async Task HandleGetAsync(NodeInvokeRequest request) + { + var accuracy = GetStringArg(request.Args, "accuracy", "default"); + var maxAgeMs = GetIntArg(request.Args, "maxAge", 30000); + var timeoutMs = GetIntArg(request.Args, "locationTimeout", 10000); + + Logger.Info($"location.get: accuracy={accuracy}, maxAge={maxAgeMs}, timeout={timeoutMs}"); + + if (GetRequested == null) + return Error("Location not available"); + + try + { + var result = await GetRequested(new LocationGetArgs + { + Accuracy = accuracy ?? 
"default", + MaxAgeMs = maxAgeMs, + TimeoutMs = timeoutMs + }); + return Success(new + { + latitude = result.Latitude, + longitude = result.Longitude, + accuracy = result.AccuracyMeters, + timestamp = result.TimestampMs + }); + } + catch (UnauthorizedAccessException) + { + return Error("LOCATION_PERMISSION_REQUIRED"); + } + catch (Exception ex) + { + Logger.Error("location.get failed", ex); + return Error($"Location failed: {ex.Message}"); + } + } +} + +public class LocationGetArgs +{ + public string Accuracy { get; set; } = "default"; + public int MaxAgeMs { get; set; } = 30000; + public int TimeoutMs { get; set; } = 10000; +} + +public class LocationResult +{ + public double Latitude { get; set; } + public double Longitude { get; set; } + public double AccuracyMeters { get; set; } + public long TimestampMs { get; set; } +} diff --git a/src/OpenClaw.Tray.WinUI/Services/CameraCaptureService.cs b/src/OpenClaw.Tray.WinUI/Services/CameraCaptureService.cs index dc2adcfe..42c22bd6 100644 --- a/src/OpenClaw.Tray.WinUI/Services/CameraCaptureService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/CameraCaptureService.cs @@ -127,6 +127,74 @@ public async Task SnapAsync(CameraSnapArgs args) } } + public async Task ClipAsync(CameraClipArgs args) + { + _logger.Info($"camera.clip start: deviceId={args.DeviceId ?? "(default)"}, durationMs={args.DurationMs}, includeAudio={args.IncludeAudio}, format={args.Format}"); + await _captureLock.WaitAsync(); + + try + { + using var capture = new MediaCapture(); + + var settings = new MediaCaptureInitializationSettings + { + VideoDeviceId = args.DeviceId, + MemoryPreference = MediaCaptureMemoryPreference.Cpu, + StreamingCaptureMode = args.IncludeAudio + ? 
StreamingCaptureMode.AudioAndVideo + : StreamingCaptureMode.Video + }; + + var initStart = DateTime.UtcNow; + await capture.InitializeAsync(settings); + _logger.Info($"camera.clip: MediaCapture initialized in {(DateTime.UtcNow - initStart).TotalMilliseconds:0}ms"); + + using var stream = new InMemoryRandomAccessStream(); + var profile = MediaEncodingProfile.CreateMp4(VideoEncodingQuality.HD720p); + + var recordStart = DateTime.UtcNow; + await capture.StartRecordToStreamAsync(profile, stream); + _logger.Info($"camera.clip: recording started"); + + await Task.Delay(args.DurationMs); + + await capture.StopRecordAsync(); + var elapsed = (DateTime.UtcNow - recordStart).TotalMilliseconds; + _logger.Info($"camera.clip: recording stopped after {elapsed:0}ms"); + + stream.Seek(0); + var reader = new DataReader(stream); + await reader.LoadAsync((uint)stream.Size); + var buffer = new byte[stream.Size]; + reader.ReadBytes(buffer); + var base64 = Convert.ToBase64String(buffer); + + _logger.Info($"camera.clip: encoded {base64.Length} chars"); + + return new CameraClipResult + { + Format = args.Format, + Base64 = base64, + DurationMs = args.DurationMs, + HasAudio = args.IncludeAudio + }; + } + catch (UnauthorizedAccessException ex) + { + _logger.Error("Camera access denied. 
Check Windows privacy settings.", ex); + throw; + } + catch (Exception ex) + { + _logger.Error($"camera.clip failed (0x{ex.HResult:X8})", ex); + throw; + } + finally + { + _captureLock.Release(); + } + } + private async Task CaptureWithFallbackAsync( MediaCapture capture, List candidates) diff --git a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs index 361fc365..e83fc684 100644 --- a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs @@ -1,4 +1,5 @@ using System; +using System.Threading; using System.Threading.Tasks; using Microsoft.Toolkit.Uwp.Notifications; using Microsoft.UI.Dispatching; @@ -29,6 +30,7 @@ public class NodeService : IDisposable private CanvasCapability? _canvasCapability; private ScreenCapability? _screenCapability; private CameraCapability? _cameraCapability; + private LocationCapability? _locationCapability; private readonly string _dataPath; private string? _token; @@ -135,8 +137,14 @@ private void RegisterCapabilities() _cameraCapability = new CameraCapability(_logger); _cameraCapability.ListRequested += OnCameraList; _cameraCapability.SnapRequested += OnCameraSnap; + _cameraCapability.ClipRequested += OnCameraClip; _nodeClient.RegisterCapability(_cameraCapability); + // Location capability + _locationCapability = new LocationCapability(_logger); + _locationCapability.GetRequested += async (args) => await GetLocationAsync(args); + _nodeClient.RegisterCapability(_locationCapability); + _logger.Info("All capabilities registered"); } @@ -479,6 +487,55 @@ private async Task OnCameraSnap(CameraSnapArgs args) } } + private async Task OnCameraClip(CameraClipArgs args) + { + if (_cameraCaptureService == null) + { + throw new InvalidOperationException("Camera capture service not available"); + } + + try + { + return await _cameraCaptureService.ClipAsync(args); + } + catch (UnauthorizedAccessException ex) + { + try + { + new ToastContentBuilder() + 
.AddText(LocalizationHelper.GetString("Toast_CameraBlocked")) + .AddText(LocalizationHelper.GetString("Toast_CameraBlockedDetail")) + .Show(); + } + catch { } + + throw new InvalidOperationException( + "Camera access blocked. Enable camera access for desktop apps in Windows Privacy settings.", + ex); + } + } + + private async Task GetLocationAsync(LocationGetArgs args) + { + var geolocator = new global::Windows.Devices.Geolocation.Geolocator + { + DesiredAccuracy = args.Accuracy == "precise" + ? global::Windows.Devices.Geolocation.PositionAccuracy.High + : global::Windows.Devices.Geolocation.PositionAccuracy.Default + }; + + using var cts = new CancellationTokenSource(TimeSpan.FromMilliseconds(args.TimeoutMs)); + var position = await geolocator.GetGeopositionAsync().AsTask(cts.Token); + + return new LocationResult + { + Latitude = position.Coordinate.Point.Position.Latitude, + Longitude = position.Coordinate.Point.Position.Longitude, + AccuracyMeters = position.Coordinate.Accuracy, + TimestampMs = position.Coordinate.Timestamp.ToUnixTimeMilliseconds() + }; + } + #endregion public void Dispose() diff --git a/src/OpenClaw.Tray.WinUI/Windows/CanvasWindow.xaml.cs b/src/OpenClaw.Tray.WinUI/Windows/CanvasWindow.xaml.cs index 9729c1b5..27c72e4a 100644 --- a/src/OpenClaw.Tray.WinUI/Windows/CanvasWindow.xaml.cs +++ b/src/OpenClaw.Tray.WinUI/Windows/CanvasWindow.xaml.cs @@ -1,6 +1,7 @@ using System; using System.IO; using System.Text.RegularExpressions; +using System.Threading; using System.Threading.Tasks; using System.Runtime.InteropServices; using Microsoft.UI.Xaml; @@ -39,6 +40,12 @@ public sealed partial class CanvasWindow : WindowEx private string? _pendingHtml; private readonly TaskCompletionSource _webViewReadyTcs = new(TaskCreationOptions.RunContinuationsAsynchronously); private TaskCompletionSource? 
_navigationTcs; + + private readonly string _canvasDir = Path.Combine( + Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), + "OpenClawTray", "canvas"); + private FileSystemWatcher? _canvasWatcher; + private long _lastReloadTicks = 0; // HTML sanitization — block embedded iframes/objects/embeds/applets private static readonly Regex s_sanitizeBlock = new( @@ -64,6 +71,12 @@ private bool IsUrlSafe(string url) { return IsSafeDataUrl(url); } + // Allow URLs from the canvas virtual host + if (url.StartsWith("https://openclaw-canvas.local/", StringComparison.OrdinalIgnoreCase) || + url.Equals("https://openclaw-canvas.local", StringComparison.OrdinalIgnoreCase)) + { + return true; + } // Allow URLs from the trusted gateway origin with strict boundary check if (!string.IsNullOrEmpty(_trustedGatewayOrigin) && url.StartsWith(_trustedGatewayOrigin, StringComparison.OrdinalIgnoreCase) && @@ -162,7 +175,25 @@ private string RewriteGatewayUrl(string url) } // Same origin — just add token if needed - return AppendGatewayToken(url); + url = AppendGatewayToken(url); + + // If this is a canvas document path and we have it locally, use the virtual host + if (url.Contains("/__openclaw__/canvas/documents/") && !string.IsNullOrEmpty(_canvasDir)) + { + var pathPart = new Uri(url).AbsolutePath; + var localRelative = pathPart.Replace("/__openclaw__/canvas/documents/", ""); + var localPath = Path.GetFullPath(Path.Combine(_canvasDir, localRelative.Replace('/', Path.DirectorySeparatorChar))); + // Containment check — block directory traversal + if (localPath.StartsWith(_canvasDir + Path.DirectorySeparatorChar, StringComparison.OrdinalIgnoreCase) && + File.Exists(localPath)) + { + var localUrl = $"https://openclaw-canvas.local/{localRelative}"; + Logger.Info($"[Canvas] Using local file: {localUrl}"); + return localUrl; + } + } + + return url; } catch (Exception ex) { @@ -197,7 +228,29 @@ private async void InitializeWebViewAsync() ErrorPanel.Visibility = 
Visibility.Collapsed; await CanvasWebView.EnsureCoreWebView2Async(); - + + // Map local canvas files to a virtual hostname so canvas content + // can be served without hitting the gateway HTTP server. + // Files in %LOCALAPPDATA%/OpenClawTray/canvas/ are served at + // https://openclaw-canvas.local/ + Directory.CreateDirectory(_canvasDir); + CanvasWebView.CoreWebView2.SetVirtualHostNameToFolderMapping( + "openclaw-canvas.local", + _canvasDir, + CoreWebView2HostResourceAccessKind.Allow); + Logger.Info($"[Canvas] Virtual host mapped: openclaw-canvas.local → {_canvasDir}"); + + // Watch for local canvas file changes and auto-reload + _canvasWatcher = new FileSystemWatcher(_canvasDir) + { + IncludeSubdirectories = true, + NotifyFilter = NotifyFilters.LastWrite | NotifyFilters.FileName | NotifyFilters.DirectoryName, + EnableRaisingEvents = true + }; + _canvasWatcher.Changed += OnCanvasFileChanged; + _canvasWatcher.Created += OnCanvasFileChanged; + _canvasWatcher.Renamed += (s, e) => OnCanvasFileChanged(s, e); + // Configure WebView2 CanvasWebView.CoreWebView2.Settings.IsScriptEnabled = true; CanvasWebView.CoreWebView2.Settings.AreDefaultScriptDialogsEnabled = false; @@ -320,9 +373,28 @@ private void OnNavigationCompleted(CoreWebView2 sender, CoreWebView2NavigationCo } } + private void OnCanvasFileChanged(object sender, FileSystemEventArgs e) + { + // Trailing-edge debounce (500ms): every event stamps _lastReloadTicks and only the timer started by the newest event reloads, so the page refreshes after a burst of writes settles rather than on its first (possibly partial) write + var stampTicks = DateTime.UtcNow.Ticks; + Interlocked.Exchange(ref _lastReloadTicks, stampTicks); + Task.Delay(500).ContinueWith(t => DispatcherQueue.TryEnqueue(() => + { + // A later event replaced the stamp; the timer started by that event performs the reload + if (Interlocked.Read(ref _lastReloadTicks) != stampTicks) return; + if (_isWebViewInitialized && !IsClosed) + { + Logger.Info($"[Canvas] File changed: {e.Name}, reloading"); + CanvasWebView.CoreWebView2.Reload(); + } + }), TaskScheduler.Default); + } + private void OnWindowClosed(object sender, WindowEventArgs args) { IsClosed = true; + _canvasWatcher?.Dispose(); + _canvasWatcher = null; } private void OnRetryClick(object 
sender, RoutedEventArgs e) diff --git a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs index e32980ce..985fe714 100644 --- a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs +++ b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs @@ -851,7 +851,8 @@ public void CanHandle_CameraCommands() var cap = new CameraCapability(NullLogger.Instance); Assert.True(cap.CanHandle("camera.list")); Assert.True(cap.CanHandle("camera.snap")); - Assert.False(cap.CanHandle("camera.clip")); + Assert.True(cap.CanHandle("camera.clip")); + Assert.False(cap.CanHandle("camera.unknown")); Assert.Equal("camera", cap.Category); } @@ -964,4 +965,246 @@ public async Task Snap_ReturnsError_WhenHandlerThrows() Assert.False(res.Ok); Assert.Contains("Camera access blocked", res.Error); } + + [Fact] + public void CameraClipArgs_DefaultValues() + { + var args = new CameraClipArgs(); + Assert.Equal(3000, args.DurationMs); + Assert.True(args.IncludeAudio); + Assert.Equal("mp4", args.Format); + Assert.Null(args.DeviceId); + } + + [Fact] + public async Task Clip_ClampsDuration_ToMax60000() + { + var cap = new CameraCapability(NullLogger.Instance); + CameraClipArgs? receivedArgs = null; + cap.ClipRequested += (args) => + { + receivedArgs = args; + return Task.FromResult(new CameraClipResult { Format = "mp4", Base64 = "vid", DurationMs = args.DurationMs, HasAudio = true }); + }; + + var req = new NodeInvokeRequest + { + Id = "clip1", + Command = "camera.clip", + Args = Parse("""{"durationMs":120000}""") + }; + + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.NotNull(receivedArgs); + Assert.Equal(60000, receivedArgs!.DurationMs); + } + + [Fact] + public async Task Clip_RoutesToHandler_WithArgs() + { + var cap = new CameraCapability(NullLogger.Instance); + CameraClipArgs? 
receivedArgs = null; + cap.ClipRequested += (args) => + { + receivedArgs = args; + return Task.FromResult(new CameraClipResult { Format = "mp4", Base64 = "vid", DurationMs = args.DurationMs, HasAudio = args.IncludeAudio }); + }; + + var req = new NodeInvokeRequest + { + Id = "clip2", + Command = "camera.clip", + Args = Parse("""{"deviceId":"cam-1","durationMs":5000,"includeAudio":false,"format":"mp4"}""") + }; + + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.NotNull(receivedArgs); + Assert.Equal("cam-1", receivedArgs!.DeviceId); + Assert.Equal(5000, receivedArgs.DurationMs); + Assert.False(receivedArgs.IncludeAudio); + Assert.Equal("mp4", receivedArgs.Format); + } + + [Fact] + public async Task Clip_ReturnsError_WhenNoHandler() + { + var cap = new CameraCapability(NullLogger.Instance); + var req = new NodeInvokeRequest { Id = "clip3", Command = "camera.clip", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.NotNull(res.Error); + Assert.Contains("not available", res.Error!, StringComparison.OrdinalIgnoreCase); + } } + +public class LocationCapabilityTests +{ + private static JsonElement Parse(string json) + { + using var doc = JsonDocument.Parse(json); + return doc.RootElement.Clone(); + } + + [Fact] + public void LocationGetArgs_HasCorrectDefaults() + { + var args = new LocationGetArgs(); + Assert.Equal("default", args.Accuracy); + Assert.Equal(30000, args.MaxAgeMs); + Assert.Equal(10000, args.TimeoutMs); + } + + [Fact] + public void CanHandle_LocationCommands() + { + var cap = new LocationCapability(NullLogger.Instance); + Assert.True(cap.CanHandle("location.get")); + Assert.False(cap.CanHandle("location.watch")); + Assert.Equal("location", cap.Category); + } + + [Fact] + public async Task Get_ReturnsError_WhenNoHandler() + { + var cap = new LocationCapability(NullLogger.Instance); + var req = new NodeInvokeRequest { Id = "loc1", Command = "location.get", Args = Parse("""{}""") }; + var 
res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.NotNull(res.Error); + Assert.Contains("not available", res.Error!, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task Get_ReturnsLocation_WhenHandler() + { + var cap = new LocationCapability(NullLogger.Instance); + cap.GetRequested += (args) => Task.FromResult(new LocationResult + { + Latitude = 47.6062, + Longitude = -122.3321, + AccuracyMeters = 15.5, + TimestampMs = 1700000000000 + }); + + var req = new NodeInvokeRequest { Id = "loc2", Command = "location.get", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.NotNull(res.Payload); + + var json = System.Text.Json.JsonSerializer.Serialize(res.Payload); + using var doc = System.Text.Json.JsonDocument.Parse(json); + var root = doc.RootElement; + Assert.Equal(47.6062, root.GetProperty("latitude").GetDouble(), 4); + Assert.Equal(-122.3321, root.GetProperty("longitude").GetDouble(), 4); + Assert.Equal(15.5, root.GetProperty("accuracy").GetDouble(), 1); + Assert.Equal(1700000000000, root.GetProperty("timestamp").GetInt64()); + } + + [Fact] + public async Task Get_PassesArgs_ToHandler() + { + var cap = new LocationCapability(NullLogger.Instance); + LocationGetArgs? 
receivedArgs = null; + cap.GetRequested += (args) => + { + receivedArgs = args; + return Task.FromResult(new LocationResult + { + Latitude = 0, Longitude = 0, AccuracyMeters = 0, TimestampMs = 0 + }); + }; + + var req = new NodeInvokeRequest + { + Id = "loc3", + Command = "location.get", + Args = Parse("""{"accuracy":"precise","maxAge":5000,"locationTimeout":3000}""") + }; + + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.NotNull(receivedArgs); + Assert.Equal("precise", receivedArgs!.Accuracy); + Assert.Equal(5000, receivedArgs.MaxAgeMs); + Assert.Equal(3000, receivedArgs.TimeoutMs); + } + + [Fact] + public async Task Get_UsesDefaults_WhenArgsMissing() + { + var cap = new LocationCapability(NullLogger.Instance); + LocationGetArgs? receivedArgs = null; + cap.GetRequested += (args) => + { + receivedArgs = args; + return Task.FromResult(new LocationResult + { + Latitude = 0, Longitude = 0, AccuracyMeters = 0, TimestampMs = 0 + }); + }; + + var req = new NodeInvokeRequest { Id = "loc4", Command = "location.get", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.Equal("default", receivedArgs!.Accuracy); + Assert.Equal(30000, receivedArgs.MaxAgeMs); + Assert.Equal(10000, receivedArgs.TimeoutMs); + } + + [Fact] + public async Task Get_ReturnsPermissionError_WhenUnauthorized() + { + var cap = new LocationCapability(NullLogger.Instance); + cap.GetRequested += (args) => throw new UnauthorizedAccessException("No permission"); + + var req = new NodeInvokeRequest { Id = "loc5", Command = "location.get", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Equal("LOCATION_PERMISSION_REQUIRED", res.Error); + } + + [Fact] + public async Task Get_ReturnsError_WhenHandlerThrows() + { + var cap = new LocationCapability(NullLogger.Instance); + cap.GetRequested += (args) => throw new InvalidOperationException("GPS unavailable"); + + var req = new NodeInvokeRequest { 
Id = "loc6", Command = "location.get", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("GPS unavailable", res.Error); + } + + [Fact] + public void LocationResult_Serialization() + { + var result = new LocationResult + { + Latitude = 48.8566, + Longitude = 2.3522, + AccuracyMeters = 10.0, + TimestampMs = 1700000000000 + }; + + var json = System.Text.Json.JsonSerializer.Serialize(result); + var deserialized = System.Text.Json.JsonSerializer.Deserialize(json); + + Assert.NotNull(deserialized); + Assert.Equal(result.Latitude, deserialized!.Latitude); + Assert.Equal(result.Longitude, deserialized.Longitude); + Assert.Equal(result.AccuracyMeters, deserialized.AccuracyMeters); + Assert.Equal(result.TimestampMs, deserialized.TimestampMs); + } + + [Fact] + public async Task ExecuteAsync_ReturnsError_ForUnknownCommand() + { + var cap = new LocationCapability(NullLogger.Instance); + var req = new NodeInvokeRequest { Id = "loc7", Command = "location.watch", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("Unknown command", res.Error); + } +} \ No newline at end of file From 5a0a373e6d9ed9ff8cca645706335702321e4d93 Mon Sep 17 00:00:00 2001 From: Scott Hanselman Date: Sat, 25 Apr 2026 19:23:16 -0700 Subject: [PATCH 2/2] fix: align Windows node gateway command parity Use the gateway canonical screen.snapshot command, remove the unsupported screen.list surface, and make camera.clip choose a supported Windows MediaCapture record stream instead of forcing HD720p. Also skip startup update prompts in debug builds so local node debugging can connect immediately. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 15 +- docs/WINDOWS_NODE_ARCHITECTURE.md | 3 +- docs/WINDOWS_NODE_TESTING.md | 3 +- docs/gateway-node-integration.md | 396 ++++++++++++++++++ .../Capabilities/ScreenCapability.cs | 56 +-- src/OpenClaw.Tray.WinUI/App.xaml.cs | 5 + .../Services/CameraCaptureService.cs | 77 +++- .../Services/NodeService.cs | 7 - .../Services/ScreenCaptureService.cs | 35 -- .../OpenClaw.Shared.Tests/CapabilityTests.cs | 69 +-- tests/OpenClaw.Shared.Tests/README.md | 6 +- 11 files changed, 499 insertions(+), 173 deletions(-) create mode 100644 docs/gateway-node-integration.md diff --git a/README.md b/README.md index ffac859e..c89b24f0 100644 --- a/README.md +++ b/README.md @@ -172,8 +172,8 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t |------------|----------|-------------| | **System** | `system.notify`, `system.run`, `system.run.prepare`, `system.which`, `system.execApprovals.get`, `system.execApprovals.set` | Show Windows toast notifications, execute commands with policy controls | | **Canvas** | `canvas.present`, `canvas.hide`, `canvas.navigate`, `canvas.eval`, `canvas.snapshot`, `canvas.a2ui.push`, `canvas.a2ui.reset` | Display and control a WebView2 window | -| **Screen** | `screen.capture`, `screen.list` | Capture screenshots | -| **Camera** | `camera.list`, `camera.snap` | Enumerate cameras and capture a still photo | +| **Screen** | `screen.snapshot` | Capture screenshots | +| **Camera** | `camera.list`, `camera.snap`, `camera.clip` | Enumerate cameras and capture still photos or short video clips | #### Node Setup @@ -203,10 +203,11 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t "canvas.snapshot", "canvas.a2ui.push", "canvas.a2ui.reset", - "screen.capture", - "screen.list", - "camera.list", - "camera.snap" + "screen.snapshot", + "camera.list", + "camera.snap", + "camera.clip", + "location.get" ] } } @@ -229,7 +230,7 @@ 
When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t openclaw nodes canvas a2ui push --node --jsonl "$(Get-Content -Raw .\\ui.jsonl)" # Take a screenshot - openclaw nodes invoke --node --command screen.capture --params '{"screenIndex":0,"format":"png"}' + openclaw nodes invoke --node --command screen.snapshot --params '{"screenIndex":0,"format":"png"}' # List cameras openclaw nodes invoke --node --command camera.list diff --git a/docs/WINDOWS_NODE_ARCHITECTURE.md b/docs/WINDOWS_NODE_ARCHITECTURE.md index 458ea7f1..6404af5b 100644 --- a/docs/WINDOWS_NODE_ARCHITECTURE.md +++ b/docs/WINDOWS_NODE_ARCHITECTURE.md @@ -579,8 +579,7 @@ The node protocol requires a stable device identity (`device.id`) derived from a - [x] `camera.snap` — capture photo from webcam (MediaCapture + frame reader fallback) - [ ] `camera.clip` — record short video clip (MediaCapture + MediaEncoding) - [ ] `screen.record` — capture Windows desktop via Graphics Capture API -- [x] `screen.capture` — screenshot via Windows.Graphics.Capture -- [x] `screen.list` — enumerate monitors with bounds/working area +- [x] `screen.snapshot` — screenshot via Windows.Graphics.Capture - [x] Permission prompts (camera: UnauthorizedAccessException → toast; future MSIX consent) - [x] Multi-monitor support for screen capture (`screenIndex` param) diff --git a/docs/WINDOWS_NODE_TESTING.md b/docs/WINDOWS_NODE_TESTING.md index 1552c071..61fd8197 100644 --- a/docs/WINDOWS_NODE_TESTING.md +++ b/docs/WINDOWS_NODE_TESTING.md @@ -44,8 +44,7 @@ These features need the gateway to send `node.invoke` commands: | `canvas.hide` | Hide canvas window | Closes the canvas window | | `canvas.eval` | Execute JavaScript | Runs JS in canvas, returns result | | `canvas.snapshot` | Capture canvas | Returns base64 PNG of canvas content | -| `screen.capture` | Take screenshot | Captures screen, shows notification, returns base64 | -| `screen.list` | List monitors | Returns array of monitor info | +| 
`screen.snapshot` | Take screenshot | Captures screen, shows notification, returns base64 | | `system.notify` | Show notification | Displays toast notification | | `camera.list` | Enumerate cameras | Returns device IDs and names | | `camera.snap` | Capture photo | Returns base64 image (NV12 fallback) | diff --git a/docs/gateway-node-integration.md b/docs/gateway-node-integration.md new file mode 100644 index 00000000..2a95b56a --- /dev/null +++ b/docs/gateway-node-integration.md @@ -0,0 +1,396 @@ +# OpenClaw Gateway ↔ Windows Node Integration Guide + +> Last updated: 2026-04-25 +> Source of truth: [`openclaw/openclaw` — `src/gateway/node-command-policy.ts`](https://github.com/openclaw/openclaw/blob/main/src/gateway/node-command-policy.ts) + +This document captures everything we've learned about how the OpenClaw gateway handles node commands, platform allowlists, and the QR bootstrap pairing flow. It exists because these details are not obvious from the docs alone and caused real debugging sessions. + +--- + +## 1. The Gateway Command Allowlist System + +Every command a node sends must pass **two** gates before it works: + +1. **The node must declare it** — in the `commands` array of the `connect` handshake +2. 
**The gateway must allow it** — via a per-platform allowlist in `node-command-policy.ts` + +If either gate fails, the command is silently dropped or rejected with: +``` +node command not allowed: "X" is not in the allowlist for platform "Y" +``` + +### 1.1 Per-Platform Default Allowlists + +The gateway has hardcoded defaults per platform (from `PLATFORM_DEFAULTS`): + +| Platform | Default Commands | +|----------|-----------------| +| **macOS** | canvas.*, camera.list, location.get, device.info/status, contacts.search, calendar.events, reminders.list, photos.latest, motion.*, system.run/which/notify, screen.snapshot, browser.proxy | +| **iOS** | canvas.*, camera.list, location.get, device.info/status, contacts.*, calendar.*, reminders.*, photos.latest, motion.*, system.notify | +| **Android** | canvas.*, camera.list, location.get, notifications.*, device.*, contacts.*, calendar.*, callLog.search, reminders.*, photos.latest, motion.*, system.notify | +| **Windows** | **system.run, system.run.prepare, system.which, system.notify, browser.proxy** | +| **Linux** | system.run, system.run.prepare, system.which, system.notify, browser.proxy | +| **Unknown** | canvas.*, camera.list, location.get, system.notify | + +**Windows and Linux get almost nothing by default** — only system commands. No canvas, no camera, no screen, no location. This is because Windows/Linux were originally designed as headless "node host" platforms (exec-only), not full companion apps like macOS/iOS. 
+ +### 1.2 "Dangerous" Commands (Always Need Explicit Opt-In) + +These commands are **never** in any platform's defaults, regardless of platform: + +```typescript +CAMERA_DANGEROUS_COMMANDS = ["camera.snap", "camera.clip"] +SCREEN_DANGEROUS_COMMANDS = ["screen.record"] +CONTACTS_DANGEROUS_COMMANDS = ["contacts.add"] +CALENDAR_DANGEROUS_COMMANDS = ["calendar.add"] +REMINDERS_DANGEROUS_COMMANDS = ["reminders.add"] +SMS_DANGEROUS_COMMANDS = ["sms.send", "sms.search"] +``` + +Even macOS doesn't get `camera.snap` or `camera.clip` by default! They must be added via `gateway.nodes.allowCommands`. + +### 1.3 How to Enable Commands for Windows + +Add ALL needed commands to `gateway.nodes.allowCommands` in `~/.openclaw/openclaw.json`: + +```json5 +{ + gateway: { + nodes: { + allowCommands: [ + // Canvas + "canvas.present", + "canvas.hide", + "canvas.navigate", + "canvas.eval", + "canvas.snapshot", + "canvas.a2ui.push", + "canvas.a2ui.reset", + // Camera (all are dangerous or not in Windows defaults) + "camera.list", + "camera.snap", + "camera.clip", + // Screen + "screen.snapshot", + "screen.record", + // Location + "location.get", + // System (already in Windows defaults, but listed for completeness) + // "system.run", + // "system.run.prepare", + // "system.which", + // "system.notify", + // Exec approvals + "system.execApprovals.get", + "system.execApprovals.set", + ] + } + } +} +``` + +After changing config: +```bash +openclaw gateway restart +``` + +After changing the node's command list (code change), you must **re-pair**: +```bash +openclaw devices list # find old device +openclaw devices reject <requestId> # reject the old pairing +# Node will auto-reconnect and create a new pairing request +openclaw devices list # find new request +openclaw devices approve <requestId> # approve with updated commands +``` + +### 1.4 Why Re-Pairing is Needed + +The gateway snapshots the node's declared `commands` array at **pairing approval time**. 
If you change the node's code to add new commands and restart it, the gateway still uses the old snapshot. You must reject the old pairing and approve a new one. + +### 1.5 `denyCommands` + +You can also explicitly deny commands: +```json5 +{ gateway: { nodes: { denyCommands: ["system.run"] } } } +``` +`denyCommands` wins over `allowCommands`. + +--- + +## 2. Command Name Mismatches (Bugs We Found) + +### 2.1 `screen.capture` → Should Be `screen.snapshot` + +The Windows node previously registered `screen.capture` as a command name. The gateway calls it **`screen.snapshot`**: + +```typescript +// Gateway source (node-command-policy.ts) +const SCREEN_COMMANDS = ["screen.snapshot"]; +``` + +The macOS node uses `screen.snapshot`. `screen.capture` is not recognized by the gateway at all — it's silently filtered out of the declared commands. + +**Fixed locally**: `ScreenCapability.cs` now advertises and handles `screen.snapshot`. + +### 2.2 `screen.list` — Not a Gateway Command + +Our node previously registered `screen.list`. This command does not exist in the gateway's command policy. It's never in any default allowlist. + +**Fixed locally**: `screen.list` is no longer advertised. 
+ +### 2.3 Verified Correct Names + +| Our Command | Gateway Canonical | Status | +|-------------|-------------------|--------| +| `camera.list` | `camera.list` | ✅ Match | +| `camera.snap` | `camera.snap` | ✅ Match (dangerous) | +| `camera.clip` | `camera.clip` | ✅ Match (dangerous) | +| `screen.snapshot` | `screen.snapshot` | ✅ Match | +| `location.get` | `location.get` | ✅ Match | +| `system.notify` | `system.notify` | ✅ Match | +| `system.run` | `system.run` | ✅ Match | +| `system.run.prepare` | `system.run.prepare` | ✅ Match | +| `system.which` | `system.which` | ✅ Match | +| `canvas.present` | `canvas.present` | ✅ Match | +| `canvas.hide` | `canvas.hide` | ✅ Match | +| `canvas.navigate` | `canvas.navigate` | ✅ Match | +| `canvas.eval` | `canvas.eval` | ✅ Match | +| `canvas.snapshot` | `canvas.snapshot` | ✅ Match | +| `canvas.a2ui.push` | `canvas.a2ui.push` | ✅ Match | +| `canvas.a2ui.reset` | `canvas.a2ui.reset` | ✅ Match | + +### 2.4 Commands We're Missing vs macOS + +| Command | macOS | Windows | Notes | +|---------|-------|---------|-------| +| `screen.record` | ✅ | ❌ | Video recording (PR #159 pending) | +| `canvas.a2ui.pushJSONL` | ✅ (in gateway allowlist) | ❌ | Not widely used | +| `device.info` | ✅ | ❌ | Hardware/OS info | +| `device.status` | ✅ | ❌ | Battery/charging status | +| `browser.proxy` | ✅ | ❌ | Chrome DevTools proxy | + +--- + +## 3. Platform Detection + +The gateway detects platform from two fields in the `connect` handshake: + +```typescript +// Our connect payload +client: { + platform: "windows", // ← Primary signal + mode: "node", +} +``` + +Detection logic (from `node-command-policy.ts`): +1. Normalize `platform` → lowercase +2. Match against prefix rules: `"win"` → windows, `"mac"/"darwin"` → macos, etc. +3. If no match, try `deviceFamily` field +4. If still no match → `"unknown"` (gets conservative defaults) + +Our node sends `platform: "windows"` which correctly matches the `windows` prefix rule. 
+ +**The problem isn't detection — it's that the `windows` platform intentionally gets a minimal allowlist.** The gateway team designed Windows as a headless exec host, not a full companion app with camera/canvas/screen. + +### 3.1 What "Unknown" Gets (and Why It's Actually Better) + +Ironically, the `unknown` platform gets MORE than Windows: +```typescript +unknown: [ + ...CANVAS_COMMANDS, + ...CAMERA_COMMANDS, // camera.list + ...LOCATION_COMMANDS, // location.get + NODE_SYSTEM_NOTIFY_COMMAND, +] +``` + +If we sent `platform: "windows-desktop"` (which wouldn't match any prefix rule), we'd fall through to `unknown` and actually get canvas/camera/location defaults. But that would be a hack — the right fix is `gateway.nodes.allowCommands`. + +--- + +## 4. The QR / Bootstrap Token Flow + +### 4.1 What `openclaw qr` Does + +1. Calls `issueDeviceBootstrapToken()` on the gateway +2. Generates a **short-lived, single-use** `bootstrapToken` +3. Encodes `{ url, bootstrapToken, expiresAtMs }` as base64url +4. Renders as QR code or pasteable setup code + +### 4.2 bootstrapToken vs gateway.auth.token + +| | `bootstrapToken` | `gateway.auth.token` | +|---|---|---| +| **Purpose** | Initial device pairing | Shared-secret auth for operators | +| **Lifetime** | Short-lived, single-use | Permanent until changed | +| **Scope** | Node pairing + bounded operator bootstrap | Full operator access | +| **Generated by** | `openclaw qr` / `/pair` | User config in `openclaw.json` | +| **Auto-approval** | Yes — gateway auto-approves bootstrap-token handshakes | No — manual `devices approve` needed | + +### 4.3 The Auth Cascade (How the Gateway Resolves Auth) + +When a node connects with `auth: { token: "...", bootstrapToken: "..." }`, the gateway tries (from `auth-context.ts`): + +1. **Shared-secret auth** — `auth.token` vs `gateway.auth.token/password` +2. **Bootstrap token** — `auth.bootstrapToken` vs issued bootstrap tokens + - If valid: `authMethod = "bootstrap-token"`, auto-approved! 
+ - Preferred over shared-secret even if both succeed (QR flow relies on this) +3. **Device token** — `auth.token` as device-token fallback (for already-paired devices) + +### 4.4 What Our Setup Wizard Does (and the Gap) + +Currently, our Setup Wizard: +1. Decodes the setup code from `openclaw qr` +2. Extracts `url` and `bootstrapToken` +3. Stores `bootstrapToken` as the settings `Token` field +4. Sends it as `auth.token` in the connect handshake + +**The problem**: We send it as `auth.token`, not `auth.bootstrapToken`. The gateway's auth resolution: +- Tries `auth.token` as shared-secret → **fails** (it's not the gateway token) +- Never sees `auth.bootstrapToken` → never tries bootstrap-token auth +- Falls back to device-token → **fails** (no prior pairing) + +**The fix**: Send the bootstrap token as `auth.bootstrapToken` in the connect payload, separate from `auth.token`. This lets the gateway correctly classify it as a bootstrap-token handshake, which enables: +- Silent auto-approval (no manual `devices approve` needed) +- Bootstrap token revocation after pairing +- Bounded operator token handoff (if configured) + +### 4.5 Post-Pairing: Device Tokens + +After a successful bootstrap-token pairing: +1. Gateway issues a `deviceToken` in `hello-ok.auth.deviceToken` +2. Node should **save** this device token +3. Future connections use `auth.token = <deviceToken>` (device-token auth path) +4. The bootstrap token is revoked and no longer valid + +**We're not doing steps 2-3 yet.** Our node uses the same settings token forever. It works because the settings token matches the gateway's shared secret (if the user entered it manually), but it means QR-based pairing doesn't complete the handoff properly. + +### 4.6 Ideal Bootstrap Flow (What We Should Implement) + +``` +1. User runs `openclaw qr` on gateway host +2. User pastes setup code into Windows Setup Wizard +3. Wizard decodes → { url, bootstrapToken, expiresAtMs } +4. Node connects with: auth: { bootstrapToken: "<bootstrapToken>" } +5. 
Gateway auto-approves pairing (bootstrap-token auth method) +6. Gateway returns hello-ok with: auth: { deviceToken: "<deviceToken>" } +7. Node saves deviceToken to identity store +8. Future connections use: auth: { token: "<deviceToken>" } +9. No manual `devices approve` needed! +``` + +This would make pairing truly seamless — scan QR, auto-paired, done. + +--- + +## 5. Recommendations + +### 5.0 Design Conclusion: Safe Windows/macOS Parity + +The root issue is not that the gateway fails to recognize Windows. It recognizes Windows correctly. The problem is that `platform: "windows"` currently gets only the headless exec-host defaults, while the Windows tray app is now a full node that can declare canvas, camera, location, and screen capabilities. + +The simplest upstream fix is to make Windows match macOS for **safe declared commands**, while keeping dangerous commands explicit opt-in. + +This does **not** make every Windows node capable of camera/canvas/location/screen. A command still has to pass both gates: + +1. The node must declare the command. +2. The gateway policy must allow the command. + +So a headless Windows node host that only declares `system.run` / `system.which` remains exec-only. Expanding the Windows default allowlist just stops the gateway from filtering safe commands that a Windows node explicitly advertises. + +Recommended gateway defaults: + +| Command bucket | Windows default? 
| Reason | +|----------------|------------------|--------| +| Safe declared companion commands: `canvas.*`, `camera.list`, `location.get`, `screen.snapshot`, `device.info`, `device.status` | Yes | Matches macOS parity and only applies when declared by the node | +| Dangerous/privacy-heavy commands: `camera.snap`, `camera.clip`, `screen.record`, write commands like `contacts.add` | No | Existing gateway model already requires explicit `gateway.nodes.allowCommands` | +| Exec commands: `system.run`, `system.run.prepare`, `system.which`, `system.notify`, `browser.proxy` | Yes | Existing Windows headless-host behavior | + +Until the gateway expands Windows safe defaults, the practical local solution is: + +1. Keep declaring the correct command names from the Windows node. +2. Configure `gateway.nodes.allowCommands` for the Windows companion features. +3. Re-pair after command-list changes because the gateway snapshots commands at approval time. + +### 5.1 Immediate Code Fixes (This Branch) + +- [x] Rename `screen.capture` → `screen.snapshot` in `ScreenCapability.cs` +- [x] Remove `screen.list` from declared commands +- [x] Remove debug logging from `WindowsNodeClient.cs` + +### 5.2 Setup Wizard Improvements (Next Sprint) + +- [ ] Send `bootstrapToken` in correct field: `auth.bootstrapToken` not `auth.token` +- [ ] Handle `hello-ok.auth.deviceToken` — save it for future connections +- [ ] Show "auto-paired!" vs "waiting for approval" based on auth method +- [ ] Handle bootstrap token expiry gracefully (re-generate if expired) + +### 5.3 Upstream Contributions / Issues to File + +- [ ] **Request Windows/macOS parity for safe declared commands** — Windows should allow the same safe companion commands macOS does, while dangerous commands stay explicit opt-in. 
+- [ ] **Document `gateway.nodes.allowCommands`** — it's not in the config reference page +- [ ] **Consider `canvas.a2ui.pushJSONL`** — it's in the gateway allowlist but we don't implement it + +#### Upstream issue draft + +**Title:** Expand Windows node default allowlist for safe declared companion commands + +**Body:** + +Windows nodes are currently treated like Linux/headless exec hosts in `src/gateway/node-command-policy.ts`: + +```ts +windows: [...SYSTEM_COMMANDS] +``` + +That means the gateway filters out safe companion-app commands that a Windows node explicitly declares, including `canvas.*`, `camera.list`, `location.get`, and `screen.snapshot`. The Windows tray app is now a full companion node, not just an exec host, so this causes confusing behavior: the node can implement and advertise a command, but the gateway drops/rejects it unless users manually configure `gateway.nodes.allowCommands`. + +Proposal: + +- Add safe declared companion commands to Windows defaults, similar to macOS: + - `canvas.present` + - `canvas.hide` + - `canvas.navigate` + - `canvas.eval` + - `canvas.snapshot` + - `canvas.a2ui.push` + - `canvas.a2ui.pushJSONL` + - `canvas.a2ui.reset` + - `camera.list` + - `location.get` + - `screen.snapshot` + - optionally `device.info` / `device.status` +- Keep dangerous/privacy-heavy commands explicit opt-in via `gateway.nodes.allowCommands`: + - `camera.snap` + - `camera.clip` + - `screen.record` + - write commands such as `contacts.add`, `calendar.add`, etc. + +This does not grant capabilities to headless Windows hosts by itself. A command still has to pass both gates: the node must declare it in `commands`, and the gateway policy must allow it. Headless Windows node hosts that only declare `system.run` / `system.which` remain exec-only. 
+ +Related documentation gap: `gateway.nodes.allowCommands` and `gateway.nodes.denyCommands` should be documented in the gateway configuration reference, including the requirement to re-pair after command-list changes because approved pairing records snapshot declared commands. + +### 5.4 User-Facing Documentation + +When shipping the Windows node, README/wiki should tell users: + +> **First-time setup**: After pairing your Windows node, add these commands to your gateway config: +> ```bash +> openclaw config set gateway.nodes.allowCommands '["canvas.present", "canvas.hide", "canvas.navigate", "canvas.eval", "canvas.snapshot", "canvas.a2ui.push", "canvas.a2ui.reset", "camera.list", "camera.snap", "camera.clip", "screen.snapshot", "location.get", "system.execApprovals.get", "system.execApprovals.set"]' +> openclaw gateway restart +> ``` +> Then re-pair the node (`openclaw devices reject <requestId>`, then approve the new pairing request). + +--- + +## 6. Reference: Gateway Source Files + +| File | What It Does | +|------|-------------| +| `src/gateway/node-command-policy.ts` | Platform allowlists, dangerous commands, command filtering | +| `src/gateway/device-metadata-normalization.ts` | Platform string normalization | +| `src/infra/node-commands.ts` | Constants: `system.run/which/notify`, `browser.proxy`, `execApprovals.*` | +| `src/gateway/server/ws-connection/auth-context.ts` | Auth cascade: shared-secret → bootstrap-token → device-token | +| `extensions/device-pair/index.ts` | QR generation, bootstrap token issuance, pairing flow | +| `src/cli/nodes-screen.ts` | CLI screen record helpers (confirms `screen.record` naming) | diff --git a/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs b/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs index 05829616..d17c0662 100644 --- a/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs +++ b/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs @@ -13,8 +13,7 @@ public class ScreenCapability : NodeCapabilityBase private static readonly string[] 
_commands = new[] { - "screen.capture", - "screen.list" + "screen.snapshot" // Future: "screen.record" }; @@ -22,7 +21,6 @@ public class ScreenCapability : NodeCapabilityBase // Events for UI/platform-specific implementation public event Func>? CaptureRequested; - public event Func>? ListRequested; public ScreenCapability(IOpenClawLogger logger) : base(logger) { @@ -32,8 +30,7 @@ public override async Task ExecuteAsync(NodeInvokeRequest re { return request.Command switch { - "screen.capture" => await HandleCaptureAsync(request), - "screen.list" => await HandleListAsync(request), + "screen.snapshot" => await HandleCaptureAsync(request), _ => Error($"Unknown command: {request.Command}") }; } @@ -47,7 +44,7 @@ private async Task HandleCaptureAsync(NodeInvokeRequest requ var screenIndex = GetIntArg(request.Args, "screenIndex", monitor); var includePointer = GetBoolArg(request.Args, "includePointer", true); - Logger.Info($"screen.capture: format={format}, maxWidth={maxWidth}, monitor={screenIndex}"); + Logger.Info($"screen.snapshot: format={format}, maxWidth={maxWidth}, monitor={screenIndex}"); if (CaptureRequested == null) { @@ -81,39 +78,6 @@ private async Task HandleCaptureAsync(NodeInvokeRequest requ return Error($"Capture failed: {ex.Message}"); } } - - private async Task HandleListAsync(NodeInvokeRequest request) - { - Logger.Info("screen.list"); - - if (ListRequested == null) - { - return Error("Screen list not available"); - } - - try - { - var screens = await ListRequested(); - var formatted = new List(); - foreach (var screen in screens) - { - formatted.Add(new - { - index = screen.Index, - name = screen.Name, - primary = screen.IsPrimary, - bounds = new { x = screen.X, y = screen.Y, width = screen.Width, height = screen.Height }, - workingArea = new { x = screen.WorkingX, y = screen.WorkingY, width = screen.WorkingWidth, height = screen.WorkingHeight } - }); - } - return Success(new { screens = formatted }); - } - catch (Exception ex) - { - 
Logger.Error("Screen list failed", ex); - return Error($"List failed: {ex.Message}"); - } - } } public class ScreenCaptureArgs @@ -133,17 +97,3 @@ public class ScreenCaptureResult public string Base64 { get; set; } = ""; } -public class ScreenInfo -{ - public int Index { get; set; } - public string Name { get; set; } = ""; - public int Width { get; set; } - public int Height { get; set; } - public int X { get; set; } - public int Y { get; set; } - public int WorkingX { get; set; } - public int WorkingY { get; set; } - public int WorkingWidth { get; set; } - public int WorkingHeight { get; set; } - public bool IsPrimary { get; set; } -} diff --git a/src/OpenClaw.Tray.WinUI/App.xaml.cs b/src/OpenClaw.Tray.WinUI/App.xaml.cs index 60d7b4c0..ed33ac8d 100644 --- a/src/OpenClaw.Tray.WinUI/App.xaml.cs +++ b/src/OpenClaw.Tray.WinUI/App.xaml.cs @@ -1959,6 +1959,10 @@ private async Task CheckForUpdatesAsync() { try { +#if DEBUG + Logger.Info("Skipping update check in debug build"); + return true; +#else Logger.Info("Checking for updates..."); var updateFound = await AppUpdater.CheckForUpdatesAsync(); @@ -2000,6 +2004,7 @@ private async Task CheckForUpdatesAsync() } return true; // RemindLater or Skip - continue +#endif } catch (Exception ex) { diff --git a/src/OpenClaw.Tray.WinUI/Services/CameraCaptureService.cs b/src/OpenClaw.Tray.WinUI/Services/CameraCaptureService.cs index 42c22bd6..5f6843ce 100644 --- a/src/OpenClaw.Tray.WinUI/Services/CameraCaptureService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/CameraCaptureService.cs @@ -139,7 +139,7 @@ public async Task ClipAsync(CameraClipArgs args) var settings = new MediaCaptureInitializationSettings { VideoDeviceId = args.DeviceId, - MemoryPreference = MediaCaptureMemoryPreference.Cpu, + MemoryPreference = MediaCaptureMemoryPreference.Auto, StreamingCaptureMode = args.IncludeAudio ? 
StreamingCaptureMode.AudioAndVideo : StreamingCaptureMode.Video @@ -148,9 +148,10 @@ public async Task ClipAsync(CameraClipArgs args) var initStart = DateTime.UtcNow; await capture.InitializeAsync(settings); _logger.Info($"camera.clip: MediaCapture initialized in {(DateTime.UtcNow - initStart).TotalMilliseconds:0}ms"); - + + var recordProperties = await TryConfigureVideoRecordStreamAsync(capture); using var stream = new InMemoryRandomAccessStream(); - var profile = MediaEncodingProfile.CreateMp4(VideoEncodingQuality.HD720p); + var profile = CreateClipProfile(args.IncludeAudio, recordProperties); var recordStart = DateTime.UtcNow; await capture.StartRecordToStreamAsync(profile, stream); @@ -163,7 +164,7 @@ public async Task ClipAsync(CameraClipArgs args) _logger.Info($"camera.clip: recording stopped after {elapsed:0}ms"); stream.Seek(0); - var reader = new DataReader(stream); + using var reader = new DataReader(stream); await reader.LoadAsync((uint)stream.Size); var buffer = new byte[stream.Size]; reader.ReadBytes(buffer); @@ -222,6 +223,74 @@ private static bool IsInvalidMediaType(Exception ex) return ex.HResult == MfEInvalidMediaType; } + private async Task TryConfigureVideoRecordStreamAsync(MediaCapture capture) + { + var props = capture.VideoDeviceController + .GetAvailableMediaStreamProperties(MediaStreamType.VideoRecord) + .OfType() + .Where(p => p.Width > 0 && p.Height > 0) + .ToList(); + + if (props.Count == 0) + { + _logger.Warn("camera.clip: no video record stream properties available; using automatic MP4 profile"); + return null; + } + + var bounded = props.Where(p => p.Width <= 1280 && p.Height <= 720).ToList(); + var candidates = (bounded.Count > 0 ? 
bounded : props) + .OrderByDescending(p => p.Width) + .ThenByDescending(p => p.Height) + .ThenByDescending(p => p.Bitrate) + .ToList(); + + foreach (var candidate in candidates) + { + try + { + await capture.VideoDeviceController.SetMediaStreamPropertiesAsync(MediaStreamType.VideoRecord, candidate); + _logger.Info($"camera.clip: using record stream {candidate.Subtype} {candidate.Width}x{candidate.Height}"); + return candidate; + } + catch (Exception ex) when (IsInvalidMediaType(ex)) + { + _logger.Warn($"camera.clip: record stream {candidate.Subtype} {candidate.Width}x{candidate.Height} not supported"); + } + } + + _logger.Warn("camera.clip: no compatible record stream properties accepted; using automatic MP4 profile"); + return null; + } + + private static MediaEncodingProfile CreateClipProfile(bool includeAudio, VideoEncodingProperties? recordProperties) + { + var profile = MediaEncodingProfile.CreateMp4(VideoEncodingQuality.Auto); + + if (!includeAudio) + { + profile.Audio = null; + } + + if (recordProperties != null) + { + profile.Video.Width = recordProperties.Width; + profile.Video.Height = recordProperties.Height; + + if (recordProperties.Bitrate > 0) + { + profile.Video.Bitrate = recordProperties.Bitrate; + } + + if (recordProperties.FrameRate.Numerator > 0 && recordProperties.FrameRate.Denominator > 0) + { + profile.Video.FrameRate.Numerator = recordProperties.FrameRate.Numerator; + profile.Video.FrameRate.Denominator = recordProperties.FrameRate.Denominator; + } + } + + return profile; + } + private static List SelectPhotoEncodings(MediaCapture capture, string format, int maxWidth) { var props = capture.VideoDeviceController diff --git a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs index e83fc684..7121847e 100644 --- a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs @@ -129,7 +129,6 @@ private void RegisterCapabilities() // Screen capability 
_screenCapability = new ScreenCapability(_logger); - _screenCapability.ListRequested += OnScreenList; _screenCapability.CaptureRequested += OnScreenCapture; _nodeClient.RegisterCapability(_screenCapability); @@ -414,12 +413,6 @@ private void OnCanvasA2UIReset(object? sender, EventArgs args) #region Screen Capability Handlers - private Task OnScreenList() - { - return _screenCaptureService?.ListScreensAsync() - ?? Task.FromResult(Array.Empty()); - } - private async Task OnScreenCapture(ScreenCaptureArgs args) { if (_screenCaptureService == null) diff --git a/src/OpenClaw.Tray.WinUI/Services/ScreenCaptureService.cs b/src/OpenClaw.Tray.WinUI/Services/ScreenCaptureService.cs index 293094c4..5bc86ac0 100644 --- a/src/OpenClaw.Tray.WinUI/Services/ScreenCaptureService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/ScreenCaptureService.cs @@ -21,41 +21,6 @@ public ScreenCaptureService(IOpenClawLogger logger) _logger = logger; } - /// - /// List all available screens - /// - public Task ListScreensAsync() - { - var result = new System.Collections.Generic.List(); - - EnumDisplayMonitors(IntPtr.Zero, IntPtr.Zero, (IntPtr hMonitor, IntPtr hdcMonitor, ref RECT lprcMonitor, IntPtr dwData) => - { - var info = new MONITORINFOEX(); - info.cbSize = Marshal.SizeOf(typeof(MONITORINFOEX)); - - if (GetMonitorInfo(hMonitor, ref info)) - { - result.Add(new ScreenInfo - { - Index = result.Count, - Name = info.szDevice, - Width = info.rcMonitor.right - info.rcMonitor.left, - Height = info.rcMonitor.bottom - info.rcMonitor.top, - IsPrimary = (info.dwFlags & MONITORINFOF_PRIMARY) != 0, - X = info.rcMonitor.left, - Y = info.rcMonitor.top, - WorkingX = info.rcWork.left, - WorkingY = info.rcWork.top, - WorkingWidth = info.rcWork.right - info.rcWork.left, - WorkingHeight = info.rcWork.bottom - info.rcWork.top - }); - } - return true; - }, IntPtr.Zero); - - return Task.FromResult(result.ToArray()); - } - /// /// Capture a screenshot /// diff --git a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs 
b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs index 985fe714..c9f554e0 100644 --- a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs +++ b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs @@ -681,8 +681,9 @@ private static JsonElement Parse(string json) public void CanHandle_ScreenCommands() { var cap = new ScreenCapability(NullLogger.Instance); - Assert.True(cap.CanHandle("screen.capture")); - Assert.True(cap.CanHandle("screen.list")); + Assert.True(cap.CanHandle("screen.snapshot")); + Assert.False(cap.CanHandle("screen.capture")); + Assert.False(cap.CanHandle("screen.list")); Assert.False(cap.CanHandle("screen.record")); Assert.Equal("screen", cap.Category); } @@ -691,7 +692,7 @@ public void CanHandle_ScreenCommands() public async Task Capture_ReturnsError_WhenNoHandler() { var cap = new ScreenCapability(NullLogger.Instance); - var req = new NodeInvokeRequest { Id = "s1", Command = "screen.capture", Args = Parse("""{}""") }; + var req = new NodeInvokeRequest { Id = "s1", Command = "screen.snapshot", Args = Parse("""{}""") }; var res = await cap.ExecuteAsync(req); Assert.False(res.Ok); Assert.Contains("not available", res.Error, StringComparison.OrdinalIgnoreCase); @@ -711,7 +712,7 @@ public async Task Capture_CallsHandler_WithArgs() var req = new NodeInvokeRequest { Id = "s2", - Command = "screen.capture", + Command = "screen.snapshot", Args = Parse("""{"format":"jpeg","maxWidth":800,"quality":50,"screenIndex":1}""") }; @@ -724,70 +725,18 @@ public async Task Capture_CallsHandler_WithArgs() Assert.Equal(1, receivedArgs.MonitorIndex); } - [Fact] - public async Task List_ReturnsError_WhenNoHandler() - { - var cap = new ScreenCapability(NullLogger.Instance); - var req = new NodeInvokeRequest { Id = "s3", Command = "screen.list", Args = Parse("""{}""") }; - var res = await cap.ExecuteAsync(req); - Assert.False(res.Ok); - Assert.NotNull(res.Error); - Assert.Contains("not available", res.Error!, StringComparison.OrdinalIgnoreCase); - } - - [Fact] - public async Task 
List_ReturnsScreens_WhenHandler() - { - var cap = new ScreenCapability(NullLogger.Instance); - cap.ListRequested += () => Task.FromResult(new[] - { - new ScreenInfo { Index = 0, Name = "Main", IsPrimary = true, Width = 2560, Height = 1440 } - }); - - var req = new NodeInvokeRequest { Id = "s4", Command = "screen.list", Args = Parse("""{}""") }; - var res = await cap.ExecuteAsync(req); - Assert.True(res.Ok); - Assert.NotNull(res.Payload); - - // Verify payload contains expected screen data - var json = System.Text.Json.JsonSerializer.Serialize(res.Payload); - using var doc = System.Text.Json.JsonDocument.Parse(json); - var root = doc.RootElement; - Assert.True(root.TryGetProperty("screens", out var screensEl)); - Assert.Equal(System.Text.Json.JsonValueKind.Array, screensEl.ValueKind); - Assert.Equal(1, screensEl.GetArrayLength()); - var screen = screensEl[0]; - Assert.Equal("Main", screen.GetProperty("name").GetString()); - Assert.True(screen.GetProperty("primary").GetBoolean()); - var bounds = screen.GetProperty("bounds"); - Assert.Equal(2560, bounds.GetProperty("width").GetInt32()); - Assert.Equal(1440, bounds.GetProperty("height").GetInt32()); - } - [Fact] public async Task Capture_ReturnsError_WhenHandlerThrows() { var cap = new ScreenCapability(NullLogger.Instance); cap.CaptureRequested += (args) => throw new InvalidOperationException("Display access denied"); - var req = new NodeInvokeRequest { Id = "s5", Command = "screen.capture", Args = Parse("""{}""") }; + var req = new NodeInvokeRequest { Id = "s5", Command = "screen.snapshot", Args = Parse("""{}""") }; var res = await cap.ExecuteAsync(req); Assert.False(res.Ok); Assert.Contains("Display access denied", res.Error); } - [Fact] - public async Task List_ReturnsError_WhenHandlerThrows() - { - var cap = new ScreenCapability(NullLogger.Instance); - cap.ListRequested += () => throw new InvalidOperationException("Screen enumeration failed"); - - var req = new NodeInvokeRequest { Id = "s6", Command = 
"screen.list", Args = Parse("""{}""") }; - var res = await cap.ExecuteAsync(req); - Assert.False(res.Ok); - Assert.Contains("Screen enumeration failed", res.Error); - } - [Fact] public async Task Capture_ResponseIncludesDataUri() { @@ -800,7 +749,7 @@ public async Task Capture_ResponseIncludesDataUri() Base64 = "abc123" }); - var req = new NodeInvokeRequest { Id = "s7", Command = "screen.capture", Args = Parse("""{}""") }; + var req = new NodeInvokeRequest { Id = "s7", Command = "screen.snapshot", Args = Parse("""{}""") }; var res = await cap.ExecuteAsync(req); Assert.True(res.Ok); @@ -827,7 +776,7 @@ public async Task Capture_UsesMonitorAlias_ForScreenIndex() var req = new NodeInvokeRequest { Id = "s8", - Command = "screen.capture", + Command = "screen.snapshot", Args = Parse("""{"monitor":2}""") }; var res = await cap.ExecuteAsync(req); @@ -1207,4 +1156,4 @@ public async Task ExecuteAsync_ReturnsError_ForUnknownCommand() Assert.False(res.Ok); Assert.Contains("Unknown command", res.Error); } -} \ No newline at end of file +} diff --git a/tests/OpenClaw.Shared.Tests/README.md b/tests/OpenClaw.Shared.Tests/README.md index 9346171d..ccc3f58d 100644 --- a/tests/OpenClaw.Shared.Tests/README.md +++ b/tests/OpenClaw.Shared.Tests/README.md @@ -62,11 +62,11 @@ dotnet test --filter "FullyQualifiedName~AgentActivityTests" - ✅ A2UI reset raises event #### ScreenCapabilityTests (5 tests) -- ✅ CanHandle screen.capture and screen.list +- ✅ CanHandle screen.snapshot and rejects non-gateway screen.capture/screen.list - ✅ Capture returns error when no handler - ✅ Capture calls handler with parsed args (format, maxWidth, quality, screenIndex) -- ✅ List returns error when no handler -- ✅ List returns screens when handler set +- ✅ Capture returns error when handler throws +- ✅ Capture includes data URI response #### CameraCapabilityTests (7 tests) - ✅ CanHandle camera.list and camera.snap