From 22e378f8a7d5793626ec50d57311da94c2ace1ca Mon Sep 17 00:00:00 2001 From: AlexAlves87 Date: Wed, 8 Apr 2026 18:44:15 +0200 Subject: [PATCH 01/10] feat: add screen.record to ScreenCapability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New command in the shared capability layer: - screen.record: fixed-duration capture; blocks until done and returns the video as base64 MP4. Args: durationMs (def. 5000), fps (def. 10), screenIndex/monitor (def. 0). The monitor→screenIndex alias keeps consistency with screen.capture. Co-Authored-By: Claude Sonnet 4.6 --- .../Capabilities/ScreenCapability.cs | 70 +++++++++++++++++-- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs b/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs index 0582961..e214603 100644 --- a/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs +++ b/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs @@ -14,15 +14,16 @@ public class ScreenCapability : NodeCapabilityBase private static readonly string[] _commands = new[] { "screen.capture", - "screen.list" - // Future: "screen.record" + "screen.list", + "screen.record", }; - + public override IReadOnlyList Commands => _commands; - + // Events for UI/platform-specific implementation public event Func>? CaptureRequested; public event Func>? ListRequested; + public event Func>? 
RecordRequested; public ScreenCapability(IOpenClawLogger logger) : base(logger) { @@ -33,7 +34,8 @@ public override async Task ExecuteAsync(NodeInvokeRequest re return request.Command switch { "screen.capture" => await HandleCaptureAsync(request), - "screen.list" => await HandleListAsync(request), + "screen.list" => await HandleListAsync(request), + "screen.record" => await HandleRecordAsync(request), _ => Error($"Unknown command: {request.Command}") }; } @@ -114,6 +116,64 @@ private async Task HandleListAsync(NodeInvokeRequest request return Error($"List failed: {ex.Message}"); } } + + private async Task HandleRecordAsync(NodeInvokeRequest request) + { + var durationMs = GetIntArg(request.Args, "durationMs", 5000); + var fps = GetIntArg(request.Args, "fps", 10); + var screenIndex = GetIntArg(request.Args, "screenIndex", GetIntArg(request.Args, "monitor", 0)); + + Logger.Info($"screen.record: durationMs={durationMs} fps={fps} screenIndex={screenIndex}"); + + if (RecordRequested == null) + return Error("Screen recording not available"); + + try + { + var result = await RecordRequested(new ScreenRecordArgs + { + DurationMs = durationMs, + Fps = fps, + ScreenIndex = screenIndex, + }); + + return Success(new + { + format = result.Format, + base64 = result.Base64, + durationMs = result.DurationMs, + fps = result.Fps, + screenIndex = result.ScreenIndex, + width = result.Width, + height = result.Height, + hasAudio = result.HasAudio, + }); + } + catch (Exception ex) + { + Logger.Error("screen.record failed", ex); + return Error($"Record failed: {ex.Message}"); + } + } +} + +public class ScreenRecordArgs +{ + public int DurationMs { get; set; } = 5000; + public int Fps { get; set; } = 10; + public int ScreenIndex { get; set; } +} + +public class ScreenRecordResult +{ + public string Base64 { get; set; } = ""; + public string Format { get; set; } = "mp4"; + public int DurationMs { get; set; } + public float Fps { get; set; } + public int ScreenIndex { get; set; } + public 
int Width { get; set; } + public int Height { get; set; } + public bool HasAudio { get; set; } } public class ScreenCaptureArgs From 45912512d024097dac1178aeff46a333199281b8 Mon Sep 17 00:00:00 2001 From: AlexAlves87 Date: Wed, 8 Apr 2026 18:44:23 +0200 Subject: [PATCH 02/10] feat: add ScreenRecordingService for fixed-duration monitor capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WinRT-based implementation backing screen.record: - D3D11 + Direct3D11CaptureFramePool for GPU-backed frame acquisition - Software BGRA→NV12 conversion (BT.601 limited range) before encoding - MediaTranscoder pipeline with hardware acceleration and SW fallback - No external dependencies: pure P/Invoke (d3d11.dll, combase.dll) Records the full monitor only. Per-window capture is not yet implemented. Co-Authored-By: Claude Sonnet 4.6 --- .../Services/ScreenRecordingService.cs | 328 ++++++++++++++++++ 1 file changed, 328 insertions(+) create mode 100644 src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs diff --git a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs new file mode 100644 index 0000000..cbd350a --- /dev/null +++ b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs @@ -0,0 +1,328 @@ +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Threading; +using System.Threading.Tasks; +using Windows.Graphics.Capture; +using Windows.Graphics.DirectX; +using Windows.Graphics.DirectX.Direct3D11; +using Windows.Graphics.Imaging; +using Windows.Media.MediaProperties; +using Windows.Media.Transcoding; +using Windows.Storage.Streams; +using OpenClaw.Shared; +using OpenClaw.Shared.Capabilities; +using WinRT; + +namespace OpenClawTray.Services; + +/// +/// Records the screen using Windows.Graphics.Capture and encodes to MP4 via MediaTranscoder. 
+/// +internal sealed class ScreenRecordingService +{ + private readonly IOpenClawLogger _logger; + + private const int MaxFps = 60; + private const int MinFps = 1; + private const int MinDurationMs = 250; + private const int MaxDurationMs = 60_000; + private const int PoolBuffers = 2; + + public ScreenRecordingService(IOpenClawLogger logger) + { + _logger = logger; + } + + // ── Public API ──────────────────────────────────────────────────────────── + + public async Task RecordAsync(ScreenRecordArgs args) + { + var durationMs = Math.Clamp(args.DurationMs, MinDurationMs, MaxDurationMs); + var fps = Math.Clamp(args.Fps, MinFps, MaxFps); + var screenIndex = args.ScreenIndex; + + _logger.Info($"[ScreenRecording] duration={durationMs}ms fps={fps} screen={screenIndex}"); + + var item = CreateCaptureItem(screenIndex); + var width = item.Size.Width; + var height = item.Size.Height; + var d3d = CreateDirect3DDevice(); + + Direct3D11CaptureFramePool? pool = null; + GraphicsCaptureSession? session = null; + var latestFrame = (Direct3D11CaptureFrame?)null; + using var ready = new SemaphoreSlim(0, 1); + var frames = new List(); + + try + { + pool = Direct3D11CaptureFramePool.CreateFreeThreaded( + d3d, + DirectXPixelFormat.B8G8R8A8UIntNormalized, + PoolBuffers, + new global::Windows.Graphics.SizeInt32 { Width = width, Height = height }); + + session = pool.CreateCaptureSession(item); + session.IsCursorCaptureEnabled = false; + + pool.FrameArrived += (p, _) => + { + var f = p.TryGetNextFrame(); + if (f == null) return; + Interlocked.Exchange(ref latestFrame, f)?.Dispose(); + try { ready.Release(); } catch { /* already signaled */ } + }; + + session.StartCapture(); + + var intervalMs = 1000 / fps; + var deadline = DateTime.UtcNow.AddMilliseconds(durationMs); + var nextCapture = DateTime.UtcNow; + + while (DateTime.UtcNow < deadline) + { + var waitMs = (int)(nextCapture - DateTime.UtcNow).TotalMilliseconds; + if (waitMs > 0) + await Task.Delay(waitMs); + + if (!await 
ready.WaitAsync(intervalMs * 2)) + continue; + + var frame = Interlocked.Exchange(ref latestFrame, null); + if (frame == null) continue; + + using (frame) + { + try + { + var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface); + frames.Add(ExtractBitmapBytes(bmp)); + } + catch (Exception ex) + { + _logger.Warn($"[ScreenRecording] Frame skipped: {ex.Message}"); + } + } + + nextCapture = nextCapture.AddMilliseconds(intervalMs); + } + } + finally + { + session?.Dispose(); + pool?.Dispose(); + Interlocked.Exchange(ref latestFrame, null)?.Dispose(); + } + + _logger.Info($"[ScreenRecording] Captured {frames.Count} frames, encoding..."); + + var base64 = await EncodeToMp4Async(frames, width, height, fps); + + return new ScreenRecordResult + { + Format = "mp4", + Base64 = base64, + DurationMs = durationMs, + Fps = fps, + ScreenIndex = screenIndex, + Width = width, + Height = height, + HasAudio = false, + }; + } + + // ── Encoding ────────────────────────────────────────────────────────────── + + private static async Task EncodeToMp4Async( + List frames, int width, int height, int fps) + { + var output = new InMemoryRandomAccessStream(); + + var profile = MediaEncodingProfile.CreateMp4(VideoEncodingQuality.HD720p); + profile.Video.Width = (uint)width; + profile.Video.Height = (uint)height; + profile.Video.FrameRate.Numerator = (uint)fps; + profile.Video.FrameRate.Denominator = 1; + profile.Audio = null; + + var input = BuildRawVideoStream(frames, width, height); + + PrepareTranscodeResult? 
xcode = null; + try + { + xcode = await new MediaTranscoder { HardwareAccelerationEnabled = true } + .PrepareStreamTranscodeAsync(input, output, profile); + } + catch + { + xcode = await new MediaTranscoder { HardwareAccelerationEnabled = false } + .PrepareStreamTranscodeAsync(input, output, profile); + } + + if (!xcode.CanTranscode) + throw new InvalidOperationException($"Transcode failed: {xcode.FailureReason}"); + + await xcode.TranscodeAsync(); + + output.Seek(0); + var reader = new DataReader(output); + await reader.LoadAsync((uint)output.Size); + var bytes = new byte[output.Size]; + reader.ReadBytes(bytes); + return Convert.ToBase64String(bytes); + } + + private static InMemoryRandomAccessStream BuildRawVideoStream( + List frames, int width, int height) + { + var stream = new InMemoryRandomAccessStream(); + var writer = new DataWriter(stream); + foreach (var frame in frames) + writer.WriteBytes(BgraToNv12(frame, width, height)); + writer.StoreAsync().AsTask().Wait(); + stream.Seek(0); + return stream; + } + + /// BT.601 limited-range BGRA→NV12 conversion. 
+ private static byte[] BgraToNv12(byte[] bgra, int width, int height) + { + var nv12 = new byte[width * height * 3 / 2]; + int yBase = 0; + int uvBase = width * height; + + for (int y = 0; y < height; y++) + for (int x = 0; x < width; x++) + { + int i = (y * width + x) * 4; + byte b = bgra[i], g = bgra[i + 1], r = bgra[i + 2]; + + nv12[yBase++] = (byte)(16 + (66 * r + 129 * g + 25 * b) / 256); + + if ((y & 1) == 0 && (x & 1) == 0) + { + int uv = uvBase + (y / 2 * width) + (x & ~1); + nv12[uv] = (byte)(128 + (-38 * r - 74 * g + 112 * b) / 256); + nv12[uv + 1] = (byte)(128 + (112 * r - 94 * g - 18 * b) / 256); + } + } + + return nv12; + } + + // ── D3D11 / WinRT interop ───────────────────────────────────────────────── + + // IID_IDXGIDevice + private static readonly Guid IID_DXGIDevice = + new Guid("54ec77fa-1377-44e6-8c32-88fd5f44c84c"); + + private static IDirect3DDevice CreateDirect3DDevice() + { + // D3D_DRIVER_TYPE_HARDWARE=1, D3D11_CREATE_DEVICE_BGRA_SUPPORT=0x20, D3D11_SDK_VERSION=7 + D3D11CreateDevice(IntPtr.Zero, 1, IntPtr.Zero, 0x20, IntPtr.Zero, 0, 7, + out var d3dPtr, IntPtr.Zero, IntPtr.Zero); + + var iid = IID_DXGIDevice; + Marshal.QueryInterface(d3dPtr, ref iid, out var dxgiPtr); + Marshal.Release(d3dPtr); + + NativeCreateDirect3D11DeviceFromDXGIDevice(dxgiPtr, out var winrtPtr); + Marshal.Release(dxgiPtr); + + var device = MarshalInterface.FromAbi(winrtPtr); + Marshal.Release(winrtPtr); + return device; + } + + private static GraphicsCaptureItem CreateCaptureItem(int screenIndex) + { + var monitors = GetMonitorHandles(); + if (screenIndex < 0 || screenIndex >= monitors.Count) + screenIndex = 0; + + const string classId = "Windows.Graphics.Capture.GraphicsCaptureItem"; + var iid = typeof(IGraphicsCaptureItemInterop).GUID; + + WindowsCreateString(classId, classId.Length, out var hstring); + try + { + RoGetActivationFactory(hstring, ref iid, out var factoryPtr); + var factory = (IGraphicsCaptureItemInterop)Marshal.GetObjectForIUnknown(factoryPtr); + 
Marshal.Release(factoryPtr); + + var itemIid = typeof(GraphicsCaptureItem).GUID; + factory.CreateForMonitor(monitors[screenIndex], in itemIid, out var itemPtr); + + var item = MarshalInterface.FromAbi(itemPtr); + Marshal.Release(itemPtr); + return item; + } + finally + { + WindowsDeleteString(hstring); + } + } + + private static List GetMonitorHandles() + { + var handles = new List(); + EnumDisplayMonitors(IntPtr.Zero, IntPtr.Zero, + (hMon, _, ref _, _) => { handles.Add(hMon); return true; }, + IntPtr.Zero); + return handles; + } + + private static byte[] ExtractBitmapBytes(SoftwareBitmap bitmap) + { + var capacity = (uint)(bitmap.PixelWidth * bitmap.PixelHeight * 4); + var buf = new global::Windows.Storage.Streams.Buffer(capacity); + bitmap.CopyToBuffer(buf); + using var dr = DataReader.FromBuffer(buf); + var bytes = new byte[buf.Length]; + dr.ReadBytes(bytes); + return bytes; + } + + // ── P/Invoke declarations ───────────────────────────────────────────────── + + [DllImport("d3d11.dll")] + private static extern int D3D11CreateDevice( + IntPtr pAdapter, uint DriverType, IntPtr Software, uint Flags, + IntPtr pFeatureLevels, uint FeatureLevels, uint SDKVersion, + out IntPtr ppDevice, IntPtr pFeatureLevel, IntPtr ppImmediateContext); + + [DllImport("d3d11.dll", EntryPoint = "CreateDirect3D11DeviceFromDXGIDevice")] + private static extern int NativeCreateDirect3D11DeviceFromDXGIDevice( + IntPtr dxgiDevice, out IntPtr graphicsDevice); + + [DllImport("combase.dll")] + private static extern int WindowsCreateString( + [MarshalAs(UnmanagedType.LPWStr)] string sourceString, int length, out IntPtr hstring); + + [DllImport("combase.dll")] + private static extern int WindowsDeleteString(IntPtr hstring); + + [DllImport("combase.dll")] + private static extern int RoGetActivationFactory( + IntPtr runtimeClassId, ref Guid iid, out IntPtr factory); + + [DllImport("user32.dll")] + private static extern bool EnumDisplayMonitors( + IntPtr hdc, IntPtr lprcClip, MonitorEnumProc 
lpfnEnum, IntPtr dwData); + + private delegate bool MonitorEnumProc( + IntPtr hMonitor, IntPtr hdcMonitor, ref RECT lprcMonitor, IntPtr dwData); + + [StructLayout(LayoutKind.Sequential)] + private struct RECT { public int Left, Top, Right, Bottom; } + + [ComImport] + [Guid("3628E81B-3CAC-4C60-B7F4-23CE0E0C3356")] + [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)] + private interface IGraphicsCaptureItemInterop + { + void CreateForWindow(IntPtr hwnd, in Guid riid, out IntPtr ppv); + void CreateForMonitor(IntPtr hMonitor, in Guid riid, out IntPtr ppv); + } +} From e4e6abb01ea263cee624de25619ebe1fa9a434ff Mon Sep 17 00:00:00 2001 From: AlexAlves87 Date: Wed, 8 Apr 2026 18:44:29 +0200 Subject: [PATCH 03/10] feat: wire screen.record into NodeService and add capability tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NodeService instantiates ScreenRecordingService and subscribes OnScreenRecord to ScreenCapability's RecordRequested event. Tests cover the full surface of screen.record: missing handler error, correct arg forwarding, defaults (durationMs=5000, fps=10, screenIndex=0), the monitor→screenIndex alias, and exception handling in the handler. Co-Authored-By: Claude Sonnet 4.6 --- .../Services/NodeService.cs | 19 +++- .../OpenClaw.Shared.Tests/CapabilityTests.cs | 107 +++++++++++++++++- 2 files changed, 121 insertions(+), 5 deletions(-) diff --git a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs index 731359f..10b6506 100644 --- a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs @@ -20,6 +20,7 @@ public class NodeService : IDisposable private WindowsNodeClient? _nodeClient; private CanvasWindow? _canvasWindow; private ScreenCaptureService? _screenCaptureService; + private ScreenRecordingService? _screenRecordingService; private CameraCaptureService? 
_cameraCaptureService; private DateTime _lastScreenCaptureNotification = DateTime.MinValue; private string? _a2uiHostUrl; @@ -49,8 +50,9 @@ public NodeService(IOpenClawLogger logger, DispatcherQueue dispatcherQueue, stri _logger = logger; _dispatcherQueue = dispatcherQueue; _dataPath = dataPath; - _screenCaptureService = new ScreenCaptureService(logger); - _cameraCaptureService = new CameraCaptureService(logger); + _screenCaptureService = new ScreenCaptureService(logger); + _screenRecordingService = new ScreenRecordingService(logger); + _cameraCaptureService = new CameraCaptureService(logger); } /// @@ -125,8 +127,9 @@ private void RegisterCapabilities() // Screen capability _screenCapability = new ScreenCapability(_logger); - _screenCapability.ListRequested += OnScreenList; + _screenCapability.ListRequested += OnScreenList; _screenCapability.CaptureRequested += OnScreenCapture; + _screenCapability.RecordRequested += OnScreenRecord; _nodeClient.RegisterCapability(_screenCapability); // Camera capability @@ -432,7 +435,15 @@ private async Task OnScreenCapture(ScreenCaptureArgs args) return await _screenCaptureService.CaptureAsync(args); } - + + private Task OnScreenRecord(ScreenRecordArgs args) + { + if (_screenRecordingService == null) + throw new InvalidOperationException("Screen recording service not available"); + + return _screenRecordingService.RecordAsync(args); + } + #endregion #region Camera Capability Handlers diff --git a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs index 67de774..53b492e 100644 --- a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs +++ b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs @@ -683,7 +683,8 @@ public void CanHandle_ScreenCommands() var cap = new ScreenCapability(NullLogger.Instance); Assert.True(cap.CanHandle("screen.capture")); Assert.True(cap.CanHandle("screen.list")); - Assert.False(cap.CanHandle("screen.record")); + Assert.True(cap.CanHandle("screen.record")); + 
Assert.False(cap.CanHandle("screen.unknown")); Assert.Equal("screen", cap.Category); } @@ -835,6 +836,110 @@ public async Task Capture_UsesMonitorAlias_ForScreenIndex() Assert.NotNull(receivedArgs); Assert.Equal(2, receivedArgs!.MonitorIndex); } + + [Fact] + public async Task Record_ReturnsError_WhenNoHandler() + { + var cap = new ScreenCapability(NullLogger.Instance); + var req = new NodeInvokeRequest { Id = "sr1", Command = "screen.record", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("not available", res.Error, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task Record_CallsHandler_WithArgs() + { + var cap = new ScreenCapability(NullLogger.Instance); + ScreenRecordArgs? receivedArgs = null; + cap.RecordRequested += (args) => + { + receivedArgs = args; + return Task.FromResult(new ScreenRecordResult + { + Format = "mp4", Base64 = "vid", DurationMs = 2000, Fps = 10, + ScreenIndex = 1, Width = 1920, Height = 1080 + }); + }; + + var req = new NodeInvokeRequest + { + Id = "sr2", + Command = "screen.record", + Args = Parse("""{"durationMs":2000,"fps":10,"screenIndex":1}""") + }; + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.NotNull(receivedArgs); + Assert.Equal(2000, receivedArgs!.DurationMs); + Assert.Equal(10, receivedArgs.Fps); + Assert.Equal(1, receivedArgs.ScreenIndex); + + var json = JsonSerializer.Serialize(res.Payload); + using var doc = JsonDocument.Parse(json); + var root = doc.RootElement; + Assert.Equal("mp4", root.GetProperty("format").GetString()); + Assert.Equal("vid", root.GetProperty("base64").GetString()); + Assert.Equal(2000, root.GetProperty("durationMs").GetInt32()); + Assert.Equal(10, root.GetProperty("fps").GetInt32()); + Assert.Equal(1, root.GetProperty("screenIndex").GetInt32()); + Assert.Equal(1920, root.GetProperty("width").GetInt32()); + Assert.Equal(1080, root.GetProperty("height").GetInt32()); + Assert.False( 
root.GetProperty("hasAudio").GetBoolean()); + } + + [Fact] + public async Task Record_UsesDefaults_WhenArgsMissing() + { + var cap = new ScreenCapability(NullLogger.Instance); + ScreenRecordArgs? receivedArgs = null; + cap.RecordRequested += (args) => + { + receivedArgs = args; + return Task.FromResult(new ScreenRecordResult()); + }; + + var req = new NodeInvokeRequest { Id = "sr3", Command = "screen.record", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.Equal(5000, receivedArgs!.DurationMs); + Assert.Equal(10, receivedArgs.Fps); + Assert.Equal(0, receivedArgs.ScreenIndex); + } + + [Fact] + public async Task Record_UsesMonitorAlias_ForScreenIndex() + { + var cap = new ScreenCapability(NullLogger.Instance); + ScreenRecordArgs? receivedArgs = null; + cap.RecordRequested += (args) => + { + receivedArgs = args; + return Task.FromResult(new ScreenRecordResult()); + }; + + var req = new NodeInvokeRequest + { + Id = "sr4", + Command = "screen.record", + Args = Parse("""{"monitor":2}""") + }; + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.Equal(2, receivedArgs!.ScreenIndex); + } + + [Fact] + public async Task Record_ReturnsError_WhenHandlerThrows() + { + var cap = new ScreenCapability(NullLogger.Instance); + cap.RecordRequested += (args) => throw new InvalidOperationException("GPU capture failed"); + + var req = new NodeInvokeRequest { Id = "sr5", Command = "screen.record", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("GPU capture failed", res.Error); + } } public class CameraCapabilityTests From 7cce9fe01efd6a3e3359c3d4eade8b479211ca13 Mon Sep 17 00:00:00 2001 From: AlexAlves87 Date: Wed, 8 Apr 2026 19:09:47 +0200 Subject: [PATCH 04/10] feat: add screen.record.start and screen.record.stop Two new commands for session-based recording: - screen.record.start: opens a recording session and returns a recordingId - screen.record.stop: 
closes the session and returns the video ActiveSession manages the capture loop with a CancellationToken and stores frames safely under a lock. A ConcurrentDictionary keyed by recordingId allows concurrent sessions. 9 new tests cover: start/stop without a handler, args and monitor alias, recordingId in the start response, full stop payload, and exception paths. Co-Authored-By: Claude Sonnet 4.6 --- .../Capabilities/ScreenCapability.cs | 78 +++++++- .../Services/NodeService.cs | 19 ++ .../Services/ScreenRecordingService.cs | 175 +++++++++++++++++- .../OpenClaw.Shared.Tests/CapabilityTests.cs | 171 +++++++++++++++++ 4 files changed, 439 insertions(+), 4 deletions(-) diff --git a/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs b/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs index e214603..e2643ac 100644 --- a/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs +++ b/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs @@ -16,6 +16,8 @@ public class ScreenCapability : NodeCapabilityBase "screen.capture", "screen.list", "screen.record", + "screen.record.start", + "screen.record.stop", }; public override IReadOnlyList Commands => _commands; @@ -24,6 +26,8 @@ public class ScreenCapability : NodeCapabilityBase public event Func>? CaptureRequested; public event Func>? ListRequested; public event Func>? RecordRequested; + public event Func>? StartRequested; + public event Func>? 
StopRequested; public ScreenCapability(IOpenClawLogger logger) : base(logger) { @@ -33,9 +37,11 @@ public override async Task ExecuteAsync(NodeInvokeRequest re { return request.Command switch { - "screen.capture" => await HandleCaptureAsync(request), - "screen.list" => await HandleListAsync(request), - "screen.record" => await HandleRecordAsync(request), + "screen.capture" => await HandleCaptureAsync(request), + "screen.list" => await HandleListAsync(request), + "screen.record" => await HandleRecordAsync(request), + "screen.record.start" => await HandleStartAsync(request), + "screen.record.stop" => await HandleStopAsync(request), _ => Error($"Unknown command: {request.Command}") }; } @@ -155,6 +161,66 @@ private async Task HandleRecordAsync(NodeInvokeRequest reque return Error($"Record failed: {ex.Message}"); } } + + private async Task HandleStartAsync(NodeInvokeRequest request) + { + var fps = GetIntArg(request.Args, "fps", 10); + var screenIndex = GetIntArg(request.Args, "screenIndex", GetIntArg(request.Args, "monitor", 0)); + + Logger.Info($"screen.record.start: fps={fps} screenIndex={screenIndex}"); + + if (StartRequested == null) + return Error("Screen recording not available"); + + try + { + var recordingId = await StartRequested(new ScreenRecordStartArgs + { + Fps = fps, + ScreenIndex = screenIndex, + }); + return Success(new { recordingId }); + } + catch (Exception ex) + { + Logger.Error("screen.record.start failed", ex); + return Error($"Start failed: {ex.Message}"); + } + } + + private async Task HandleStopAsync(NodeInvokeRequest request) + { + var recordingId = GetStringArg(request.Args, "recordingId", ""); + + Logger.Info($"screen.record.stop: recordingId={recordingId}"); + + if (string.IsNullOrEmpty(recordingId)) + return Error("recordingId is required"); + + if (StopRequested == null) + return Error("Screen recording not available"); + + try + { + var result = await StopRequested(recordingId); + return Success(new + { + format = result.Format, + 
base64 = result.Base64, + durationMs = result.DurationMs, + fps = result.Fps, + screenIndex = result.ScreenIndex, + width = result.Width, + height = result.Height, + hasAudio = result.HasAudio, + }); + } + catch (Exception ex) + { + Logger.Error("screen.record.stop failed", ex); + return Error($"Stop failed: {ex.Message}"); + } + } } public class ScreenRecordArgs @@ -164,6 +230,12 @@ public class ScreenRecordArgs public int ScreenIndex { get; set; } } +public class ScreenRecordStartArgs +{ + public int Fps { get; set; } = 10; + public int ScreenIndex { get; set; } +} + public class ScreenRecordResult { public string Base64 { get; set; } = ""; diff --git a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs index 10b6506..b14b3c4 100644 --- a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs @@ -130,6 +130,8 @@ private void RegisterCapabilities() _screenCapability.ListRequested += OnScreenList; _screenCapability.CaptureRequested += OnScreenCapture; _screenCapability.RecordRequested += OnScreenRecord; + _screenCapability.StartRequested += OnScreenRecordStart; + _screenCapability.StopRequested += OnScreenRecordStop; _nodeClient.RegisterCapability(_screenCapability); // Camera capability @@ -444,6 +446,22 @@ private Task OnScreenRecord(ScreenRecordArgs args) return _screenRecordingService.RecordAsync(args); } + private Task OnScreenRecordStart(ScreenRecordStartArgs args) + { + if (_screenRecordingService == null) + throw new InvalidOperationException("Screen recording service not available"); + + return _screenRecordingService.StartAsync(args); + } + + private Task OnScreenRecordStop(string recordingId) + { + if (_screenRecordingService == null) + throw new InvalidOperationException("Screen recording service not available"); + + return _screenRecordingService.StopAsync(recordingId); + } + #endregion #region Camera Capability Handlers @@ -494,6 +512,7 @@ public void 
Dispose() _nodeClient = null; try { client?.Dispose(); } catch { /* ignore */ } + try { _screenRecordingService?.Dispose(); } catch { /* ignore */ } try { _cameraCaptureService?.Dispose(); } catch { /* ignore */ } if (_canvasWindow != null && !_canvasWindow.IsClosed) diff --git a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs index cbd350a..5ed2628 100644 --- a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Runtime.InteropServices; using System.Threading; @@ -19,9 +20,10 @@ namespace OpenClawTray.Services; /// /// Records the screen using Windows.Graphics.Capture and encodes to MP4 via MediaTranscoder. /// -internal sealed class ScreenRecordingService +internal sealed class ScreenRecordingService : IDisposable { private readonly IOpenClawLogger _logger; + private readonly ConcurrentDictionary _sessions = new(); private const int MaxFps = 60; private const int MinFps = 1; @@ -132,6 +134,77 @@ public async Task RecordAsync(ScreenRecordArgs args) }; } + public Task StartAsync(ScreenRecordStartArgs args) + { + var fps = Math.Clamp(args.Fps, MinFps, MaxFps); + var screenIndex = args.ScreenIndex; + + _logger.Info($"[ScreenRecording] start fps={fps} screen={screenIndex}"); + + var item = CreateCaptureItem(screenIndex); + var width = item.Size.Width; + var height = item.Size.Height; + var d3d = CreateDirect3DDevice(); + + var pool = Direct3D11CaptureFramePool.CreateFreeThreaded( + d3d, + DirectXPixelFormat.B8G8R8A8UIntNormalized, + PoolBuffers, + new global::Windows.Graphics.SizeInt32 { Width = width, Height = height }); + + var captureSession = pool.CreateCaptureSession(item); + captureSession.IsCursorCaptureEnabled = false; + + var session = new ActiveSession(screenIndex, fps, width, height, pool, 
captureSession, _logger); + _sessions[session.Id] = session; + + _logger.Info($"[ScreenRecording] started session {session.Id}"); + return Task.FromResult(session.Id); + } + + public async Task StopAsync(string recordingId) + { + if (!_sessions.TryRemove(recordingId, out var session)) + throw new KeyNotFoundException($"Recording session '{recordingId}' not found"); + + _logger.Info($"[ScreenRecording] stopping session {recordingId}..."); + + List frames; + int width, height, fps, screenIndex, durationMs; + using (session) + { + (frames, durationMs) = await session.StopAsync(); + width = session.Width; + height = session.Height; + fps = session.Fps; + screenIndex = session.ScreenIndex; + } + + _logger.Info($"[ScreenRecording] session {recordingId}: {frames.Count} frames, encoding..."); + var base64 = await EncodeToMp4Async(frames, width, height, fps); + + return new ScreenRecordResult + { + Format = "mp4", + Base64 = base64, + DurationMs = durationMs, + Fps = fps, + ScreenIndex = screenIndex, + Width = width, + Height = height, + HasAudio = false, + }; + } + + public void Dispose() + { + foreach (var kv in _sessions) + { + if (_sessions.TryRemove(kv.Key, out var s)) + try { s.Dispose(); } catch { } + } + } + // ── Encoding ────────────────────────────────────────────────────────────── private static async Task EncodeToMp4Async( @@ -325,4 +398,104 @@ private interface IGraphicsCaptureItemInterop void CreateForWindow(IntPtr hwnd, in Guid riid, out IntPtr ppv); void CreateForMonitor(IntPtr hMonitor, in Guid riid, out IntPtr ppv); } + + // ── Active session (start/stop) ─────────────────────────────────────────── + + private sealed class ActiveSession : IDisposable + { + public readonly string Id = Guid.NewGuid().ToString("N")[..12]; + public readonly int ScreenIndex; + public readonly int Fps; + public readonly int Width; + public readonly int Height; + + private readonly IOpenClawLogger _logger; + private readonly List _frames = new(); + private readonly object 
_framesLock = new(); + private readonly CancellationTokenSource _cts = new(); + private readonly Direct3D11CaptureFramePool _pool; + private readonly GraphicsCaptureSession _session; + private readonly DateTime _startedAt = DateTime.UtcNow; + private volatile Direct3D11CaptureFrame? _latestFrame; + private readonly SemaphoreSlim _ready = new(0, 1); + private readonly Task _captureTask; + + public ActiveSession(int screenIndex, int fps, int width, int height, + Direct3D11CaptureFramePool pool, GraphicsCaptureSession session, + IOpenClawLogger logger) + { + ScreenIndex = screenIndex; Fps = fps; Width = width; Height = height; + _pool = pool; _session = session; _logger = logger; + + pool.FrameArrived += OnFrameArrived; + session.StartCapture(); + _captureTask = RunAsync(_cts.Token); + } + + private void OnFrameArrived(Direct3D11CaptureFramePool pool, object _) + { + var f = pool.TryGetNextFrame(); + if (f == null) return; + Interlocked.Exchange(ref _latestFrame, f)?.Dispose(); + try { _ready.Release(); } catch { /* already signaled */ } + } + + private async Task RunAsync(CancellationToken ct) + { + var intervalMs = 1000 / Fps; + var nextCapture = DateTime.UtcNow; + + while (!ct.IsCancellationRequested) + { + try + { + var waitMs = (int)(nextCapture - DateTime.UtcNow).TotalMilliseconds; + if (waitMs > 0) await Task.Delay(waitMs, ct); + + if (!await _ready.WaitAsync(intervalMs * 2, ct)) continue; + } + catch (OperationCanceledException) { break; } + + var frame = Interlocked.Exchange(ref _latestFrame, null); + if (frame == null) continue; + + using (frame) + { + try + { + var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface); + var bytes = ExtractBitmapBytes(bmp); + lock (_framesLock) _frames.Add(bytes); + } + catch (Exception ex) + { + _logger.Warn($"[ScreenRecording] Session {Id} frame skipped: {ex.Message}"); + } + } + + nextCapture = nextCapture.AddMilliseconds(intervalMs); + } + } + + public async Task<(List frames, int durationMs)> StopAsync() 
+ { + _cts.Cancel(); + try { await _captureTask; } catch (OperationCanceledException) { } catch { } + + var durationMs = (int)(DateTime.UtcNow - _startedAt).TotalMilliseconds; + List snapshot; + lock (_framesLock) snapshot = new List(_frames); + return (snapshot, durationMs); + } + + public void Dispose() + { + _cts.Cancel(); + try { _session.Dispose(); } catch { } + try { _pool.Dispose(); } catch { } + Interlocked.Exchange(ref _latestFrame, null)?.Dispose(); + _cts.Dispose(); + _ready.Dispose(); + } + } } diff --git a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs index 53b492e..3f168ee 100644 --- a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs +++ b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs @@ -940,6 +940,177 @@ public async Task Record_ReturnsError_WhenHandlerThrows() Assert.False(res.Ok); Assert.Contains("GPU capture failed", res.Error); } + + // ── screen.record.start ──────────────────────────────────────────────────── + + [Fact] + public void CanHandle_RecordStartStop() + { + var cap = new ScreenCapability(NullLogger.Instance); + Assert.True(cap.CanHandle("screen.record.start")); + Assert.True(cap.CanHandle("screen.record.stop")); + Assert.False(cap.CanHandle("screen.record.pause")); + } + + [Fact] + public async Task Start_ReturnsError_WhenNoHandler() + { + var cap = new ScreenCapability(NullLogger.Instance); + var req = new NodeInvokeRequest { Id = "ss1", Command = "screen.record.start", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("not available", res.Error!, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task Start_CallsHandler_WithArgs_AndReturnsRecordingId() + { + var cap = new ScreenCapability(NullLogger.Instance); + ScreenRecordStartArgs? 
receivedArgs = null; + cap.StartRequested += args => + { + receivedArgs = args; + return Task.FromResult("abc123"); + }; + + var req = new NodeInvokeRequest + { + Id = "ss2", + Command = "screen.record.start", + Args = Parse("""{"fps":15,"screenIndex":2}""") + }; + + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.NotNull(receivedArgs); + Assert.Equal(15, receivedArgs!.Fps); + Assert.Equal(2, receivedArgs.ScreenIndex); + + var json = JsonSerializer.Serialize(res.Payload); + using var doc = JsonDocument.Parse(json); + Assert.Equal("abc123", doc.RootElement.GetProperty("recordingId").GetString()); + } + + [Fact] + public async Task Start_UsesMonitorAlias_ForScreenIndex() + { + var cap = new ScreenCapability(NullLogger.Instance); + ScreenRecordStartArgs? receivedArgs = null; + cap.StartRequested += args => { receivedArgs = args; return Task.FromResult("id1"); }; + + var req = new NodeInvokeRequest + { + Id = "ss3", + Command = "screen.record.start", + Args = Parse("""{"monitor":1}""") + }; + + await cap.ExecuteAsync(req); + Assert.Equal(1, receivedArgs!.ScreenIndex); + } + + [Fact] + public async Task Start_ReturnsError_WhenHandlerThrows() + { + var cap = new ScreenCapability(NullLogger.Instance); + cap.StartRequested += _ => throw new InvalidOperationException("D3D init failed"); + + var req = new NodeInvokeRequest { Id = "ss4", Command = "screen.record.start", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("D3D init failed", res.Error); + } + + // ── screen.record.stop ───────────────────────────────────────────────────── + + [Fact] + public async Task Stop_ReturnsError_WhenNoHandler() + { + var cap = new ScreenCapability(NullLogger.Instance); + var req = new NodeInvokeRequest + { + Id = "st1", + Command = "screen.record.stop", + Args = Parse("""{"recordingId":"abc"}""") + }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("not available", 
res.Error!, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task Stop_ReturnsError_WhenMissingRecordingId() + { + var cap = new ScreenCapability(NullLogger.Instance); + cap.StopRequested += _ => Task.FromResult(new ScreenRecordResult()); + + var req = new NodeInvokeRequest { Id = "st2", Command = "screen.record.stop", Args = Parse("""{}""") }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("recordingId", res.Error!, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public async Task Stop_CallsHandler_WithRecordingId_AndReturnsFullPayload() + { + var cap = new ScreenCapability(NullLogger.Instance); + string? receivedId = null; + cap.StopRequested += id => + { + receivedId = id; + return Task.FromResult(new ScreenRecordResult + { + Format = "mp4", + Base64 = "dGVzdA==", + DurationMs = 3200, + Fps = 15, + ScreenIndex = 1, + Width = 1920, + Height = 1080, + HasAudio = false, + }); + }; + + var req = new NodeInvokeRequest + { + Id = "st3", + Command = "screen.record.stop", + Args = Parse("""{"recordingId":"myRecId"}""") + }; + + var res = await cap.ExecuteAsync(req); + Assert.True(res.Ok); + Assert.Equal("myRecId", receivedId); + + var json = JsonSerializer.Serialize(res.Payload); + using var doc = JsonDocument.Parse(json); + var p = doc.RootElement; + Assert.Equal("mp4", p.GetProperty("format").GetString()); + Assert.Equal("dGVzdA==", p.GetProperty("base64").GetString()); + Assert.Equal(3200, p.GetProperty("durationMs").GetInt32()); + Assert.Equal(15, p.GetProperty("fps").GetInt32()); + Assert.Equal(1, p.GetProperty("screenIndex").GetInt32()); + Assert.Equal(1920, p.GetProperty("width").GetInt32()); + Assert.Equal(1080, p.GetProperty("height").GetInt32()); + Assert.False( p.GetProperty("hasAudio").GetBoolean()); + } + + [Fact] + public async Task Stop_ReturnsError_WhenHandlerThrows() + { + var cap = new ScreenCapability(NullLogger.Instance); + cap.StopRequested += _ => throw new KeyNotFoundException("session 
not found"); + + var req = new NodeInvokeRequest + { + Id = "st4", + Command = "screen.record.stop", + Args = Parse("""{"recordingId":"bad"}""") + }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("session not found", res.Error); + } } public class CameraCapabilityTests From f4dbc521df0d2dc1cd743d417fe1f7ebca93cac4 Mon Sep 17 00:00:00 2001 From: AlexAlves87 Date: Thu, 9 Apr 2026 17:00:09 +0200 Subject: [PATCH 05/10] fix: repair screen recording capture and encoding pipeline - Fix InvalidCastException in CreateForMonitor: pass IID_IInspectable instead of typeof(GraphicsCaptureItem).GUID, which returns a C#/WinRT- generated GUID unrecognized by the native COM method (E_NOINTERFACE). - Replace PrepareStreamTranscodeAsync with PrepareMediaStreamSourceTranscodeAsync + MediaStreamSource feeding NV12 samples on demand, fixing "Transcode failed: Unknown" on all three screen recording commands. - Add 500 MB frame-buffer cap (MaxFrameBufferBytes) with early stop and warning log to prevent OOM on long or high-fps recordings. - Save encoded MP4 to %TEMP%\openclaw\ and return filePath in the response. - Change ScreenRecordResult.Fps from float to int. 
Co-Authored-By: Claude Sonnet 4.6 --- .../Capabilities/ScreenCapability.cs | 17 +- .../Services/ScreenRecordingService.cs | 181 ++++++++++++------ 2 files changed, 136 insertions(+), 62 deletions(-) diff --git a/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs b/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs index e2643ac..62c380d 100644 --- a/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs +++ b/src/OpenClaw.Shared/Capabilities/ScreenCapability.cs @@ -147,6 +147,7 @@ private async Task HandleRecordAsync(NodeInvokeRequest reque { format = result.Format, base64 = result.Base64, + filePath = result.FilePath, durationMs = result.DurationMs, fps = result.Fps, screenIndex = result.ScreenIndex, @@ -158,7 +159,7 @@ private async Task HandleRecordAsync(NodeInvokeRequest reque catch (Exception ex) { Logger.Error("screen.record failed", ex); - return Error($"Record failed: {ex.Message}"); + return Error($"Record failed: {ex.GetType().Name}: {ex.Message} | {ex.StackTrace?.Split('\n').FirstOrDefault()?.Trim()}"); } } @@ -207,6 +208,7 @@ private async Task HandleStopAsync(NodeInvokeRequest request { format = result.Format, base64 = result.Base64, + filePath = result.FilePath, durationMs = result.DurationMs, fps = result.Fps, screenIndex = result.ScreenIndex, @@ -223,6 +225,12 @@ private async Task HandleStopAsync(NodeInvokeRequest request } } +/// +/// Parameters for a fixed-duration screen recording. +/// Memory usage: width × height × 4 bytes × (durationMs/1000 × fps) frames. +/// Recommended limits: durationMs ≤ 6 000, fps ≤ 10 for 1080p to stay under 500 MB. +/// The service enforces a hard 500 MB frame-buffer cap and stops capture early if exceeded. +/// public class ScreenRecordArgs { public int DurationMs { get; set; } = 5000; @@ -230,6 +238,10 @@ public class ScreenRecordArgs public int ScreenIndex { get; set; } } +/// +/// Parameters for an open-ended screen recording session (screen.record.start / screen.record.stop).
+/// The same 500 MB frame-buffer cap applies; capture stops automatically if the limit is hit. +/// public class ScreenRecordStartArgs { public int Fps { get; set; } = 10; @@ -240,8 +252,9 @@ public class ScreenRecordResult { public string Base64 { get; set; } = ""; public string Format { get; set; } = "mp4"; + public string? FilePath { get; set; } public int DurationMs { get; set; } - public float Fps { get; set; } + public int Fps { get; set; } public int ScreenIndex { get; set; } public int Width { get; set; } public int Height { get; set; } diff --git a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs index 5ed2628..b4347f8 100644 --- a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs @@ -8,6 +8,7 @@ using Windows.Graphics.DirectX; using Windows.Graphics.DirectX.Direct3D11; using Windows.Graphics.Imaging; +using Windows.Media.Core; using Windows.Media.MediaProperties; using Windows.Media.Transcoding; using Windows.Storage.Streams; @@ -31,6 +32,11 @@ internal sealed class ScreenRecordingService : IDisposable private const int MaxDurationMs = 60_000; private const int PoolBuffers = 2; + // BGRA frame buffer safety cap: ~500 MB across all queued frames. + // At 1080p (8 MB/frame) this allows ~62 frames; at 720p (~4 MB) ~125 frames. + // Capture stops early once this limit is reached to prevent OOM on long/high-fps recordings.
+ private const long MaxFrameBufferBytes = 500L * 1024 * 1024; + public ScreenRecordingService(IOpenClawLogger logger) { _logger = logger; @@ -56,6 +62,7 @@ public async Task RecordAsync(ScreenRecordArgs args) var latestFrame = (Direct3D11CaptureFrame?)null; using var ready = new SemaphoreSlim(0, 1); var frames = new List(); + var frameBytes = (long)width * height * 4; // BGRA bytes per frame try { @@ -96,6 +103,12 @@ public async Task RecordAsync(ScreenRecordArgs args) using (frame) { + if (frames.Count * frameBytes >= MaxFrameBufferBytes) + { + _logger.Warn($"[ScreenRecording] Frame buffer cap reached ({MaxFrameBufferBytes / 1024 / 1024} MB), stopping early."); + break; + } + try { var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface); @@ -120,11 +133,13 @@ public async Task RecordAsync(ScreenRecordArgs args) _logger.Info($"[ScreenRecording] Captured {frames.Count} frames, encoding..."); var base64 = await EncodeToMp4Async(frames, width, height, fps); + var filePath = SaveToTempFile(base64); return new ScreenRecordResult { Format = "mp4", Base64 = base64, + FilePath = filePath, DurationMs = durationMs, Fps = fps, ScreenIndex = screenIndex, @@ -182,11 +197,13 @@ public async Task StopAsync(string recordingId) _logger.Info($"[ScreenRecording] session {recordingId}: {frames.Count} frames, encoding..."); var base64 = await EncodeToMp4Async(frames, width, height, fps); + var filePath = SaveToTempFile(base64); return new ScreenRecordResult { Format = "mp4", Base64 = base64, + FilePath = filePath, DurationMs = durationMs, Fps = fps, ScreenIndex = screenIndex, @@ -205,82 +222,116 @@ public void Dispose() } } + // ── Temp file ───────────────────────────────────────────────────────────── + + private string SaveToTempFile(string base64) + { + var dir = Path.Combine(Path.GetTempPath(), "openclaw"); + Directory.CreateDirectory(dir); + var path = Path.Combine(dir, $"openclaw-screen-record-{Guid.NewGuid()}.mp4"); + File.WriteAllBytes(path, 
Convert.FromBase64String(base64)); + _logger.Info($"[ScreenRecording] Saved to {path}"); + return path; + } + // ── Encoding ────────────────────────────────────────────────────────────── private static async Task EncodeToMp4Async( List frames, int width, int height, int fps) { - var output = new InMemoryRandomAccessStream(); - - var profile = MediaEncodingProfile.CreateMp4(VideoEncodingQuality.HD720p); - profile.Video.Width = (uint)width; - profile.Video.Height = (uint)height; - profile.Video.FrameRate.Numerator = (uint)fps; - profile.Video.FrameRate.Denominator = 1; - profile.Audio = null; + if (frames.Count == 0) + throw new InvalidOperationException("No frames to encode"); - var input = BuildRawVideoStream(frames, width, height); + var encWidth = (uint)(width & ~1); + var encHeight = (uint)(height & ~1); + var fi = new[] { 0 }; - PrepareTranscodeResult? xcode = null; - try + MediaStreamSource MakeMss() { - xcode = await new MediaTranscoder { HardwareAccelerationEnabled = true } - .PrepareStreamTranscodeAsync(input, output, profile); + fi[0] = 0; + var inputProps = VideoEncodingProperties.CreateUncompressed( + MediaEncodingSubtypes.Nv12, encWidth, encHeight); + inputProps.FrameRate.Numerator = (uint)fps; + inputProps.FrameRate.Denominator = 1; + var mss = new MediaStreamSource(new VideoStreamDescriptor(inputProps)); + mss.BufferTime = TimeSpan.Zero; + mss.SampleRequested += (_, e) => + { + if (fi[0] >= frames.Count) { e.Request.Sample = null; return; } + var nv12 = BgraToNv12(frames[fi[0]], width, height, (int)encWidth, (int)encHeight); + var ts = TimeSpan.FromTicks((long)(fi[0] * 10_000_000.0 / fps)); + var dur = TimeSpan.FromTicks((long)(10_000_000.0 / fps)); + var dw = new DataWriter(); + dw.WriteBytes(nv12); + var sample = MediaStreamSample.CreateFromBuffer(dw.DetachBuffer(), ts); + sample.Duration = dur; + e.Request.Sample = sample; + fi[0]++; + }; + return mss; } - catch + + MediaEncodingProfile MakeProfile() { - xcode = await new MediaTranscoder { 
HardwareAccelerationEnabled = false } - .PrepareStreamTranscodeAsync(input, output, profile); + var profile = MediaEncodingProfile.CreateMp4(VideoEncodingQuality.Auto); + profile.Video.Width = encWidth; + profile.Video.Height = encHeight; + profile.Video.Bitrate = 4_000_000; + profile.Video.FrameRate.Numerator = (uint)fps; + profile.Video.FrameRate.Denominator = 1; + profile.Audio = null; + return profile; } - if (!xcode.CanTranscode) - throw new InvalidOperationException($"Transcode failed: {xcode.FailureReason}"); - - await xcode.TranscodeAsync(); - - output.Seek(0); - var reader = new DataReader(output); - await reader.LoadAsync((uint)output.Size); - var bytes = new byte[output.Size]; - reader.ReadBytes(bytes); - return Convert.ToBase64String(bytes); - } + foreach (var hwEnabled in new[] { true, false }) + { + using var output = new InMemoryRandomAccessStream(); + var transcoder = new MediaTranscoder { HardwareAccelerationEnabled = hwEnabled }; + PrepareTranscodeResult result; + try + { + result = await transcoder + .PrepareMediaStreamSourceTranscodeAsync(MakeMss(), output, MakeProfile()); + } + catch (System.Runtime.InteropServices.COMException) when (hwEnabled) + { + continue; + } + if (!result.CanTranscode) continue; + await result.TranscodeAsync(); + var size = (uint)output.Size; + if (size == 0) continue; + var dr = new DataReader(output.GetInputStreamAt(0)); + await dr.LoadAsync(size); + var bytes = new byte[size]; + dr.ReadBytes(bytes); + return Convert.ToBase64String(bytes); + } - private static InMemoryRandomAccessStream BuildRawVideoStream( - List frames, int width, int height) - { - var stream = new InMemoryRandomAccessStream(); - var writer = new DataWriter(stream); - foreach (var frame in frames) - writer.WriteBytes(BgraToNv12(frame, width, height)); - writer.StoreAsync().AsTask().Wait(); - stream.Seek(0); - return stream; + throw new InvalidOperationException("No encoder available (hardware or software)"); } - /// BT.601 limited-range BGRA→NV12 
conversion. - private static byte[] BgraToNv12(byte[] bgra, int width, int height) + private static byte[] BgraToNv12(byte[] bgra, int srcWidth, int srcHeight, + int encWidth, int encHeight) { - var nv12 = new byte[width * height * 3 / 2]; - int yBase = 0; - int uvBase = width * height; - - for (int y = 0; y < height; y++) - for (int x = 0; x < width; x++) + var nv12 = new byte[encWidth * encHeight * 3 / 2]; + for (int y = 0; y < encHeight; y++) + for (int x = 0; x < encWidth; x++) { - int i = (y * width + x) * 4; - byte b = bgra[i], g = bgra[i + 1], r = bgra[i + 2]; - - nv12[yBase++] = (byte)(16 + (66 * r + 129 * g + 25 * b) / 256); - - if ((y & 1) == 0 && (x & 1) == 0) - { - int uv = uvBase + (y / 2 * width) + (x & ~1); - nv12[uv] = (byte)(128 + (-38 * r - 74 * g + 112 * b) / 256); - nv12[uv + 1] = (byte)(128 + (112 * r - 94 * g - 18 * b) / 256); - } + int i = (y * srcWidth + x) * 4; + int b = bgra[i], g = bgra[i + 1], r = bgra[i + 2]; + nv12[y * encWidth + x] = (byte)(((66 * r + 129 * g + 25 * b + 128) >> 8) + 16); + } + int uvBase = encWidth * encHeight; + for (int y = 0; y < encHeight; y += 2) + for (int x = 0; x < encWidth; x += 2) + { + int i = (y * srcWidth + x) * 4; + int b = bgra[i], g = bgra[i + 1], r = bgra[i + 2]; + int uvIdx = uvBase + (y / 2) * encWidth + x; + nv12[uvIdx] = (byte)(((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128); + nv12[uvIdx + 1] = (byte)(((112 * r - 94 * g - 18 * b + 128) >> 8) + 128); } - return nv12; } @@ -324,10 +375,10 @@ private static GraphicsCaptureItem CreateCaptureItem(int screenIndex) var factory = (IGraphicsCaptureItemInterop)Marshal.GetObjectForIUnknown(factoryPtr); Marshal.Release(factoryPtr); - var itemIid = typeof(GraphicsCaptureItem).GUID; + var itemIid = new Guid("AF86E2E0-B12D-4C6A-9C5A-D7AA65101E90"); // IInspectable factory.CreateForMonitor(monitors[screenIndex], in itemIid, out var itemPtr); - var item = MarshalInterface.FromAbi(itemPtr); + var item = MarshalInspectable.FromAbi(itemPtr); 
Marshal.Release(itemPtr); return item; } @@ -444,6 +495,7 @@ private async Task RunAsync(CancellationToken ct) { var intervalMs = 1000 / Fps; var nextCapture = DateTime.UtcNow; + var frameBytes = (long)Width * Height * 4; while (!ct.IsCancellationRequested) { @@ -461,6 +513,15 @@ private async Task RunAsync(CancellationToken ct) using (frame) { + int frameCount; + lock (_framesLock) frameCount = _frames.Count; + if (frameCount * frameBytes >= MaxFrameBufferBytes) + { + _logger.Warn($"[ScreenRecording] Session {Id}: frame buffer cap reached ({MaxFrameBufferBytes / 1024 / 1024} MB), stopping capture."); + _cts.Cancel(); + break; + } + try { var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface); From 5267bb65bd2e6471ee8c9048f9bfd7e00d40d0b6 Mon Sep 17 00:00:00 2001 From: AlexAlves87 Date: Wed, 15 Apr 2026 13:31:39 +0200 Subject: [PATCH 06/10] fix: throw explicit exceptions in CreateCaptureItem for missing/invalid monitor Silent fallback to index 0 masked the no-monitor case (IndexOutOfRange) and gave callers no indication that their screen index was wrong. Now throws InvalidOperationException (no screens) or ArgumentOutOfRangeException (bad index). 
Co-Authored-By: Claude Sonnet 4.6 --- .../Services/ScreenRecordingService.cs | 5 ++++- tests/OpenClaw.Shared.Tests/CapabilityTests.cs | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs index b4347f8..5ad35a1 100644 --- a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs @@ -362,8 +362,11 @@ private static IDirect3DDevice CreateDirect3DDevice() private static GraphicsCaptureItem CreateCaptureItem(int screenIndex) { var monitors = GetMonitorHandles(); + if (monitors.Count == 0) + throw new InvalidOperationException("No screens available for capture"); if (screenIndex < 0 || screenIndex >= monitors.Count) - screenIndex = 0; + throw new ArgumentOutOfRangeException(nameof(screenIndex), + $"Screen index {screenIndex} is out of range (0\u2013{monitors.Count - 1})"); const string classId = "Windows.Graphics.Capture.GraphicsCaptureItem"; var iid = typeof(IGraphicsCaptureItemInterop).GUID; diff --git a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs index 3f168ee..4d6a57f 100644 --- a/tests/OpenClaw.Shared.Tests/CapabilityTests.cs +++ b/tests/OpenClaw.Shared.Tests/CapabilityTests.cs @@ -941,6 +941,23 @@ public async Task Record_ReturnsError_WhenHandlerThrows() Assert.Contains("GPU capture failed", res.Error); } + [Fact] + public async Task Record_PropagatesOutOfRangeAsError() + { + var cap = new ScreenCapability(NullLogger.Instance); + cap.RecordRequested += _ => + throw new ArgumentOutOfRangeException("screenIndex", "Screen index 5 is out of range (0\u20131)"); + + var req = new NodeInvokeRequest + { + Id = "sr6", Command = "screen.record", + Args = Parse("""{"screenIndex":5}""") + }; + var res = await cap.ExecuteAsync(req); + Assert.False(res.Ok); + Assert.Contains("screenIndex", res.Error ?? 
""); + } + // ── screen.record.start ──────────────────────────────────────────────────── [Fact] From 9008cb65db87012b66eda0f507a6452915737a43 Mon Sep 17 00:00:00 2001 From: AlexAlves87 Date: Wed, 15 Apr 2026 13:32:27 +0200 Subject: [PATCH 07/10] fix: tie IDirect3DDevice lifetime to ActiveSession and RecordAsync finally D3D device was created but never disposed: in StartAsync the reference was dropped after pool creation; in RecordAsync it was outside the finally block. ActiveSession now owns the device and disposes it alongside pool and session. Co-Authored-By: Claude Sonnet 4.6 --- .../Services/ScreenRecordingService.cs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs index 5ad35a1..e96009b 100644 --- a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs @@ -127,6 +127,7 @@ public async Task RecordAsync(ScreenRecordArgs args) { session?.Dispose(); pool?.Dispose(); + (d3d as IDisposable)?.Dispose(); Interlocked.Exchange(ref latestFrame, null)?.Dispose(); } @@ -170,7 +171,7 @@ public Task StartAsync(ScreenRecordStartArgs args) var captureSession = pool.CreateCaptureSession(item); captureSession.IsCursorCaptureEnabled = false; - var session = new ActiveSession(screenIndex, fps, width, height, pool, captureSession, _logger); + var session = new ActiveSession(screenIndex, fps, width, height, d3d, pool, captureSession, _logger); _sessions[session.Id] = session; _logger.Info($"[ScreenRecording] started session {session.Id}"); @@ -464,6 +465,7 @@ private sealed class ActiveSession : IDisposable public readonly int Height; private readonly IOpenClawLogger _logger; + private readonly IDirect3DDevice _device; private readonly List _frames = new(); private readonly object _framesLock = new(); private readonly CancellationTokenSource _cts = new(); @@ -475,11 
+477,11 @@ private sealed class ActiveSession : IDisposable private readonly Task _captureTask; public ActiveSession(int screenIndex, int fps, int width, int height, - Direct3D11CaptureFramePool pool, GraphicsCaptureSession session, + IDirect3DDevice device, Direct3D11CaptureFramePool pool, GraphicsCaptureSession session, IOpenClawLogger logger) { ScreenIndex = screenIndex; Fps = fps; Width = width; Height = height; - _pool = pool; _session = session; _logger = logger; + _device = device; _pool = pool; _session = session; _logger = logger; pool.FrameArrived += OnFrameArrived; session.StartCapture(); @@ -555,8 +557,10 @@ private async Task RunAsync(CancellationToken ct) public void Dispose() { _cts.Cancel(); + try { _captureTask.GetAwaiter().GetResult(); } catch { } try { _session.Dispose(); } catch { } try { _pool.Dispose(); } catch { } + try { (_device as IDisposable)?.Dispose(); } catch { } Interlocked.Exchange(ref _latestFrame, null)?.Dispose(); _cts.Dispose(); _ready.Dispose(); From e272e9f15f777272ec53f309f312f7729f9a7a45 Mon Sep 17 00:00:00 2001 From: AlexAlves87 Date: Wed, 15 Apr 2026 13:33:03 +0200 Subject: [PATCH 08/10] fix: stop active recording sessions on node disconnect An abandoned screen.record.start session kept holding D3D device, frame pool and capture thread after disconnect. DisconnectAsync now calls StopAllSessions() so resources are released on every disconnect, not just on full shutdown. 
Co-Authored-By: Claude Sonnet 4.6 --- src/OpenClaw.Tray.WinUI/Services/NodeService.cs | 4 +++- src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs index b14b3c4..4915855 100644 --- a/src/OpenClaw.Tray.WinUI/Services/NodeService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/NodeService.cs @@ -94,7 +94,9 @@ public async Task DisconnectAsync() _nodeClient.Dispose(); _nodeClient = null; } - + + _screenRecordingService?.StopAllSessions(); + // Close canvas window if (_canvasWindow != null && !_canvasWindow.IsClosed) { diff --git a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs index e96009b..142026a 100644 --- a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs @@ -214,7 +214,7 @@ public async Task StopAsync(string recordingId) }; } - public void Dispose() + public void StopAllSessions() { foreach (var kv in _sessions) { @@ -223,6 +223,8 @@ public void Dispose() } } + public void Dispose() => StopAllSessions(); + // ── Temp file ───────────────────────────────────────────────────────────── private string SaveToTempFile(string base64) From 8b6982715e64084c414bf49381f61d583494e1fd Mon Sep 17 00:00:00 2001 From: AlexAlves87 Date: Wed, 15 Apr 2026 13:33:39 +0200 Subject: [PATCH 09/10] fix: dispose SoftwareBitmap and DataWriter immediately after use SoftwareBitmap wraps native memory that the GC won't release until the finalizer runs, causing memory to pile up during long recordings. DataWriter has the same issue per-frame in the encoder. Adding 'using' ensures prompt release after each frame is processed. 
Co-Authored-By: Claude Sonnet 4.6 --- src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs index 142026a..89ac417 100644 --- a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs @@ -111,7 +111,7 @@ public async Task RecordAsync(ScreenRecordArgs args) try { - var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface); + using var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface); frames.Add(ExtractBitmapBytes(bmp)); } catch (Exception ex) @@ -264,7 +264,7 @@ MediaStreamSource MakeMss() var nv12 = BgraToNv12(frames[fi[0]], width, height, (int)encWidth, (int)encHeight); var ts = TimeSpan.FromTicks((long)(fi[0] * 10_000_000.0 / fps)); var dur = TimeSpan.FromTicks((long)(10_000_000.0 / fps)); - var dw = new DataWriter(); + using var dw = new DataWriter(); dw.WriteBytes(nv12); var sample = MediaStreamSample.CreateFromBuffer(dw.DetachBuffer(), ts); sample.Duration = dur; @@ -531,7 +531,7 @@ private async Task RunAsync(CancellationToken ct) try { - var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface); + using var bmp = await SoftwareBitmap.CreateCopyFromSurfaceAsync(frame.Surface); var bytes = ExtractBitmapBytes(bmp); lock (_framesLock) _frames.Add(bytes); } From d962f4a13f811455174354114ab061c8e22737a5 Mon Sep 17 00:00:00 2001 From: AlexAlves87 Date: Wed, 15 Apr 2026 13:34:25 +0200 Subject: [PATCH 10/10] fix: delete temp MP4 recordings older than 24h on each new save Without a cleanup policy, screen recordings accumulated indefinitely in %TEMP%\openclaw\. Files contain screen content (potentially sensitive). CleanupOldTempRecordings() now runs before each SaveToTempFile call. 
Co-Authored-By: Claude Sonnet 4.6 --- .../Services/ScreenRecordingService.cs | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs index 89ac417..a13a740 100644 --- a/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs +++ b/src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs @@ -231,12 +231,34 @@ private string SaveToTempFile(string base64) { var dir = Path.Combine(Path.GetTempPath(), "openclaw"); Directory.CreateDirectory(dir); + CleanupOldTempRecordings(dir); var path = Path.Combine(dir, $"openclaw-screen-record-{Guid.NewGuid()}.mp4"); File.WriteAllBytes(path, Convert.FromBase64String(base64)); _logger.Info($"[ScreenRecording] Saved to {path}"); return path; } + private void CleanupOldTempRecordings(string dir) + { + try + { + foreach (var file in Directory.EnumerateFiles(dir, "openclaw-screen-record-*.mp4")) + { + try + { + if (new FileInfo(file).CreationTimeUtc < DateTime.UtcNow.AddHours(-24)) + File.Delete(file); + } + catch (IOException) { } + catch (UnauthorizedAccessException) { } + } + } + catch (Exception ex) + { + _logger.Warn($"[ScreenRecording] Temp cleanup failed: {ex.Message}"); + } + } + // ── Encoding ────────────────────────────────────────────────────────────── private static async Task EncodeToMp4Async(