From ed77365d570e4d2434586bb79aa332882fc45a0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Tue, 16 Dec 2025 17:15:23 +0100
Subject: [PATCH 1/2] feat: save videos

---
 src/agents/__main__.py       |  1 +
 src/agents/evaluator_envs.py | 39 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/src/agents/__main__.py b/src/agents/__main__.py
index fabb3f6..5ae29f8 100644
--- a/src/agents/__main__.py
+++ b/src/agents/__main__.py
@@ -67,6 +67,7 @@ def _per_process(
     step, _agent_cfg, eval_cfgs, episodes, n_processes, nth_gpu = args
     logging.info(f"Starting evaluation for step {step}")
     os.environ["CUDA_VISIBLE_DEVICES"] = str(nth_gpu)
+    os.environ["CAM_PATH"] = f"{os.environ['RUN_PATH']}/videos/{step}"
 
     agent_cfg = copy.deepcopy(_agent_cfg)
     agent_cfg.agent_kwargs["checkpoint_step"] = step
diff --git a/src/agents/evaluator_envs.py b/src/agents/evaluator_envs.py
index 5b878dd..d835cf8 100644
--- a/src/agents/evaluator_envs.py
+++ b/src/agents/evaluator_envs.py
@@ -3,6 +3,7 @@
 import json
 import logging
 import os
+from pathlib import Path
 import shlex
 import subprocess
 from abc import ABC
@@ -19,6 +20,7 @@
 from agents.client import RemoteAgent
 from agents.policies import Act, Agent, Obs
 from agents.wrappers import HumanCameraWrapper
+from PIL import Image
 
 logging.basicConfig(
     format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
@@ -62,17 +64,22 @@ def do_import():
 
 
 class RCSPickUpCubeEval(EvaluatorEnv):
     INSTRUCTIONS = {
-        "rcs/FR3SimplePickUpSim-v0": "pick up the red cube",
+        "rcs/FR3SimplePickUpSim-v0": "pick the green box",
+        "rcs/FR3LabPickUpSimDigitHand-v0": "pick the green box",
     }
 
     def translate_obs(self, obs: dict[str, Any]) -> Obs:
         # does not include history
+        # side = obs["frames"]["arro"]["rgb"]["data"]
         side = obs["frames"]["side"]["rgb"]["data"]
+        wrist = obs["frames"]["wrist"]["rgb"]["data"]
         # depth_side = obs["frames"]["side"]["depth"]["data"],
         return Obs(
-            cameras=dict(rgb_side=side),
+            cameras=dict(rgb_side=side, rgb_wrist=wrist),
+            # cameras=dict(rgb_side=side),
             gripper=obs["gripper"],
+            info=dict(joints=obs["joints"])
         )
 
     def step(self, action: Act) -> tuple[Obs, float, bool, bool, dict]:
@@ -99,9 +106,11 @@ def language_instruction(self) -> str:
     @staticmethod
     def do_import():
         import rcs
+        import rcs_toolbox
 
 
 EvaluatorEnv.register("rcs/FR3SimplePickUpSim-v0", RCSPickUpCubeEval)
+EvaluatorEnv.register("rcs/FR3LabPickUpSimDigitHand-v0", RCSPickUpCubeEval)
 
 
 class ManiSkill(EvaluatorEnv):
@@ -212,7 +221,7 @@ class AgentConfig:
     port: int = 8080
 
 
-def single_eval(env: EvaluatorEnv, agent: Agent, max_steps: int) -> tuple[list[float], list[float], list[float]]:
+def single_eval(env: EvaluatorEnv, agent: Agent, max_steps: int, i) -> tuple[list[float], list[float], list[float]]:
     logging.debug(f"Starting evaluation of {env.env.unwrapped.spec.id}")
     obs, _ = env.reset(options={})
     logging.debug(f"Reset env {env.env.unwrapped.spec.id}")
@@ -222,6 +231,7 @@
     truncated = False
     step = 0.0
     rewards = []
+    im = []
     while not done and not truncated and max_steps > step:
         action = agent.act(obs)
         obs, reward, done, truncated, _ = env.step(action)
@@ -229,6 +239,24 @@
         done, truncated = bool(done), bool(truncated)
         step += 1
         rewards.append(reward)
+        im.append(obs.cameras)
+
+    Path(f"{os.environ['CAM_PATH']}").mkdir(exist_ok=True, parents=True)
+    for camera in im[0].keys():
+        imgs = []
+        for img in im:
+            # skip images that have timestamps closer together than 0.5s
+            imgs.append(Image.fromarray(img[camera]))
+
+
+        imgs[0].save(
+            f"{os.environ['CAM_PATH']}/{i}_{camera}_{str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))}.gif",
+            save_all=True,
+            append_images=imgs[1:],
+            duration=0.2 * 1000,
+            loop=0,
+        )
+
     env.reset(options={})
 
     logging.debug(
@@ -262,7 +290,7 @@ def run_episode(args: tuple[int, list[EvalConfig], int, AgentConfig]) -> tuple[f
     while not agent.is_initialized():
         logging.info("Waiting for agent to initialize...")
         sleep(5)
-    return single_eval(env, agent, cfg.max_steps_per_episode)
+    return single_eval(env, agent, cfg.max_steps_per_episode, i)
 
 
 def multi_eval(
@@ -277,6 +305,7 @@
     #     single_results = p.map(run_episode, args)
 
     # without process
+    np.random.seed(42)
    args = [(i, cfgs, episodes, agent_cfg) for i in range(len(cfgs) * episodes)]
     single_results = [run_episode(arg) for arg in tqdm(args)]
 
@@ -321,7 +350,6 @@ def start_server(
     ]
     logging.info("Server starting: %s", " ".join(cmd))
     p = subprocess.Popen(cmd)
-    sleep(5)
     try:
         yield p
     finally:
@@ -352,6 +380,7 @@
         with start_server(
             agent_cfg.agent_name, agent_cfg.agent_kwargs, agent_cfg.port, agent_cfg.host, agent_cfg.python_path
         ):
+            sleep(30)
             res = multi_eval(agent_cfg, eval_cfgs, episodes, n_processes)
     except Exception:
         # Ensures you SEE the client's stack trace and any logged errors.

From d6223604e1ea8cd8ec7bcfc997ce1d184aaa3c46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Wed, 14 Jan 2026 16:07:48 +0100
Subject: [PATCH 2/2] style: format

---
 src/agents/evaluator_envs.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/agents/evaluator_envs.py b/src/agents/evaluator_envs.py
index d835cf8..739ff12 100644
--- a/src/agents/evaluator_envs.py
+++ b/src/agents/evaluator_envs.py
@@ -3,24 +3,24 @@
 import json
 import logging
 import os
-from pathlib import Path
 import shlex
 import subprocess
 from abc import ABC
 from contextlib import contextmanager
 from dataclasses import asdict, dataclass
+from pathlib import Path
 from time import sleep
 from typing import Any
 
 import gymnasium as gym
 import numpy as np
+from PIL import Image
 from simple_slurm import Slurm
 from tqdm import tqdm
 
 from agents.client import RemoteAgent
 from agents.policies import Act, Agent, Obs
 from agents.wrappers import HumanCameraWrapper
-from PIL import Image
 
 logging.basicConfig(
     format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
@@ -79,7 +79,7 @@ def translate_obs(self, obs: dict[str, Any]) -> Obs:
             cameras=dict(rgb_side=side, rgb_wrist=wrist),
             # cameras=dict(rgb_side=side),
             gripper=obs["gripper"],
-            info=dict(joints=obs["joints"])
+            info=dict(joints=obs["joints"]),
         )
 
     def step(self, action: Act) -> tuple[Obs, float, bool, bool, dict]:
@@ -248,7 +248,6 @@ def single_eval(env: EvaluatorEnv, agent: Agent, max_steps: int, i) -> tuple[lis
             # skip images that have timestamps closer together than 0.5s
             imgs.append(Image.fromarray(img[camera]))
 
-
         imgs[0].save(
             f"{os.environ['CAM_PATH']}/{i}_{camera}_{str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))}.gif",
             save_all=True,
@@ -257,7 +256,6 @@ def single_eval(env: EvaluatorEnv, agent: Agent, max_steps: int, i) -> tuple[lis
             loop=0,
         )
-
     env.reset(options={})
 
     logging.debug(
         f"Finished evaluation of {env.env.unwrapped.spec.id} with {step} steps and reward {reward}, success {done}"