From ed77365d570e4d2434586bb79aa332882fc45a0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Tue, 16 Dec 2025 17:15:23 +0100
Subject: [PATCH 1/2] feat: save videos

---
 src/agents/__main__.py       |  1 +
 src/agents/evaluator_envs.py | 39 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/src/agents/__main__.py b/src/agents/__main__.py
index fabb3f6..5ae29f8 100644
--- a/src/agents/__main__.py
+++ b/src/agents/__main__.py
@@ -67,6 +67,7 @@ def _per_process(
     step, _agent_cfg, eval_cfgs, episodes, n_processes, nth_gpu = args
     logging.info(f"Starting evaluation for step {step}")
     os.environ["CUDA_VISIBLE_DEVICES"] = str(nth_gpu)
+    os.environ["CAM_PATH"] = f"{os.environ['RUN_PATH']}/videos/{step}"
 
     agent_cfg = copy.deepcopy(_agent_cfg)
     agent_cfg.agent_kwargs["checkpoint_step"] = step
diff --git a/src/agents/evaluator_envs.py b/src/agents/evaluator_envs.py
index 5b878dd..d835cf8 100644
--- a/src/agents/evaluator_envs.py
+++ b/src/agents/evaluator_envs.py
@@ -3,6 +3,7 @@
 import json
 import logging
 import os
+from pathlib import Path
 import shlex
 import subprocess
 from abc import ABC
@@ -19,6 +20,7 @@
 from agents.client import RemoteAgent
 from agents.policies import Act, Agent, Obs
 from agents.wrappers import HumanCameraWrapper
+from PIL import Image
 
 logging.basicConfig(
     format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
@@ -62,17 +64,22 @@ def do_import():
 
 
 class RCSPickUpCubeEval(EvaluatorEnv):
     INSTRUCTIONS = {
-        "rcs/FR3SimplePickUpSim-v0": "pick up the red cube",
+        "rcs/FR3SimplePickUpSim-v0": "pick the green box",
+        "rcs/FR3LabPickUpSimDigitHand-v0": "pick the green box",
     }
 
     def translate_obs(self, obs: dict[str, Any]) -> Obs:
         # does not include history
+        # side = obs["frames"]["arro"]["rgb"]["data"]
         side = obs["frames"]["side"]["rgb"]["data"]
+        wrist = obs["frames"]["wrist"]["rgb"]["data"]
         # depth_side = obs["frames"]["side"]["depth"]["data"],
         return Obs(
-            cameras=dict(rgb_side=side),
+            cameras=dict(rgb_side=side, rgb_wrist=wrist),
+            # cameras=dict(rgb_side=side),
             gripper=obs["gripper"],
+            info=dict(joints=obs["joints"])
         )
 
     def step(self, action: Act) -> tuple[Obs, float, bool, bool, dict]:
@@ -99,9 +106,11 @@ def language_instruction(self) -> str:
     @staticmethod
     def do_import():
         import rcs
+        import rcs_toolbox
 
 
 EvaluatorEnv.register("rcs/FR3SimplePickUpSim-v0", RCSPickUpCubeEval)
+EvaluatorEnv.register("rcs/FR3LabPickUpSimDigitHand-v0", RCSPickUpCubeEval)
 
 
 class ManiSkill(EvaluatorEnv):
@@ -212,7 +221,7 @@ class AgentConfig:
     port: int = 8080
 
 
-def single_eval(env: EvaluatorEnv, agent: Agent, max_steps: int) -> tuple[list[float], list[float], list[float]]:
+def single_eval(env: EvaluatorEnv, agent: Agent, max_steps: int, i) -> tuple[list[float], list[float], list[float]]:
     logging.debug(f"Starting evaluation of {env.env.unwrapped.spec.id}")
     obs, _ = env.reset(options={})
     logging.debug(f"Reset env {env.env.unwrapped.spec.id}")
@@ -222,6 +231,7 @@
     truncated = False
     step = 0.0
     rewards = []
+    im = []
     while not done and not truncated and max_steps > step:
         action = agent.act(obs)
         obs, reward, done, truncated, _ = env.step(action)
@@ -229,6 +239,24 @@
         done, truncated = bool(done), bool(truncated)
         step += 1
         rewards.append(reward)
+        im.append(obs.cameras)
+
+    Path(f"{os.environ['CAM_PATH']}").mkdir(exist_ok=True, parents=True)
+    for camera in im[0].keys():
+        imgs = []
+        for img in im:
+            # skip images that have timestamps closer together than 0.5s
+            imgs.append(Image.fromarray(img[camera]))
+
+
+        imgs[0].save(
+            f"{os.environ['CAM_PATH']}/{i}_{camera}_{str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))}.gif",
+            save_all=True,
+            append_images=imgs[1:],
+            duration=0.2 * 1000,
+            loop=0,
+        )
+
     env.reset(options={})
 
     logging.debug(
@@ -262,7 +290,7 @@ def run_episode(args: tuple[int, list[EvalConfig], int, AgentConfig]) -> tuple[f
     while not agent.is_initialized():
         logging.info("Waiting for agent to initialize...")
         sleep(5)
-    return single_eval(env, agent, cfg.max_steps_per_episode)
+    return single_eval(env, agent, cfg.max_steps_per_episode, i)
 
 
 def multi_eval(
@@ -277,6 +305,7 @@
     #     single_results = p.map(run_episode, args)
 
     # without process
+    np.random.seed(42)
    args = [(i, cfgs, episodes, agent_cfg) for i in range(len(cfgs) * episodes)]
     single_results = [run_episode(arg) for arg in tqdm(args)]
 
@@ -321,7 +350,6 @@ def start_server(
     ]
     logging.info("Server starting: %s", " ".join(cmd))
     p = subprocess.Popen(cmd)
-    sleep(5)
     try:
         yield p
     finally:
@@ -352,6 +380,7 @@
         with start_server(
             agent_cfg.agent_name, agent_cfg.agent_kwargs, agent_cfg.port, agent_cfg.host, agent_cfg.python_path
         ):
+            sleep(30)
             res = multi_eval(agent_cfg, eval_cfgs, episodes, n_processes)
     except Exception:
         # Ensures you SEE the client's stack trace and any logged errors.

From d6223604e1ea8cd8ec7bcfc997ce1d184aaa3c46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Wed, 14 Jan 2026 16:07:48 +0100
Subject: [PATCH 2/2] style: format

---
 src/agents/evaluator_envs.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/agents/evaluator_envs.py b/src/agents/evaluator_envs.py
index d835cf8..739ff12 100644
--- a/src/agents/evaluator_envs.py
+++ b/src/agents/evaluator_envs.py
@@ -3,24 +3,24 @@
 import json
 import logging
 import os
-from pathlib import Path
 import shlex
 import subprocess
 from abc import ABC
 from contextlib import contextmanager
 from dataclasses import asdict, dataclass
+from pathlib import Path
 from time import sleep
 from typing import Any
 
 import gymnasium as gym
 import numpy as np
+from PIL import Image
 from simple_slurm import Slurm
 from tqdm import tqdm
 
 from agents.client import RemoteAgent
 from agents.policies import Act, Agent, Obs
 from agents.wrappers import HumanCameraWrapper
-from PIL import Image
 
 logging.basicConfig(
     format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
@@ -79,7 +79,7 @@ def translate_obs(self, obs: dict[str, Any]) -> Obs:
             cameras=dict(rgb_side=side, rgb_wrist=wrist),
             # cameras=dict(rgb_side=side),
             gripper=obs["gripper"],
-            info=dict(joints=obs["joints"])
+            info=dict(joints=obs["joints"]),
         )
 
     def step(self, action: Act) -> tuple[Obs, float, bool, bool, dict]:
@@ -248,7 +248,6 @@ def single_eval(env: EvaluatorEnv, agent: Agent, max_steps: int, i) -> tuple[lis
             # skip images that have timestamps closer together than 0.5s
             imgs.append(Image.fromarray(img[camera]))
 
-
         imgs[0].save(
             f"{os.environ['CAM_PATH']}/{i}_{camera}_{str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))}.gif",
             save_all=True,
@@ -257,7 +256,6 @@ def single_eval(env: EvaluatorEnv, agent: Agent, max_steps: int, i) -> tuple[lis
             loop=0,
         )
-
     env.reset(options={})
 
     logging.debug(
         f"Finished evaluation of {env.env.unwrapped.spec.id} with {step} steps and reward {reward}, success {done}"