From ac7a9c99a6c5047826e289c9617a6d2cbc764109 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Fri, 16 Jan 2026 10:17:29 +0100
Subject: [PATCH 1/5] ci: fix pipeline

---
 .github/workflows/pipeline.yaml |  2 +-
 README.md                       | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml
index b08d04a..f05e77e 100644
--- a/.github/workflows/pipeline.yaml
+++ b/.github/workflows/pipeline.yaml
@@ -37,7 +37,7 @@ jobs:
           path: dist/
 
   upload_pypi:
-    needs: [build_dist]
+    needs: [pipeline]
     runs-on: ubuntu-latest
     # Upload to PyPI on release
     if: always() && github.event_name == 'release' && github.event.action == 'published'
diff --git a/README.md b/README.md
index b53d3cc..cbb11f2 100644
--- a/README.md
+++ b/README.md
@@ -13,17 +13,18 @@ The work also includes a section on related engineering challenges regarding jax
 
 ## Installation
 
+### Pip Installation (Recommended)
+```shell
+pip install vlagents
+```
+
 ### Local Installation
 ```shell
-git clone https://github.com/juelg/vlagents.git
+git clone https://github.com/RobotControlStack/vlagents.git
 cd vlagents
 pip install -ve .
 ```
 
-### Repo Installation
-```shell
-pip install git+https://github.com/juelg/vlagents.git
-```
 
 ### Environment and Policy Installation
 On top of vlagents you can then install a simulation environment where the agent acts.
@@ -190,9 +191,8 @@ If you find the agent useful for your work, please consider citing the original
 ```
 @inproceedings{juelg2025refinedpolicydistillationvla,
   title={{Refined Policy Distillation}: {F}rom {VLA} Generalists to {RL} Experts},
-  author={Tobias Jülg and Wolfram Burgard and Florian Walter},
+  author={Tobias J{\"u}lg and Wolfram Burgard and Florian Walter},
   year={2025},
   booktitle={Proc.~of the IEEE/RSJ Int.~Conf.~on Intelligent Robots and Systems (IROS)},
-  note={Accepted for publication.}
 }
 ```
\ No newline at end of file

From 3d4e14dad027cf8b11b6345c353c1ce7b6f0fd2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Fri, 16 Jan 2026 12:19:11 +0100
Subject: [PATCH 2/5] feat: added support for libero

---
 README.md                      | 18 ++++++++-
 src/vlagents/evaluator_envs.py | 73 +++++++++++++++++++++++++++++++---
 2 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index cbb11f2..25abd37 100644
--- a/README.md
+++ b/README.md
@@ -28,9 +28,20 @@ pip install -ve .
 
 ### Environment and Policy Installation
 On top of vlagents you can then install a simulation environment where the agent acts.
-We currently support [maniskill](https://github.com/haosulab/ManiSkill) with more to come.
+We currently support the following environments:
+- [maniskill](https://github.com/haosulab/ManiSkill)
+- [robot control stack](https://github.com/RobotControlStack/robot-control-stack)
+- [libero](https://github.com/Lifelong-Robot-Learning/LIBERO)
+
+
 In order to avoid dependency conflicts, use a second conda/pip environment to install your policy.
-We currently support [octo](https://github.com/octo-models/octo) and [openvla](https://github.com/openvla/openvla). 
+We currently support the following policies: +- [octo](https://github.com/octo-models/octo) +- [openvla](https://github.com/openvla/openvla) +- [openpi](https://github.com/Physical-Intelligence/openpi) +- [vjepa2-ac](https://github.com/facebookresearch/vjepa2) +- [diffusion policy](https://github.com/real-stanford/diffusion_policy) + ### Octo To use Octo as an agent/policy you need to create a new conda environment: @@ -135,6 +146,9 @@ pip install -ve . ``` +### Diffusion Policy +Currently located on the branch `diffusion_policy`. + ## Usage To start an vlagents server use the `start-server` command where `kwargs` is a dictionary of the constructor arguments of the policy you want to start e.g. ```shell diff --git a/src/vlagents/evaluator_envs.py b/src/vlagents/evaluator_envs.py index e8306d4..fbf6c6f 100644 --- a/src/vlagents/evaluator_envs.py +++ b/src/vlagents/evaluator_envs.py @@ -132,11 +132,7 @@ def __init__(self, env_id, seed, **env_kwargs): # TODO: one could save only every nth episode by adding an episode counter which steps the record env only # when the counter is divisible by n otherwise steps the normal env logging.info(f"Creating ManiSkill env {env_id}") - if "video_dir" in env_kwargs: - output_dir = env_kwargs["video_dir"] - del env_kwargs["video_dir"] - else: - output_dir = None + output_dir = env_kwargs.pop("video_dir", None) super().__init__(env_id, seed, **env_kwargs) logging.info(f"Created ManiSkill env {env_id}") if "human_render_camera_configs" in env_kwargs: @@ -204,11 +200,78 @@ def do_import(): EvaluatorEnv.register("PokeCube-v1", ManiSkill) +class Libero(EvaluatorEnv): + + def __init__(self, env_id: str, seed: int, **env_kwargs) -> None: + logging.info("Creating Libero env") + self.env, self._language_instruction, self.task_name, self.task_suite, self.task_id, self.task = self._make_gym( + env_id, seed, **env_kwargs + ) + logging.info( + f"Created Libero env, task suite: {env_id}, task id: {self.task_id}, task name {self.task_name}, instruction: {self._language_instruction}" + ) + self.env_id = env_id + self.seed = seed + + def _make_gym(self, env_id, seed, **env_kwargs): + from libero.libero import benchmark, get_libero_path + from libero.libero.envs import OffScreenRenderEnv + + benchmark_dict = benchmark.get_benchmark_dict() + + task_suite = benchmark_dict[env_id]() + task_id = min(max(env_kwargs.pop("task_id", 0), 0), task_suite.n_tasks - 1) + task = task_suite.get_task(task_id) + + task_bddl_file = os.path.join(get_libero_path("bddl_files"), task.problem_folder, task.bddl_file) + env = OffScreenRenderEnv( + bddl_file_name=task_bddl_file, + **env_kwargs, + ) + env.seed(seed) + return env, task.language, task.name, task_suite, task_id, task + + def translate_obs(self, obs: dict[str, Any]) -> Obs: + return Obs( + cameras=dict(rgb_side=obs["agentview_image"]), + gripper=obs["robot0_gripper_qpos"] / 0.04, # normalize + ) + + def step(self, action: Act) -> tuple[Obs, float, bool, bool, dict]: + # change gripper to libero format (-1, 1) where -1 is open + action.action[-1] = (1 - action.action[-1]) * 2 - 1.0 + obs, reward, done, info = self.env.step(action.action) + return self.translate_obs(obs), reward, done, done, info + + def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Obs, dict[str, Any]]: + obs, info = self.env.reset() + init_states = self.task_suite.get_task_init_states( + self.task_id + ) # for benchmarking purpose, we fix the a set of initial states + init_state_id = 0 + 
self.env.set_init_state(init_states[init_state_id]) + + return self.translate_obs(obs), info + + @property + def language_instruction(self) -> str: + return self._language_instruction + + +EvaluatorEnv.register("libero_10", Libero) +EvaluatorEnv.register("libero_90", Libero) +EvaluatorEnv.register("libero_100", Libero) +EvaluatorEnv.register("libero_spatial", Libero) +EvaluatorEnv.register("libero_object", Libero) +EvaluatorEnv.register("libero_goal", Libero) + + @dataclass class EvalConfig: env_id: str env_kwargs: dict[str, Any] max_steps_per_episode: int = 100 + # TODO: add seed, on same machine and jpeg encoding @dataclass From c016f2e64bb3b0962b7fb7a1d901cfd3865831c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Fri, 16 Jan 2026 16:45:36 +0100 Subject: [PATCH 3/5] feat: libero fixes - added relative and absolute control options for libero - corrected gripper translation - added wrist camera - added doc string for libero - tested libero - refactored eval run into two functions, to be callable in python --- src/vlagents/__main__.py | 293 ++++++++++----------------------- src/vlagents/evaluator_envs.py | 106 ++++++------ 2 files changed, 137 insertions(+), 262 deletions(-) diff --git a/src/vlagents/__main__.py b/src/vlagents/__main__.py index f3797d5..74727ce 100644 --- a/src/vlagents/__main__.py +++ b/src/vlagents/__main__.py @@ -82,12 +82,12 @@ def _per_process( @main_app.command() -def run_eval_post_training( - wandb_project: Annotated[str, typer.Option(help="weights and biases logging project.")], - wandb_entity: Annotated[str, typer.Option(help="weights and biases logging entity.")], - wandb_note: Annotated[str, typer.Option(help="weights and biases logging note.")], - wandb_name: Annotated[str, typer.Option(help="weights and biases logging name.")], +def run_eval( output_path: Annotated[str, typer.Option(help="Path to store the run results.")], + wandb_project: Annotated[str | None, typer.Option(help="weights and biases logging project.")] = None, + wandb_entity: Annotated[str | None, typer.Option(help="weights and biases logging entity.")] = None, + wandb_note: Annotated[str | None, typer.Option(help="weights and biases logging note.")] = None, + wandb_name: Annotated[str | None, typer.Option(help="weights and biases logging name.")] = None, wandb_group: Annotated[str | None, typer.Option(help="weights and biases logging name.")] = None, steps: Annotated[str | None, typer.Option(help="steps to evaluate.")] = None, episodes: Annotated[int, typer.Option(help="Number of episodes to run.")] = 100, @@ -104,203 +104,63 @@ def run_eval_post_training( post training eval which goes over all checkpoints - each checkpoint with many envs """ - if steps is None: - steps = [None] - else: - steps = json.loads(steps) - if wandb_group == "": - wandb_group = None - - wandb.init( - entity=wandb_entity, - resume="allow", - project=wandb_project, - # config=dict(agent_name=agent_name, agent_kwargs=json.loads(kwargs), eval_cfgs=json.loads(eval_cfgs)), - notes=wandb_note, - job_type="eval", - name=wandb_name, - group=wandb_group, - ) - wandb_log_git_diff(output_path) - wandb.run.log_code(".") - - wandb.define_metric( - "total/success", - step_metric="train_step", - overwrite=False, - step_sync=False, - hidden=False, - summary="max", - ) - wandb.define_metric( - "total/last_step_reward", - step_metric="train_step", - overwrite=False, - step_sync=False, - hidden=False, - summary="max", + eval_cfgs_ = [EvalConfig(**cfg) for cfg in json.loads(eval_cfgs)] + agent_cfg_ = 
AgentConfig(**json.loads(agent_cfg)) + _run_eval( + output_path=output_path, + eval_cfgs=eval_cfgs_, + agent_cfg=agent_cfg_, + wandb_project=wandb_project, + wandb_entity=wandb_entity, + wandb_note=wandb_note, + wandb_name=wandb_name, + wandb_group=wandb_group, + steps=steps, + episodes=episodes, + n_processes=n_processes, + n_gpus=n_gpus, ) - wandb.define_metric( - "total/total_steps", - step_metric="train_step", - overwrite=False, - step_sync=False, - hidden=False, - summary="min", - ) - wandb.define_metric( - "total/mean_reward", - step_metric="train_step", - overwrite=False, - step_sync=False, - hidden=False, - summary="max", - ) - eval_cfgs = [EvalConfig(**cfg) for cfg in json.loads(eval_cfgs)] - for idx, env in enumerate(eval_cfgs): - wandb.define_metric( - f"{env.env_id}/success", - step_metric="train_step", - overwrite=False, - step_sync=False, - hidden=False, - summary="max", - ) - wandb.define_metric( - f"{env.env_id}/last_step_reward", - step_metric="train_step", - overwrite=False, - step_sync=False, - hidden=False, - summary="max", - ) - wandb.define_metric( - f"{env.env_id}/total_steps", - step_metric="train_step", - overwrite=False, - step_sync=False, - hidden=False, - summary="min", - ) - wandb.define_metric( - f"{env.env_id}/mean_reward", - step_metric="train_step", - overwrite=False, - step_sync=False, - hidden=False, - summary="max", - ) - # distribute gpus equally - gpus_ids = [i % n_gpus for i in range(len(steps))] - # spawn n processes and run in parallel - - agent_cfgs = [AgentConfig(**json.loads(agent_cfg)) for _ in steps] - for idx in range(len(steps)): - agent_cfgs[idx].port += idx - with Pool(n_processes) as p: - args = [(step, agent_cfgs[idx], eval_cfgs, episodes, 1, gpus_ids[idx]) for idx, step in enumerate(steps)] - results = p.map(_per_process, args) - logging.info("Finished evaluation") - - for result in results: - per_env_results_last_reward, per_env_results_rewards, mean_rewards, step = result - step = step if step is not None else 0 - wandb_log_dict = { - "total/success": per_env_results_last_reward.mean(axis=(0, 1))[0], - "total/last_step_reward": per_env_results_last_reward.mean(axis=(0, 1))[1], - "total/total_steps": per_env_results_last_reward.mean(axis=(0, 1))[2], - "total/mean_reward": np.mean(mean_rewards), - "train_step": step, - } - # log for each env - for idx, env in enumerate(eval_cfgs): - wandb_log_dict.update( - { - f"{env.env_id}/success": per_env_results_last_reward[idx].mean(axis=0)[0], - f"{env.env_id}/last_step_reward": per_env_results_last_reward[idx].mean(axis=0)[1], - f"{env.env_id}/total_steps": per_env_results_last_reward[idx].mean(axis=0)[2], - f"{env.env_id}/mean_reward": mean_rewards[idx], - } - ) - wandb.log(wandb_log_dict, step=step, commit=True) - - path = write_results( - per_env_results_last_reward, - per_env_results_rewards, - eval_cfgs, - agent_cfg=agent_cfgs[0], - out=output_path, - ) - wandb.log_artifact(path, type="file", name="results", aliases=[f"step_{step}"]) - - -@main_app.command() -def run_eval_during_training( - wandb_id: Annotated[str, typer.Option(help="weights and biases logging id.")], - wandb_group: Annotated[str, typer.Option(help="weights and biases logging group.")], - wandb_project: Annotated[str, typer.Option(help="weights and biases logging project.")], - wandb_entity: Annotated[str, typer.Option(help="weights and biases logging entity.")], - wandb_note: Annotated[str, typer.Option(help="weights and biases logging note.")], - wandb_name: Annotated[str, typer.Option(help="weights and biases logging 
name.")], - output_path: Annotated[str, typer.Option(help="Path to store the run results.")], - wandb_first: Annotated[bool, typer.Option(help="whether its the first eval.")] = False, - episodes: Annotated[int, typer.Option(help="Number of episodes to run.")] = 100, - n_processes: Annotated[int | None, typer.Option(help="Number of processes to run.")] = None, - eval_cfgs: Annotated[ - str, typer.Option(help="Evaluation configurations.") - ] = '[{"env": "rcs/SimplePickUpSim-v0", "kwargs": {}}]', - agent_cfg: Annotated[ - str, typer.Option(help="Agent configuration.") - ] = '{"host": "localhost", "port": 8080, "agent_name": "Test", "agent_kwargs": {}, "python_path": "python"}', +def _run_eval( + output_path: str, + eval_cfgs: list[EvalConfig], + agent_cfg: AgentConfig, + wandb_project: str | None = None, + wandb_entity: str | None = None, + wandb_note: str | None = None, + wandb_name: str | None = None, + wandb_group: str | None = None, + steps: str | None = None, + episodes: int = 100, + n_processes: int | None = None, + n_gpus: int = 1, ): - """ - during training eval, all need to use the same id - - just for one model, but many envs - - can be new run but at least in the same project and same group as the training - """ - assert ( - agent_cfg["agent_name"] != "Test" - ), "agent_cfg needs to be passed as a json argument. See the default for an example." + if steps is None: + steps = [None] + else: + steps = json.loads(steps) + + agent_cfgs = [agent_cfg] * len(steps) + # TODO: make this a prober argument that is passed + os.environ["RUN_PATH"] = output_path - if wandb_first: + use_wandb = wandb_project is not None and wandb_entity is not None + if use_wandb: wandb.init( - id=wandb_id, entity=wandb_entity, resume="allow", - group=wandb_group, project=wandb_project, # config=dict(agent_name=agent_name, agent_kwargs=json.loads(kwargs), eval_cfgs=json.loads(eval_cfgs)), notes=wandb_note, job_type="eval", name=wandb_name, + group=wandb_group, ) - wandb_log_git_diff(output_path) wandb.run.log_code(".") - else: - wandb.init(id=wandb_id, entity=wandb_entity, resume="must", project=wandb_project) - eval_cfgs = [EvalConfig(**cfg) for cfg in json.loads(eval_cfgs)] - - agent_cfg = AgentConfig(**json.loads(agent_cfg)) - step = agent_cfg.agent_kwargs.get("checkpoint_step", 0) - step = step if step is not None else 0 - - per_env_results_last_reward, per_env_results_rewards = evaluation( - agent_cfg=agent_cfg, eval_cfgs=eval_cfgs, episodes=episodes, n_processes=n_processes - ) - - # return is [envs, episodes, 3(success, reward, steps)], [envs, episodes, rewards for all steps in the episode] - - flatten_rewards = [[item for sublist in env_rewards for item in sublist] for env_rewards in per_env_results_rewards] - mean_rewards = [np.mean(env_rewards) if env_rewards else 0.0 for env_rewards in flatten_rewards] - - # these new define metric to also not work with several jobs - # wandb says that logging can only be done in run groups - if wandb_first: wandb.define_metric( "total/success", step_metric="train_step", @@ -367,33 +227,50 @@ def run_eval_during_training( summary="max", ) - wandb_log_dict = { - "total/success": per_env_results_last_reward.mean(axis=(0, 1))[0], - "total/last_step_reward": per_env_results_last_reward.mean(axis=(0, 1))[1], - "total/total_steps": per_env_results_last_reward.mean(axis=(0, 1))[2], - "total/mean_reward": np.mean(mean_rewards), - "train_step": step, - } + # distribute gpus equally + gpus_ids = [i % n_gpus for i in range(len(steps))] + + # spawn n processes and run in parallel 
+ + for idx in range(len(steps)): + agent_cfgs[idx].port += idx + with Pool(n_processes) as p: + args = [(step, agent_cfgs[idx], eval_cfgs, episodes, 1, gpus_ids[idx]) for idx, step in enumerate(steps)] + results = p.map(_per_process, args) + logging.info("Finished evaluation") - # log for each env - for idx, env in enumerate(eval_cfgs): - wandb_log_dict.update( - { - f"{env.env_id}/success": per_env_results_last_reward[idx].mean(axis=0)[0], - f"{env.env_id}/last_step_reward": per_env_results_last_reward[idx].mean(axis=0)[1], - f"{env.env_id}/total_steps": per_env_results_last_reward[idx].mean(axis=0)[2], - f"{env.env_id}/mean_reward": mean_rewards[idx], + for result in results: + per_env_results_last_reward, per_env_results_rewards, mean_rewards, step = result + if use_wandb: + step = step if step is not None else 0 + wandb_log_dict = { + "total/success": per_env_results_last_reward.mean(axis=(0, 1))[0], + "total/last_step_reward": per_env_results_last_reward.mean(axis=(0, 1))[1], + "total/total_steps": per_env_results_last_reward.mean(axis=(0, 1))[2], + "total/mean_reward": np.mean(mean_rewards), + "train_step": step, } + # log for each env + for idx, env in enumerate(eval_cfgs): + wandb_log_dict.update( + { + f"{env.env_id}/success": per_env_results_last_reward[idx].mean(axis=0)[0], + f"{env.env_id}/last_step_reward": per_env_results_last_reward[idx].mean(axis=0)[1], + f"{env.env_id}/total_steps": per_env_results_last_reward[idx].mean(axis=0)[2], + f"{env.env_id}/mean_reward": mean_rewards[idx], + } + ) + wandb.log(wandb_log_dict, step=step, commit=True) + + path = write_results( + per_env_results_last_reward, + per_env_results_rewards, + eval_cfgs, + agent_cfg=agent_cfgs[0], + out=output_path, ) - wandb.log(wandb_log_dict, step=step, commit=True) - path = write_results( - per_env_results_last_reward, - per_env_results_rewards, - eval_cfgs, - agent_cfg=agent_cfg, - out=output_path, - ) - wandb.log_artifact(path, type="file", name="results", aliases=[f"step_{step}"]) + if use_wandb: + wandb.log_artifact(path, type="file", name="results", aliases=[f"step_{step}"]) if __name__ == "__main__": diff --git a/src/vlagents/evaluator_envs.py b/src/vlagents/evaluator_envs.py index fbf6c6f..ff7c470 100644 --- a/src/vlagents/evaluator_envs.py +++ b/src/vlagents/evaluator_envs.py @@ -202,10 +202,20 @@ def do_import(): class Libero(EvaluatorEnv): - def __init__(self, env_id: str, seed: int, **env_kwargs) -> None: + def __init__(self, env_id: str, seed: int, reset_steps: int = 14, **env_kwargs) -> None: + """ + For supported env_kwargs checkout ControlEnv class in libero. + We add the following env_kwargs on top: + - task_id (int): libero task id for given task suite. The number of tasks per task suite can checked with Libero.n_tasks(env_id). Defaults to 0. + - control_mode (str): either 'relative' or 'absolute'. Defaults to 'relative'. 
+ + """ logging.info("Creating Libero env") + self.env_kwargs = env_kwargs + self.reset_steps = reset_steps + self.control_mode = self.env_kwargs.pop("control_mode", "relative") self.env, self._language_instruction, self.task_name, self.task_suite, self.task_id, self.task = self._make_gym( - env_id, seed, **env_kwargs + env_id, seed, **self.env_kwargs ) logging.info( f"Created Libero env, task suite: {env_id}, task id: {self.task_id}, task name {self.task_name}, instruction: {self._language_instruction}" @@ -213,7 +223,16 @@ def __init__(self, env_id: str, seed: int, **env_kwargs) -> None: self.env_id = env_id self.seed = seed - def _make_gym(self, env_id, seed, **env_kwargs): + @staticmethod + def n_tasks(env_id: str) -> int: + from libero.libero import benchmark, get_libero_path + + benchmark_dict = benchmark.get_benchmark_dict() + task_suite = benchmark_dict[env_id]() + return task_suite.n_tasks + + @staticmethod + def _make_gym(env_id, seed, **env_kwargs): from libero.libero import benchmark, get_libero_path from libero.libero.envs import OffScreenRenderEnv @@ -229,29 +248,48 @@ def _make_gym(self, env_id, seed, **env_kwargs): **env_kwargs, ) env.seed(seed) + return env, task.language, task.name, task_suite, task_id, task def translate_obs(self, obs: dict[str, Any]) -> Obs: return Obs( - cameras=dict(rgb_side=obs["agentview_image"]), + cameras=dict(rgb_side=obs["agentview_image"][::-1], rgb_wrist=obs["robot0_eye_in_hand_image"][::-1]), gripper=obs["robot0_gripper_qpos"] / 0.04, # normalize ) def step(self, action: Act) -> tuple[Obs, float, bool, bool, dict]: # change gripper to libero format (-1, 1) where -1 is open - action.action[-1] = (1 - action.action[-1]) * 2 - 1.0 - obs, reward, done, info = self.env.step(action.action) - return self.translate_obs(obs), reward, done, done, info + act = np.copy(action.action) + act[-1] = (1 - act[-1]) * 2 - 1.0 + obs, reward, done, info = self.env.step(act) + success = self.env.check_success() + return self.translate_obs(obs), reward, success, done, info def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[Obs, dict[str, Any]]: - obs, info = self.env.reset() + obs = self.env.reset() init_states = self.task_suite.get_task_init_states( self.task_id ) # for benchmarking purpose, we fix the a set of initial states init_state_id = 0 self.env.set_init_state(init_states[init_state_id]) - return self.translate_obs(obs), info + for robot in self.env.robots: + robot.controller.use_delta = True + for _ in range(self.reset_steps): + obs, _, _, _ = self.env.step( + np.zeros(8) if "JOINT" in self.env_kwargs.get("controller", "OSC_POSE") else np.zeros(7) + ) + + if self.control_mode == "absolute": + for robot in self.env.robots: + robot.controller.use_delta = False + elif self.control_mode == "relative": + for robot in self.env.robots: + robot.controller.use_delta = True + else: + raise ValueError(f"Invalid control mode: {self.control_mode}, use 'absolute' or 'relative'.") + + return self.translate_obs(obs), {} @property def language_instruction(self) -> str: @@ -285,11 +323,11 @@ class AgentConfig: def single_eval(env: EvaluatorEnv, agent: Agent, max_steps: int, i) -> tuple[list[float], list[float], list[float]]: - logging.debug(f"Starting evaluation of {env.env.unwrapped.spec.id}") + logging.debug(f"Starting evaluation") obs, _ = env.reset(options={}) - logging.debug(f"Reset env {env.env.unwrapped.spec.id}") + logging.debug(f"Reset env") agent.reset(obs, env.language_instruction) - logging.debug(f"Reset agent 
{env.env.unwrapped.spec.id}") + logging.debug(f"Reset agent") done = False truncated = False step = 0.0 @@ -320,9 +358,7 @@ def single_eval(env: EvaluatorEnv, agent: Agent, max_steps: int, i) -> tuple[lis ) env.reset(options={}) - logging.debug( - f"Finished evaluation of {env.env.unwrapped.spec.id} with {step} steps and reward {reward}, success {done}" - ) + logging.debug(f"Finished evaluation with {step} steps and reward {reward}, success {done}") # success, last reward and number of steps return done, rewards, step @@ -441,7 +477,6 @@ def evaluation( with start_server( agent_cfg.agent_name, agent_cfg.agent_kwargs, agent_cfg.port, agent_cfg.host, agent_cfg.python_path ): - sleep(30) res = multi_eval(agent_cfg, eval_cfgs, episodes, n_processes) except Exception: # Ensures you SEE the client's stack trace and any logged errors. @@ -458,44 +493,7 @@ def evaluation( return res -def run_eval_during_training( - agent_cfg: AgentConfig, - eval_cfgs: list[EvalConfig], - wandb_id: str, - wandb_entity: str, - wandb_group: str, - wandb_project: str, - wandb_note: str, - wandb_name: str, - slurm: Slurm, - output_path: str, - wandb_first: bool = False, - episodes: int = 100, - n_processes: int | None = None, - cmd=None, -): - if cmd is None: - cmd = ["python"] - cmd += [ - "-m", - "vlagents" "run-eval-during-training", - f"--agent-cfg={json.dumps(asdict(agent_cfg))}" f"--episodes={episodes}", - f"--n-processes={n_processes}", - f"--eval-cfgs={json.dumps([asdict(cfg) for cfg in eval_cfgs])}", - f"--wandb-id={wandb_id}", - f"--wandb-group={wandb_group.replace(':', '_')}", - f"--wandb-project={wandb_project}", - f"--wandb-entity={wandb_entity}", - f"--wandb-note={wandb_note}", - f"--wandb-name={wandb_name}", - f"--output-path={output_path}", - ] - if wandb_first: - cmd.append("--wandb-first") - slurm.sbatch(shlex.join(cmd)) - - -def run_eval_post_training( +def run_eval( agent_cfg: AgentConfig, eval_cfgs: list[EvalConfig], wandb_entity: str, From e56ed5e1e0343f55949d5e821b18fca39385666a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Fri, 16 Jan 2026 16:55:16 +0100 Subject: [PATCH 4/5] feat: shm and jpeg configurable --- src/tests/test_libero.py | 51 ++++++++++++++++++++++++++++++++++ src/vlagents/evaluator_envs.py | 15 ++++++++-- 2 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 src/tests/test_libero.py diff --git a/src/tests/test_libero.py b/src/tests/test_libero.py new file mode 100644 index 0000000..67d422f --- /dev/null +++ b/src/tests/test_libero.py @@ -0,0 +1,51 @@ +if __name__ == "__main__": + import datetime + import os + + import numpy as np + from PIL import Image + + from lerobot.envs.libero import LiberoEnv + from vlagents.__main__ import _run_eval + from vlagents.evaluator_envs import AgentConfig, EvalConfig + + # main_app() + # test + os.environ["RUN_PATH"] = "test_output" + _run_eval( + output_path="test_output", + eval_cfgs=[ + EvalConfig( + env_id="libero_10", + env_kwargs={"controller": "OSC_POSE", "camera_heights": 256, "camera_widths": 256}, + max_steps_per_episode=100, + ) + ], + agent_cfg=AgentConfig(host="localhost", port=8080, agent_name="test", agent_kwargs={}), + n_processes=1, + n_gpus=1, + episodes=1, + ) + + # from libero.libero import benchmark, get_libero_path + # from libero.libero.envs import OffScreenRenderEnv + # benchmark_dict = benchmark.get_benchmark_dict() + + # task_suite = benchmark_dict["libero_10"]() + # env = LiberoEnv(task_suite, task_id=0, task_suite_name="libero_10") + # env.reset() + # im = [] + # im2 = [] + # for i 
in range(100): + # obs, reward, done, truncated, info = env.step(np.zeros(7)) + # im2.append(Image.fromarray(obs["pixels"]["image"])) + # im.append(Image.fromarray(obs["pixels"]["image2"])) + # env.close() + + # im[0].save( + # f"{str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))}.gif", + # save_all=True, + # append_images=im[1:], + # duration=0.2 * 1000, + # loop=0, + # ) diff --git a/src/vlagents/evaluator_envs.py b/src/vlagents/evaluator_envs.py index ff7c470..d30c4bf 100644 --- a/src/vlagents/evaluator_envs.py +++ b/src/vlagents/evaluator_envs.py @@ -276,6 +276,7 @@ def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) for robot in self.env.robots: robot.controller.use_delta = True for _ in range(self.reset_steps): + # steps the environment to filter out falling objects obs, _, _, _ = self.env.step( np.zeros(8) if "JOINT" in self.env_kwargs.get("controller", "OSC_POSE") else np.zeros(7) ) @@ -309,7 +310,9 @@ class EvalConfig: env_id: str env_kwargs: dict[str, Any] max_steps_per_episode: int = 100 - # TODO: add seed, on same machine and jpeg encoding + seed: int = 42 + same_machine: bool = False + jpeg_encoding: bool = False @dataclass @@ -373,7 +376,13 @@ def create_env_agent(agent_config: AgentConfig, cfg: EvalConfig, seed: int) -> t logging.info(f"env {cfg.env_id} not available, creating new env and agent") env = EvaluatorEnv.make(cfg.env_id, seed=seed, **cfg.env_kwargs) logging.info("done creating env") - agent = RemoteAgent(agent_config.host, agent_config.port, agent_config.agent_name) + agent = RemoteAgent( + agent_config.host, + agent_config.port, + agent_config.agent_name, + on_same_machine=cfg.same_machine, + jpeg_encoding=cfg.jpeg_encoding, + ) logging.info("done creating agent") per_process_cache[key] = (env, agent) return per_process_cache[key] @@ -402,7 +411,7 @@ def multi_eval( # single_results = p.map(run_episode, args) # without process - np.random.seed(42) + np.random.seed(cfgs[0].seed) args = [(i, cfgs, episodes, agent_cfg) for i in range(len(cfgs) * episodes)] single_results = [run_episode(arg) for arg in tqdm(args)] From a62b8b0bfefaea767bb09140bf5ba8d71a1f7b62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Fri, 16 Jan 2026 16:57:58 +0100 Subject: [PATCH 5/5] ci: only format py module --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 195c455..d4b6fb2 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,8 @@ PYSRC = src # Python checkformat: - isort --check-only ${PYSRC} - black --check ${PYSRC} + isort --check-only ${PYSRC}/vlagents + black --check ${PYSRC}/vlagents format: isort ${PYSRC}
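
The patches above register the new `Libero` wrapper under the suite names `libero_10`, `libero_90`, `libero_100`, `libero_spatial`, `libero_object`, and `libero_goal`, but `src/tests/test_libero.py` only exercises it through the remote-agent server. Below is a minimal sketch of driving the wrapper directly, which can be handy for checking the camera and gripper conventions in isolation. It assumes LIBERO is installed, that `Act` can be imported from `vlagents.evaluator_envs` (the module itself uses it), and that `Act` accepts the raw action array as its `action` field; none of these details are spelled out verbatim in the patches, and the zero-action loop below stands in for a real policy.

```python
# Sketch only: drive the Libero evaluator env from patches 2-4 without the agent server.
# Assumptions (not shown in the patches): Act is importable from vlagents.evaluator_envs
# and takes the action array as its `action` field; the open-gripper zero action is a
# placeholder policy, not part of the patch series.
import numpy as np

from vlagents.evaluator_envs import Act, EvaluatorEnv


def main() -> None:
    # "libero_goal" is one of the registered suite names; task_id, control_mode and the
    # remaining kwargs mirror the env_kwargs used in src/tests/test_libero.py.
    env = EvaluatorEnv.make(
        "libero_goal",
        seed=42,
        task_id=0,
        control_mode="relative",
        controller="OSC_POSE",
        camera_heights=256,
        camera_widths=256,
    )
    obs, _ = env.reset()
    print("instruction:", env.language_instruction)
    print("cameras:", {name: img.shape for name, img in obs.cameras.items()})

    for _ in range(50):
        # 7-DoF OSC_POSE action; the last entry uses the vlagents gripper convention
        # (1 = open, 0 = closed), which Libero.step remaps to LIBERO's (-1, 1) range.
        action = np.zeros(7, dtype=np.float32)
        action[-1] = 1.0  # keep the gripper open
        obs, reward, success, done, info = env.step(Act(action=action))
        if success or done:
            break


if __name__ == "__main__":
    main()
```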
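
Patch 3 also makes the evaluation loop callable from Python through `_run_eval`, and patch 4 moves the seed, shared-memory, and JPEG options into `EvalConfig`; wandb logging is skipped entirely when no project and entity are given. The sketch below shows how a single evaluation might be launched programmatically under those patches. The agent name, `checkpoint_path` kwarg, and output path are illustrative placeholders rather than values defined by the patches.

```python
# Sketch only: programmatic use of _run_eval after patches 3-4. Import paths follow
# src/tests/test_libero.py; agent_name and agent_kwargs below are placeholders.
from vlagents.__main__ import _run_eval
from vlagents.evaluator_envs import AgentConfig, EvalConfig

eval_cfgs = [
    EvalConfig(
        env_id="libero_object",
        env_kwargs={"controller": "OSC_POSE", "camera_heights": 256, "camera_widths": 256, "task_id": 3},
        max_steps_per_episode=300,
        seed=7,               # evaluation seed, added in patch 4
        same_machine=True,    # client and server share the machine (patch 4)
        jpeg_encoding=False,  # send raw images instead of JPEG (patch 4)
    )
]
agent_cfg = AgentConfig(
    host="localhost",
    port=8080,
    agent_name="openvla",                               # placeholder policy name
    agent_kwargs={"checkpoint_path": "/path/to/ckpt"},  # placeholder kwargs
    python_path="python",
)

# No wandb_project/wandb_entity is passed, so results only go to output_path.
_run_eval(
    output_path="eval_output",
    eval_cfgs=eval_cfgs,
    agent_cfg=agent_cfg,
    episodes=50,
    n_processes=1,
    n_gpus=1,
)
```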