From bb30f46c4d672a4d4ee7ba340779cf1ebb28b804 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Tue, 26 Aug 2025 18:56:24 +0200
Subject: [PATCH 1/5] feat: added openpi model

---
 src/agents/policies.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/src/agents/policies.py b/src/agents/policies.py
index e88ccd7..14dc780 100644
--- a/src/agents/policies.py
+++ b/src/agents/policies.py
@@ -124,6 +124,42 @@ def reset(self, obs: Obs, instruction: Any, **kwargs) -> dict[str, Any]:
         return info
 
 
+class OpenPiModel(Agent):
+
+    def __init__(
+        self,
+        model_name: str = "pi0_fast_droid",
+        default_checkpoint_path: str = "gs://openpi-assets/checkpoints/pi0_fast_droid",
+        **kwargs,
+    ) -> None:
+        super().__init__(default_checkpoint_path=default_checkpoint_path, **kwargs)
+        from openpi.training import config
+
+        self.cfg = config.get_config(model_name)
+
+    def initialize(self):
+        from openpi.policies import policy_config
+        from openpi.shared import download
+
+        checkpoint_dir = download.maybe_download(self.checkpoint_path)
+
+        # Create a trained policy.
+        self.policy = policy_config.create_trained_policy(self.cfg, checkpoint_dir)
+
+    def act(self, obs: Obs) -> Act:
+        # Run inference on a dummy example.
+        observation = {f"observation/{k}": v for k, v in obs.cameras.items()}
+        observation.update(
+            {
+                "observation/joint_position": obs.info["joints"],
+                "observation/gripper_position": obs.gripper,
+                "prompt": self.instruction,
+            }
+        )
+        action_chunk = self.policy.infer(observation)["actions"]
+        return Act(action=action_chunk[0])
+
+
 class OpenVLAModel(Agent):
     # === Utilities ===
     SYSTEM_PROMPT = (
@@ -457,4 +493,5 @@ def act(self, obs: Obs) -> Act:
     openvla=OpenVLAModel,
     octodist=OctoActionDistribution,
     openvladist=OpenVLADistribution,
+    openpi=OpenPiModel,
 )

From 040c29e1e291c3300d509e747766ffb433c9bcbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Mon, 1 Sep 2025 21:55:02 +0200
Subject: [PATCH 2/5] fix: openpi checkpoint path

---
 src/agents/policies.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/agents/policies.py b/src/agents/policies.py
index 14dc780..78220f4 100644
--- a/src/agents/policies.py
+++ b/src/agents/policies.py
@@ -128,31 +128,36 @@ class OpenPiModel(Agent):
 
     def __init__(
         self,
-        model_name: str = "pi0_fast_droid",
-        default_checkpoint_path: str = "gs://openpi-assets/checkpoints/pi0_fast_droid",
+        model_name: str = "pi0_droid",
+        default_checkpoint_path: str = "gs://openpi-assets/checkpoints/pi0_droid",
         **kwargs,
     ) -> None:
+        # https://console.cloud.google.com/storage/browser/openpi-assets/checkpoints/pi0_droid
         super().__init__(default_checkpoint_path=default_checkpoint_path, **kwargs)
         from openpi.training import config
 
+        logging.info(f"checkpoint_path: {self.checkpoint_path}, checkpoint_step: {self.checkpoint_step}")
+        self.openpi_path = self.checkpoint_path.format(checkpoint_step=self.checkpoint_step)
+
         self.cfg = config.get_config(model_name)
 
     def initialize(self):
         from openpi.policies import policy_config
         from openpi.shared import download
 
-        checkpoint_dir = download.maybe_download(self.checkpoint_path)
+        checkpoint_dir = download.maybe_download(self.openpi_path)
 
         # Create a trained policy.
         self.policy = policy_config.create_trained_policy(self.cfg, checkpoint_dir)
 
     def act(self, obs: Obs) -> Act:
         # Run inference on a dummy example.
-        observation = {f"observation/{k}": v for k, v in obs.cameras.items()}
+        # observation = {f"observation/{k}": v for k, v in obs.cameras.items()}
+        observation = {}
         observation.update(
             {
-                "observation/joint_position": obs.info["joints"],
-                "observation/gripper_position": obs.gripper,
+                "observation/image": np.copy(obs.cameras["rgb_side"]).transpose(2, 0, 1),
+                "observation/state": np.concatenate([obs.info["joints"], [obs.gripper]]),
                 "prompt": self.instruction,
             }
         )

From 2b6021179575252f67b5ba36868a8a8b49510124 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Wed, 3 Sep 2025 11:28:44 +0200
Subject: [PATCH 3/5] doc: added readme instructions for openpi

---
 README.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/README.md b/README.md
index 6f1acd0..9e01f69 100644
--- a/README.md
+++ b/README.md
@@ -100,6 +100,24 @@ pip install git+https://github.com/juelg/agents.git
 
 For more details, see the [OpenVLA github page](https://github.com/openvla/openvla).
 
+### OpenPi / Pi0
+To use OpenPi, create a new conda environment:
+```shell
+conda create -n openpi python=3.11 -y
+conda activate openpi
+```
+Clone the repo and install it:
+```shell
+git clone --recurse-submodules git@github.com:Physical-Intelligence/openpi.git
+# Or if you already cloned the repo:
+git submodule update --init --recursive
+# install dependencies
+GIT_LFS_SKIP_SMUDGE=1 uv sync
+GIT_LFS_SKIP_SMUDGE=1 uv pip install -e .
+```
+For more details, see the [openpi github page](https://github.com/Physical-Intelligence/openpi).
+
+
 ## Usage
 To start an agents server use the `start-server` command where `kwargs` is a dictionary of the constructor arguments of the policy you want to start e.g.
 ```shell
@@ -107,6 +125,8 @@ To start an agents server use the `start-server` command where `kwargs` is a dic
 python -m agents start-server octo --host localhost --port 8080 --kwargs '{"checkpoint_path": "hf://Juelg/octo-base-1.5-finetuned-maniskill", "checkpoint_step": None, "horizon": 1, "unnorm_key": []}'
 # openvla
 python -m agents start-server openvla --host localhost --port 8080 --kwargs '{"checkpoint_path": "Juelg/openvla-7b-finetuned-maniskill", "device": "cuda:0", "attn_implementation": "flash_attention_2", "unnorm_key": "maniskill_human:7.0.0", "checkpoint_step": 40000}'
+# openpi
+python -m agents start-server openpi --port=8080 --host=localhost --kwargs='{"checkpoint_path": "/{checkpoint_step}", "model_name": "pi0_rcs", "checkpoint_step": }' # leave "{checkpoint_step}" it will be replaced, "model_name" is the key for the training config
 ```
 
 There is also the `run-eval-during-training` command to evaluate a model during training, so a single checkpoint.
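Note on the `checkpoint_path` kwarg used above: it is a template rather than a literal path. `OpenPiModel` substitutes the `checkpoint_step` kwarg into it before loading (the `str.format` call added in PATCH 2/5), and the resolved path is then handed to openpi's `download.maybe_download`, so both local directories and `gs://` URLs should work. A minimal sketch of the resolution, with made-up values:

```python
# Hypothetical path and step; only the .format() call mirrors what OpenPiModel does (PATCH 2/5).
checkpoint_path = "/data/openpi/checkpoints/pi0_rcs/my_run/{checkpoint_step}"
checkpoint_step = 29999

openpi_path = checkpoint_path.format(checkpoint_step=checkpoint_step)
print(openpi_path)  # -> /data/openpi/checkpoints/pi0_rcs/my_run/29999
```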
From ce905a20478ac77ed418cc9fcf85e600a4646560 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Wed, 3 Sep 2025 20:31:05 +0200
Subject: [PATCH 4/5] fix(openpi): gripper definition

---
 README.md              |  2 +-
 src/agents/policies.py | 17 +++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 9e01f69..5700aac 100644
--- a/README.md
+++ b/README.md
@@ -126,7 +126,7 @@ python -m agents start-server octo --host localhost --port 8080 --kwargs '{"chec
 # openvla
 python -m agents start-server openvla --host localhost --port 8080 --kwargs '{"checkpoint_path": "Juelg/openvla-7b-finetuned-maniskill", "device": "cuda:0", "attn_implementation": "flash_attention_2", "unnorm_key": "maniskill_human:7.0.0", "checkpoint_step": 40000}'
 # openpi
-python -m agents start-server openpi --port=8080 --host=localhost --kwargs='{"checkpoint_path": "/{checkpoint_step}", "model_name": "pi0_rcs", "checkpoint_step": }' # leave "{checkpoint_step}" it will be replaced, "model_name" is the key for the training config
+python -m agents start-server openpi --port=8080 --host=localhost --kwargs='{"checkpoint_path": "/{checkpoint_step}", "train_config_name": "pi0_rcs", "checkpoint_step": }' # leave "{checkpoint_step}" as is, it will be replaced; "train_config_name" is the name of the training config
 ```
 
 There is also the `run-eval-during-training` command to evaluate a model during training, so a single checkpoint.
diff --git a/src/agents/policies.py b/src/agents/policies.py
index 78220f4..20ea90a 100644
--- a/src/agents/policies.py
+++ b/src/agents/policies.py
@@ -128,18 +128,17 @@ class OpenPiModel(Agent):
 
     def __init__(
         self,
-        model_name: str = "pi0_droid",
+        train_config_name: str = "pi0_droid",
         default_checkpoint_path: str = "gs://openpi-assets/checkpoints/pi0_droid",
         **kwargs,
     ) -> None:
-        # https://console.cloud.google.com/storage/browser/openpi-assets/checkpoints/pi0_droid
         super().__init__(default_checkpoint_path=default_checkpoint_path, **kwargs)
         from openpi.training import config
 
         logging.info(f"checkpoint_path: {self.checkpoint_path}, checkpoint_step: {self.checkpoint_step}")
         self.openpi_path = self.checkpoint_path.format(checkpoint_step=self.checkpoint_step)
 
-        self.cfg = config.get_config(model_name)
+        self.cfg = config.get_config(train_config_name)
 
     def initialize(self):
         from openpi.policies import policy_config
@@ -151,17 +150,19 @@ def initialize(self):
         self.policy = policy_config.create_trained_policy(self.cfg, checkpoint_dir)
 
     def act(self, obs: Obs) -> Act:
-        # Run inference on a dummy example.
-        # observation = {f"observation/{k}": v for k, v in obs.cameras.items()}
-        observation = {}
+        observation = {f"observation/{k}": np.copy(v).transpose(2, 0, 1) for k, v in obs.cameras.items()}
         observation.update(
             {
-                "observation/image": np.copy(obs.cameras["rgb_side"]).transpose(2, 0, 1),
-                "observation/state": np.concatenate([obs.info["joints"], [obs.gripper]]),
+                # openpi expects 0 as gripper open and 1 as closed
+                "observation/state": np.concatenate([obs.info["joints"], [1 - obs.gripper]]),
                 "prompt": self.instruction,
             }
         )
         action_chunk = self.policy.infer(observation)["actions"]
+
+        # convert gripper action into agents format
+        action_chunk[:, -1] = 1 - action_chunk[:, -1]
+
         return Act(action=action_chunk[0])
 
 

From 5e06661a448c4d4ee7ba340779cf1ebb28b804 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Wed, 3 Sep 2025 20:41:26 +0200
Subject: [PATCH 5/5] feat(openpi): configurable execution horizon for action chunks

---
 src/agents/policies.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/agents/policies.py b/src/agents/policies.py
index 20ea90a..be8fdb4 100644
--- a/src/agents/policies.py
+++ b/src/agents/policies.py
@@ -130,6 +130,7 @@ def __init__(
         self,
         train_config_name: str = "pi0_droid",
         default_checkpoint_path: str = "gs://openpi-assets/checkpoints/pi0_droid",
+        execution_horizon: int = 20,
         **kwargs,
     ) -> None:
         super().__init__(default_checkpoint_path=default_checkpoint_path, **kwargs)
@@ -139,6 +140,10 @@ def __init__(
         self.openpi_path = self.checkpoint_path.format(checkpoint_step=self.checkpoint_step)
 
         self.cfg = config.get_config(train_config_name)
+        self.execution_horizon = execution_horizon
+
+        self.chunk_counter = self.execution_horizon
+        self._cached_action_chunk = None
 
     def initialize(self):
         from openpi.policies import policy_config
@@ -150,6 +155,12 @@ def initialize(self):
         self.policy = policy_config.create_trained_policy(self.cfg, checkpoint_dir)
 
     def act(self, obs: Obs) -> Act:
+        if self.chunk_counter < self.execution_horizon:
+            self.chunk_counter += 1
+            return Act(action=self._cached_action_chunk[self.chunk_counter - 1])
+
+        else:
+            self.chunk_counter = 1  # action_chunk[0] is returned at the end of this method
         observation = {f"observation/{k}": np.copy(v).transpose(2, 0, 1) for k, v in obs.cameras.items()}
         observation.update(
             {
@@ -162,9 +173,16 @@ def act(self, obs: Obs) -> Act:
 
         # convert gripper action into agents format
         action_chunk[:, -1] = 1 - action_chunk[:, -1]
+        self._cached_action_chunk = action_chunk
 
         return Act(action=action_chunk[0])
 
+    def reset(self, obs: Obs, instruction: Any, **kwargs) -> dict[str, Any]:
+        info = super().reset(obs, instruction, **kwargs)
+        self.chunk_counter = self.execution_horizon
+        self._cached_action_chunk = None
+        return info
+
 
 class OpenVLAModel(Agent):
     # === Utilities ===
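Taken together, the policy added by this series re-plans only every `execution_horizon` steps: one call to `policy.infer` produces an action chunk, the first action is returned immediately, and subsequent `act` calls replay the cached chunk until the horizon is exhausted, with the gripper dimension flipped between the openpi convention (0 = open, 1 = closed) and the agents convention. The pattern boils down to the following minimal, self-contained sketch (not the repository's code; the injected `policy` object stands in for an openpi policy created via `create_trained_policy`):

```python
import numpy as np


class ChunkReplayWrapper:
    """Re-plan every `execution_horizon` steps and replay the cached chunk in between."""

    def __init__(self, policy, execution_horizon: int = 20):
        # `policy` is assumed to expose .infer(obs) -> {"actions": ndarray}, like openpi policies.
        self.policy = policy
        self.execution_horizon = execution_horizon
        self._chunk = None
        self._steps_used = execution_horizon  # force a fresh inference on the first call

    def act(self, observation: dict) -> np.ndarray:
        if self._chunk is None or self._steps_used >= self.execution_horizon:
            chunk = np.asarray(self.policy.infer(observation)["actions"])
            # openpi uses 0 = gripper open / 1 = closed; flip the last action dimension
            # back to the agents convention, as PATCH 4/5 does for the real policy.
            chunk[:, -1] = 1 - chunk[:, -1]
            self._chunk = chunk
            self._steps_used = 0
        action = self._chunk[self._steps_used]
        self._steps_used += 1
        return action

    def reset(self) -> None:
        # Drop the cached chunk so the next act() call queries the model again.
        self._chunk = None
        self._steps_used = self.execution_horizon
```

With `execution_horizon=20`, exactly the first 20 actions of each predicted chunk are executed before the model is queried again, so the chunk returned by the model has to contain at least `execution_horizon` actions.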