From bb30f46c4d672a4d4ee7ba340779cf1ebb28b804 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Tue, 26 Aug 2025 18:56:24 +0200
Subject: [PATCH 1/5] feat: added openpi model

---
 src/agents/policies.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/src/agents/policies.py b/src/agents/policies.py
index e88ccd7..14dc780 100644
--- a/src/agents/policies.py
+++ b/src/agents/policies.py
@@ -124,6 +124,42 @@ def reset(self, obs: Obs, instruction: Any, **kwargs) -> dict[str, Any]:
         return info
 
 
+class OpenPiModel(Agent):
+
+    def __init__(
+        self,
+        model_name: str = "pi0_fast_droid",
+        default_checkpoint_path: str = "gs://openpi-assets/checkpoints/pi0_fast_droid",
+        **kwargs,
+    ) -> None:
+        super().__init__(default_checkpoint_path=default_checkpoint_path, **kwargs)
+        from openpi.training import config
+
+        self.cfg = config.get_config(model_name)
+
+    def initialize(self):
+        from openpi.policies import policy_config
+        from openpi.shared import download
+
+        checkpoint_dir = download.maybe_download(self.checkpoint_path)
+
+        # Create a trained policy.
+        self.policy = policy_config.create_trained_policy(self.cfg, checkpoint_dir)
+
+    def act(self, obs: Obs) -> Act:
+        # Run inference on a dummy example.
+        observation = {f"observation/{k}": v for k, v in obs.cameras.items()}
+        observation.update(
+            {
+                "observation/joint_position": obs.info["joints"],
+                "observation/gripper_position": obs.gripper,
+                "prompt": self.instruction,
+            }
+        )
+        action_chunk = self.policy.infer(observation)["actions"]
+        return Act(action=action_chunk[0])
+
+
 class OpenVLAModel(Agent):
     # === Utilities ===
     SYSTEM_PROMPT = (
@@ -457,4 +493,5 @@ def act(self, obs: Obs) -> Act:
     openvla=OpenVLAModel,
     octodist=OctoActionDistribution,
     openvladist=OpenVLADistribution,
+    openpi=OpenPiModel,
 )

From 040c29e1e291c3300d509e747766ffb433c9bcbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Mon, 1 Sep 2025 21:55:02 +0200
Subject: [PATCH 2/5] fix: openpi checkpoint path

---
 src/agents/policies.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/agents/policies.py b/src/agents/policies.py
index 14dc780..78220f4 100644
--- a/src/agents/policies.py
+++ b/src/agents/policies.py
@@ -128,31 +128,36 @@ class OpenPiModel(Agent):
 
     def __init__(
         self,
-        model_name: str = "pi0_fast_droid",
-        default_checkpoint_path: str = "gs://openpi-assets/checkpoints/pi0_fast_droid",
+        model_name: str = "pi0_droid",
+        default_checkpoint_path: str = "gs://openpi-assets/checkpoints/pi0_droid",
         **kwargs,
     ) -> None:
+        # https://console.cloud.google.com/storage/browser/openpi-assets/checkpoints/pi0_droid
         super().__init__(default_checkpoint_path=default_checkpoint_path, **kwargs)
         from openpi.training import config
 
+        logging.info(f"checkpoint_path: {self.checkpoint_path}, checkpoint_step: {self.checkpoint_step}")
+        self.openpi_path = self.checkpoint_path.format(checkpoint_step=self.checkpoint_step)
+
         self.cfg = config.get_config(model_name)
 
     def initialize(self):
         from openpi.policies import policy_config
         from openpi.shared import download
 
-        checkpoint_dir = download.maybe_download(self.checkpoint_path)
+        checkpoint_dir = download.maybe_download(self.openpi_path)
 
         # Create a trained policy.
         self.policy = policy_config.create_trained_policy(self.cfg, checkpoint_dir)
 
     def act(self, obs: Obs) -> Act:
         # Run inference on a dummy example.
-        observation = {f"observation/{k}": v for k, v in obs.cameras.items()}
+        # observation = {f"observation/{k}": v for k, v in obs.cameras.items()}
+        observation = {}
         observation.update(
             {
-                "observation/joint_position": obs.info["joints"],
-                "observation/gripper_position": obs.gripper,
+                "observation/image": np.copy(obs.cameras["rgb_side"]).transpose(2, 0, 1),
+                "observation/state": np.concatenate([obs.info["joints"], [obs.gripper]]),
                 "prompt": self.instruction,
             }
         )

From 2b6021179575252f67b5ba36868a8a8b49510124 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Wed, 3 Sep 2025 11:28:44 +0200
Subject: [PATCH 3/5] doc: added readme instructions for openpi

---
 README.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/README.md b/README.md
index 6f1acd0..9e01f69 100644
--- a/README.md
+++ b/README.md
@@ -100,6 +100,24 @@ pip install git+https://github.com/juelg/agents.git
 
 For more details, see the [OpenVLA github page](https://github.com/openvla/openvla).
 
+### OpenPi / Pi0
+To use OpenPi, create a new conda environment:
+```shell
+conda create -n openpi python=3.11 -y
+conda activate openpi
+```
+Clone the repo and install it:
+```shell
+git clone --recurse-submodules git@github.com:Physical-Intelligence/openpi.git
+# Or if you already cloned the repo:
+git submodule update --init --recursive
+# install dependencies
+GIT_LFS_SKIP_SMUDGE=1 uv sync
+GIT_LFS_SKIP_SMUDGE=1 uv pip install -e .
+```
+For more details, see the [openpi github page](https://github.com/Physical-Intelligence/openpi).
+
+
 ## Usage
 To start an agents server use the `start-server` command where `kwargs` is a dictionary of the constructor arguments of the policy you want to start e.g.
 ```shell
@@ -107,6 +125,8 @@ To start an agents server use the `start-server` command where `kwargs` is a dic
 python -m agents start-server octo --host localhost --port 8080 --kwargs '{"checkpoint_path": "hf://Juelg/octo-base-1.5-finetuned-maniskill", "checkpoint_step": None, "horizon": 1, "unnorm_key": []}'
 # openvla
 python -m agents start-server openvla --host localhost --port 8080 --kwargs '{"checkpoint_path": "Juelg/openvla-7b-finetuned-maniskill", "device": "cuda:0", "attn_implementation": "flash_attention_2", "unnorm_key": "maniskill_human:7.0.0", "checkpoint_step": 40000}'
+# openpi
+python -m agents start-server openpi --port=8080 --host=localhost --kwargs='{"checkpoint_path": "/{checkpoint_step}", "model_name": "pi0_rcs", "checkpoint_step": }' # leave "{checkpoint_step}" it will be replaced, "model_name" is the key for the training config
 ```
 
 There is also the `run-eval-during-training` command to evaluate a model during training, so a single checkpoint.
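Note on the `checkpoint_path` kwarg used above: it is a template rather than a literal path. `OpenPiModel` substitutes the `checkpoint_step` kwarg into it before loading (the `str.format` call added in PATCH 2/5), and the resolved path is then handed to openpi's `download.maybe_download`, so both local directories and `gs://` URLs should work. A minimal sketch of the resolution, with made-up values:

```python
# Hypothetical path and step; only the .format() call mirrors what OpenPiModel does (PATCH 2/5).
checkpoint_path = "/data/openpi/checkpoints/pi0_rcs/my_run/{checkpoint_step}"
checkpoint_step = 29999

openpi_path = checkpoint_path.format(checkpoint_step=checkpoint_step)
print(openpi_path)  # -> /data/openpi/checkpoints/pi0_rcs/my_run/29999
```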
From ce905a20478ac77ed418cc9fcf85e600a4646560 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Wed, 3 Sep 2025 20:31:05 +0200
Subject: [PATCH 4/5] fix(openpi): gripper definition

---
 README.md              |  2 +-
 src/agents/policies.py | 17 +++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 9e01f69..5700aac 100644
--- a/README.md
+++ b/README.md
@@ -126,7 +126,7 @@ python -m agents start-server octo --host localhost --port 8080 --kwargs '{"chec
 # openvla
 python -m agents start-server openvla --host localhost --port 8080 --kwargs '{"checkpoint_path": "Juelg/openvla-7b-finetuned-maniskill", "device": "cuda:0", "attn_implementation": "flash_attention_2", "unnorm_key": "maniskill_human:7.0.0", "checkpoint_step": 40000}'
 # openpi
-python -m agents start-server openpi --port=8080 --host=localhost --kwargs='{"checkpoint_path": "/{checkpoint_step}", "model_name": "pi0_rcs", "checkpoint_step": }' # leave "{checkpoint_step}" it will be replaced, "model_name" is the key for the training config
+python -m agents start-server openpi --port=8080 --host=localhost --kwargs='{"checkpoint_path": "/{checkpoint_step}", "train_config_name": "pi0_rcs", "checkpoint_step": }' # leave "{checkpoint_step}" as is, it will be replaced; "train_config_name" is the name of the training config
 ```
 
 There is also the `run-eval-during-training` command to evaluate a model during training, so a single checkpoint.
diff --git a/src/agents/policies.py b/src/agents/policies.py
index 78220f4..20ea90a 100644
--- a/src/agents/policies.py
+++ b/src/agents/policies.py
@@ -128,18 +128,17 @@ class OpenPiModel(Agent):
 
     def __init__(
         self,
-        model_name: str = "pi0_droid",
+        train_config_name: str = "pi0_droid",
         default_checkpoint_path: str = "gs://openpi-assets/checkpoints/pi0_droid",
         **kwargs,
     ) -> None:
-        # https://console.cloud.google.com/storage/browser/openpi-assets/checkpoints/pi0_droid
         super().__init__(default_checkpoint_path=default_checkpoint_path, **kwargs)
         from openpi.training import config
 
         logging.info(f"checkpoint_path: {self.checkpoint_path}, checkpoint_step: {self.checkpoint_step}")
         self.openpi_path = self.checkpoint_path.format(checkpoint_step=self.checkpoint_step)
 
-        self.cfg = config.get_config(model_name)
+        self.cfg = config.get_config(train_config_name)
 
     def initialize(self):
         from openpi.policies import policy_config
@@ -151,17 +150,19 @@ def initialize(self):
         self.policy = policy_config.create_trained_policy(self.cfg, checkpoint_dir)
 
     def act(self, obs: Obs) -> Act:
-        # Run inference on a dummy example.
-        # observation = {f"observation/{k}": v for k, v in obs.cameras.items()}
-        observation = {}
+        observation = {f"observation/{k}": np.copy(v).transpose(2, 0, 1) for k, v in obs.cameras.items()}
         observation.update(
             {
-                "observation/image": np.copy(obs.cameras["rgb_side"]).transpose(2, 0, 1),
-                "observation/state": np.concatenate([obs.info["joints"], [obs.gripper]]),
+                # openpi expects 0 as gripper open and 1 as closed
+                "observation/state": np.concatenate([obs.info["joints"], [1 - obs.gripper]]),
                 "prompt": self.instruction,
             }
         )
         action_chunk = self.policy.infer(observation)["actions"]
+
+        # convert gripper action into agents format
+        action_chunk[:, -1] = 1 - action_chunk[:, -1]
+
         return Act(action=action_chunk[0])
 
 

From 5e06661a448c4d4ee7ba340779cf1ebb28b804 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?=
Date: Wed, 3 Sep 2025 20:41:26 +0200
Subject: [PATCH 5/5] feat(openpi): configurable execution horizon for action chunks

---
 src/agents/policies.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/agents/policies.py b/src/agents/policies.py
index 20ea90a..be8fdb4 100644
--- a/src/agents/policies.py
+++ b/src/agents/policies.py
@@ -130,6 +130,7 @@ def __init__(
         self,
         train_config_name: str = "pi0_droid",
         default_checkpoint_path: str = "gs://openpi-assets/checkpoints/pi0_droid",
+        execution_horizon: int = 20,
         **kwargs,
     ) -> None:
         super().__init__(default_checkpoint_path=default_checkpoint_path, **kwargs)
@@ -139,6 +140,10 @@ def __init__(
         self.openpi_path = self.checkpoint_path.format(checkpoint_step=self.checkpoint_step)
 
         self.cfg = config.get_config(train_config_name)
+        self.execution_horizon = execution_horizon
+
+        self.chunk_counter = self.execution_horizon
+        self._cached_action_chunk = None
 
     def initialize(self):
         from openpi.policies import policy_config
@@ -150,6 +155,12 @@ def initialize(self):
         self.policy = policy_config.create_trained_policy(self.cfg, checkpoint_dir)
 
     def act(self, obs: Obs) -> Act:
+        if self.chunk_counter < self.execution_horizon:
+            self.chunk_counter += 1
+            return Act(action=self._cached_action_chunk[self.chunk_counter - 1])
+
+        else:
+            self.chunk_counter = 1  # action_chunk[0] is returned at the end of this method
         observation = {f"observation/{k}": np.copy(v).transpose(2, 0, 1) for k, v in obs.cameras.items()}
         observation.update(
             {
@@ -162,9 +173,16 @@ def act(self, obs: Obs) -> Act:
 
         # convert gripper action into agents format
         action_chunk[:, -1] = 1 - action_chunk[:, -1]
+        self._cached_action_chunk = action_chunk
 
         return Act(action=action_chunk[0])
 
+    def reset(self, obs: Obs, instruction: Any, **kwargs) -> dict[str, Any]:
+        info = super().reset(obs, instruction, **kwargs)
+        self.chunk_counter = self.execution_horizon
+        self._cached_action_chunk = None
+        return info
+
 
 class OpenVLAModel(Agent):
     # === Utilities ===
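Taken together, the policy added by this series re-plans only every `execution_horizon` steps: one call to `policy.infer` produces an action chunk, the first action is returned immediately, and subsequent `act` calls replay the cached chunk until the horizon is exhausted, with the gripper dimension flipped between the openpi convention (0 = open, 1 = closed) and the agents convention. The pattern boils down to the following minimal, self-contained sketch (not the repository's code; the injected `policy` object stands in for an openpi policy created via `create_trained_policy`):

```python
import numpy as np


class ChunkReplayWrapper:
    """Re-plan every `execution_horizon` steps and replay the cached chunk in between."""

    def __init__(self, policy, execution_horizon: int = 20):
        # `policy` is assumed to expose .infer(obs) -> {"actions": ndarray}, like openpi policies.
        self.policy = policy
        self.execution_horizon = execution_horizon
        self._chunk = None
        self._steps_used = execution_horizon  # force a fresh inference on the first call

    def act(self, observation: dict) -> np.ndarray:
        if self._chunk is None or self._steps_used >= self.execution_horizon:
            chunk = np.asarray(self.policy.infer(observation)["actions"])
            # openpi uses 0 = gripper open / 1 = closed; flip the last action dimension
            # back to the agents convention, as PATCH 4/5 does for the real policy.
            chunk[:, -1] = 1 - chunk[:, -1]
            self._chunk = chunk
            self._steps_used = 0
        action = self._chunk[self._steps_used]
        self._steps_used += 1
        return action

    def reset(self) -> None:
        # Drop the cached chunk so the next act() call queries the model again.
        self._chunk = None
        self._steps_used = self.execution_horizon
```

With `execution_horizon=20`, exactly the first 20 actions of each predicted chunk are executed before the model is queried again, so the chunk returned by the model has to contain at least `execution_horizon` actions.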