RobotControlStack · pkrack · Jan 20, 2026 · Jan 19, 2026 · Jan 19, 2026
diff --git a/examples/fr3/grasp_demo.py b/examples/fr3/grasp_demo.py
@@ -61,10 +61,11 @@ def approach(self, geom_name: str):
 
     def grasp(self, geom_name: str):
 
-        waypoints = self.plan_linear_motion(geom_name=geom_name, delta_up=0.01, num_waypoints=60)
+        waypoints = self.plan_linear_motion(geom_name=geom_name, delta_up=-0.005, num_waypoints=60)
         self.execute_motion(waypoints=waypoints, gripper=GripperWrapper.BINARY_GRIPPER_OPEN)
 
-        self.step(self._action(Pose(), GripperWrapper.BINARY_GRIPPER_CLOSED))
+        for _ in range(4):
+            self.step(self._action(self.unwrapped.robot.get_cartesian_position(), GripperWrapper.BINARY_GRIPPER_CLOSED))
 
         waypoints = self.plan_linear_motion(geom_name=geom_name, delta_up=0.2, num_waypoints=60)
         self.execute_motion(waypoints=waypoints, gripper=GripperWrapper.BINARY_GRIPPER_CLOSED)

diff --git a/python/rcs/envs/sim.py b/python/rcs/envs/sim.py
@@ -70,7 +70,7 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
         return obs, 0, False, info["collision"] or not state.ik_success, info
 
     def reset(
-        self, seed: int | None = None, options: dict[str, Any] | None = None
+        self, *, seed: int | None = None, options: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         self.sim.reset()
         obs, info = super().reset(seed=seed, options=options)
@@ -103,11 +103,11 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
         truncated = np.all([info[key]["collision"] or info[key]["ik_success"] for key in info])
         return obs, 0.0, False, bool(truncated), info
 
-    def reset(
-        self, seed: dict[str, int | None] | None = None, options: dict[str, Any] | None = None  # type: ignore
+    def reset(  # type: ignore
+        self, *, seed: dict[str, int | None] | None = None, options: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         if seed is None:
-            seed = {key: None for key in self.env.envs}
+            seed = dict.fromkeys(self.env.envs)
         if options is None:
             options = {key: {} for key in self.env.envs}
         obs = {}
@@ -196,7 +196,6 @@ def __init__(
             self.sim.open_gui()
 
     def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], SupportsFloat, bool, bool, dict[str, Any]]:
-
         self.collision_env.get_wrapper_attr("robot").set_joints_hard(self.unwrapped.robot.get_joint_position())
         _, _, _, _, info = self.collision_env.step(action)
 
@@ -219,7 +218,7 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], SupportsFloat, b
         return obs, reward, done, truncated, info
 
     def reset(
-        self, seed: int | None = None, options: dict[str, Any] | None = None
+        self, *, seed: int | None = None, options: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         # check if move to home is collision free
         if self.check_home_collision:
@@ -333,7 +332,7 @@ def __init__(
         self.y_offset = y_offset
 
     def reset(
-        self, seed: int | None = None, options: dict[str, Any] | None = None
+        self, *, seed: int | None = None, options: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         if options is not None and "RandomObjectPos.init_object_pose" in options:
             assert isinstance(
@@ -381,7 +380,7 @@ def __init__(self, env: gym.Env, simulation: sim.Sim, include_rotation: bool = F
         self.cube_joint_name = cube_joint_name
 
     def reset(
-        self, seed: int | None = None, options: dict[str, Any] | None = None
+        self, *, seed: int | None = None, options: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         obs, info = super().reset(seed=seed, options=options)
         self.sim.step(1)
@@ -411,18 +410,17 @@ class PickCubeSuccessWrapper(gym.Wrapper):
     - whether the arm is standing still once the task is solved.
     """
 
-    # In robot coordinates
-    EE_HOME = np.array([3.06890567e-01, 3.76703856e-23, 4.40282052e-01])
-
     def __init__(self, env, cube_joint_name="box_joint"):
         super().__init__(env)
         self.unwrapped: RobotEnv
         assert isinstance(self.unwrapped.robot, sim.SimRobot), "Robot must be a sim.SimRobot instance."
         self.sim = env.get_wrapper_attr("sim")
         self.cube_geom_name = "box_geom"
+        self.home_pose = self.unwrapped.robot.get_cartesian_position()
         self._gripper_closing = 0
+        self._gripper = self.get_wrapper_attr("_gripper")
 
-    def step(self, action: dict[str, Any]):
+    def step(self, action: dict[str, Any]):  # type: ignore
         obs, reward, _, truncated, info = super().step(action)
         if (
             self._gripper.get_normalized_width() > 0.01
@@ -438,27 +436,32 @@ def step(self, action: dict[str, Any]):
             cube_pose.translation() - self.unwrapped.robot.get_cartesian_position().translation()
         )
         obj_to_goal_dist = 0.10 - min(cube_pose.translation()[-1], 0.10)
+        obj_to_goal_dist = np.linalg.norm(cube_pose.translation() - self.home_pose.translation())
         # NOTE: 4 depends on the time passing between each step.
         is_grasped = (
             self._gripper_closing >= 4  # gripper is closing since more than 4 steps
             and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED  # command is still close
-            and tcp_to_obj_dist <= 0.007  # tcp to cube center is max 7mm
+            and tcp_to_obj_dist <= 0.01  # tcp to cube center is max 1cm
         )
-        success = obj_to_goal_dist == 0 and info["is_grasped"]
+        success = obj_to_goal_dist <= 0.022 and info["is_grasped"]
         movement = np.linalg.norm(self.sim.data.qvel)
 
         reaching_reward = 1 - np.tanh(5 * tcp_to_obj_dist)
-        place_reward = 1 - np.tanh(5 * obj_to_goal_dist) * is_grasped
-        static_reward = 1 - np.tanh(5 * movement) * success
+        place_reward = 1 - np.tanh(5 * obj_to_goal_dist)
+        static_reward = 1 - np.tanh(5 * movement)
         info["is_grasped"] = is_grasped
         info["success"] = success
-        reward = reaching_reward + place_reward + static_reward
+        reward = reaching_reward + place_reward * is_grasped + static_reward * success
         reward /= 3  # type: ignore
         return obs, reward, success, truncated, info
 
+    def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None):
+        obs, info = super().reset()
+        self.home_pose = self.unwrapped.robot.get_cartesian_position()
+        return obs, info
 
-class DigitalTwin(gym.Wrapper):
 
+class DigitalTwin(gym.Wrapper):
     def __init__(self, env, twin_env):
         super().__init__(env)
         self.twin_env = twin_env