diff --git a/examples/fr3/grasp_demo.py b/examples/fr3/grasp_demo.py index d54efc06..606c8d63 100644 --- a/examples/fr3/grasp_demo.py +++ b/examples/fr3/grasp_demo.py @@ -61,10 +61,11 @@ def approach(self, geom_name: str): def grasp(self, geom_name: str): - waypoints = self.plan_linear_motion(geom_name=geom_name, delta_up=0.01, num_waypoints=60) + waypoints = self.plan_linear_motion(geom_name=geom_name, delta_up=-0.005, num_waypoints=60) self.execute_motion(waypoints=waypoints, gripper=GripperWrapper.BINARY_GRIPPER_OPEN) - self.step(self._action(Pose(), GripperWrapper.BINARY_GRIPPER_CLOSED)) + for _ in range(4): + self.step(self._action(self.unwrapped.robot.get_cartesian_position(), GripperWrapper.BINARY_GRIPPER_CLOSED)) waypoints = self.plan_linear_motion(geom_name=geom_name, delta_up=0.2, num_waypoints=60) self.execute_motion(waypoints=waypoints, gripper=GripperWrapper.BINARY_GRIPPER_CLOSED) diff --git a/python/rcs/envs/sim.py b/python/rcs/envs/sim.py index 06817a1b..589b49fd 100644 --- a/python/rcs/envs/sim.py +++ b/python/rcs/envs/sim.py @@ -70,7 +70,7 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo return obs, 0, False, info["collision"] or not state.ik_success, info def reset( - self, seed: int | None = None, options: dict[str, Any] | None = None + self, *, seed: int | None = None, options: dict[str, Any] | None = None ) -> tuple[dict[str, Any], dict[str, Any]]: self.sim.reset() obs, info = super().reset(seed=seed, options=options) @@ -103,11 +103,11 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo truncated = np.all([info[key]["collision"] or info[key]["ik_success"] for key in info]) return obs, 0.0, False, bool(truncated), info - def reset( - self, seed: dict[str, int | None] | None = None, options: dict[str, Any] | None = None # type: ignore + def reset( # type: ignore + self, *, seed: dict[str, int | None] | None = None, options: dict[str, Any] | None = None ) -> tuple[dict[str, Any], dict[str, Any]]: if seed is None: - seed = {key: None for key in self.env.envs} + seed = dict.fromkeys(self.env.envs) if options is None: options = {key: {} for key in self.env.envs} obs = {} @@ -196,7 +196,6 @@ def __init__( self.sim.open_gui() def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], SupportsFloat, bool, bool, dict[str, Any]]: - self.collision_env.get_wrapper_attr("robot").set_joints_hard(self.unwrapped.robot.get_joint_position()) _, _, _, _, info = self.collision_env.step(action) @@ -219,7 +218,7 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], SupportsFloat, b return obs, reward, done, truncated, info def reset( - self, seed: int | None = None, options: dict[str, Any] | None = None + self, *, seed: int | None = None, options: dict[str, Any] | None = None ) -> tuple[dict[str, Any], dict[str, Any]]: # check if move to home is collision free if self.check_home_collision: @@ -333,7 +332,7 @@ def __init__( self.y_offset = y_offset def reset( - self, seed: int | None = None, options: dict[str, Any] | None = None + self, *, seed: int | None = None, options: dict[str, Any] | None = None ) -> tuple[dict[str, Any], dict[str, Any]]: if options is not None and "RandomObjectPos.init_object_pose" in options: assert isinstance( @@ -381,7 +380,7 @@ def __init__(self, env: gym.Env, simulation: sim.Sim, include_rotation: bool = F self.cube_joint_name = cube_joint_name def reset( - self, seed: int | None = None, options: dict[str, Any] | None = None + self, *, seed: int | None = None, options: dict[str, Any] | None = None ) -> tuple[dict[str, Any], dict[str, Any]]: obs, info = super().reset(seed=seed, options=options) self.sim.step(1) @@ -411,18 +410,17 @@ class PickCubeSuccessWrapper(gym.Wrapper): - whether the arm is standing still once the task is solved. """ - # In robot coordinates - EE_HOME = np.array([3.06890567e-01, 3.76703856e-23, 4.40282052e-01]) - def __init__(self, env, cube_joint_name="box_joint"): super().__init__(env) self.unwrapped: RobotEnv assert isinstance(self.unwrapped.robot, sim.SimRobot), "Robot must be a sim.SimRobot instance." self.sim = env.get_wrapper_attr("sim") self.cube_geom_name = "box_geom" + self.home_pose = self.unwrapped.robot.get_cartesian_position() self._gripper_closing = 0 + self._gripper = self.get_wrapper_attr("_gripper") - def step(self, action: dict[str, Any]): + def step(self, action: dict[str, Any]): # type: ignore obs, reward, _, truncated, info = super().step(action) if ( self._gripper.get_normalized_width() > 0.01 @@ -438,27 +436,32 @@ def step(self, action: dict[str, Any]): cube_pose.translation() - self.unwrapped.robot.get_cartesian_position().translation() ) obj_to_goal_dist = 0.10 - min(cube_pose.translation()[-1], 0.10) + obj_to_goal_dist = np.linalg.norm(cube_pose.translation() - self.home_pose.translation()) # NOTE: 4 depends on the time passing between each step. is_grasped = ( self._gripper_closing >= 4 # gripper is closing since more than 4 steps and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED # command is still close - and tcp_to_obj_dist <= 0.007 # tcp to cube center is max 7mm + and tcp_to_obj_dist <= 0.01 # tcp to cube center is max 1cm ) - success = obj_to_goal_dist == 0 and info["is_grasped"] + success = obj_to_goal_dist <= 0.022 and info["is_grasped"] movement = np.linalg.norm(self.sim.data.qvel) reaching_reward = 1 - np.tanh(5 * tcp_to_obj_dist) - place_reward = 1 - np.tanh(5 * obj_to_goal_dist) * is_grasped - static_reward = 1 - np.tanh(5 * movement) * success + place_reward = 1 - np.tanh(5 * obj_to_goal_dist) + static_reward = 1 - np.tanh(5 * movement) info["is_grasped"] = is_grasped info["success"] = success - reward = reaching_reward + place_reward + static_reward + reward = reaching_reward + place_reward * is_grasped + static_reward * success reward /= 3 # type: ignore return obs, reward, success, truncated, info + def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None): + obs, info = super().reset() + self.home_pose = self.unwrapped.robot.get_cartesian_position() + return obs, info -class DigitalTwin(gym.Wrapper): +class DigitalTwin(gym.Wrapper): def __init__(self, env, twin_env): super().__init__(env) self.twin_env = twin_env