diff --git a/source/isaaclab/isaaclab/envs/mdp/events.py b/source/isaaclab/isaaclab/envs/mdp/events.py index 8b46b549d19b..9736f9b3976f 100644 --- a/source/isaaclab/isaaclab/envs/mdp/events.py +++ b/source/isaaclab/isaaclab/envs/mdp/events.py @@ -601,10 +601,10 @@ def randomize(data: torch.Tensor, params: tuple[float, float]) -> torch.Tensor: global_indices = actuator_joint_indices[actuator_indices] # Randomize stiffness if stiffness_distribution_params is not None: - stiffness = actuator.stiffness[env_ids].clone() + stiffness = wp.to_torch(actuator.data.actuator_stiffness)[env_ids].clone() stiffness[:, actuator_indices] = self.default_joint_stiffness[env_ids][:, global_indices].clone() randomize(stiffness, stiffness_distribution_params) - actuator.stiffness[env_ids] = stiffness + wp.to_torch(actuator.data.actuator_stiffness)[env_ids] = stiffness if isinstance(actuator, ImplicitActuator): self.asset.write_joint_stiffness_to_sim( stiffness, joint_ids=actuator.joint_indices, env_ids=env_ids @@ -614,7 +614,7 @@ def randomize(data: torch.Tensor, params: tuple[float, float]) -> torch.Tensor: damping = wp.to_torch(self.asset.data.actuator_damping)[env_ids].clone() damping[:, actuator_indices] = self.default_joint_damping[env_ids][:, global_indices].clone() randomize(damping, damping_distribution_params) - actuator.damping[env_ids] = damping + wp.to_torch(self.asset.data.actuator_damping)[env_ids] = damping if isinstance(actuator, ImplicitActuator): self.asset.write_joint_damping_to_sim(damping, joint_ids=actuator.joint_indices, env_ids=env_ids) @@ -803,7 +803,7 @@ def __call__( ) # set the position limits into the physics simulation self.asset.write_joint_position_limit_to_sim( - joint_pos_limits, joint_ids=joint_ids, env_ids=env_ids, warn_limit_violation=False + joint_pos_limits, joint_ids=joint_ids, env_ids=env_ids ) diff --git a/source/isaaclab/isaaclab/managers/event_manager.py b/source/isaaclab/isaaclab/managers/event_manager.py index ddebb154f133..cfc33e829728 100644 --- 
a/source/isaaclab/isaaclab/managers/event_manager.py +++ b/source/isaaclab/isaaclab/managers/event_manager.py @@ -204,6 +204,8 @@ def apply( # iterate over all the event terms for index, term_cfg in enumerate(self._mode_term_cfgs[mode]): + # resolve the term's callable once per term; do not invoke it here + term_cfg_func_obj = term_cfg.func if mode == "interval": # extract time left for this term time_left = self._interval_term_time_left[index] @@ -219,7 +221,7 @@ def apply( self._interval_term_time_left[index][:] = sampled_interval # call the event term (with None for env_ids) - term_cfg.func(self._env, None, **term_cfg.params) + term_cfg_func_obj(self._env, None, **term_cfg.params) else: valid_env_ids = (time_left < 1e-6).nonzero().flatten() if len(valid_env_ids) > 0: @@ -228,7 +230,7 @@ def apply( self._interval_term_time_left[index][valid_env_ids] = sampled_time # call the event term - term_cfg.func(self._env, valid_env_ids, **term_cfg.params) + term_cfg_func_obj(self._env, valid_env_ids, **term_cfg.params) elif mode == "reset": # obtain the minimum step count between resets min_step_count = term_cfg.min_step_count_between_reset @@ -243,7 +245,7 @@ def apply( self._reset_term_last_triggered_once[index][env_ids] = True # call the event term with the environment indices - term_cfg.func(self._env, env_ids, **term_cfg.params) + term_cfg_func_obj(self._env, env_ids, **term_cfg.params) else: # extract last reset step for this term last_triggered_step = self._reset_term_last_triggered_step_id[index][env_ids] @@ -269,10 +271,10 @@ def apply( self._reset_term_last_triggered_step_id[index][valid_env_ids] = global_env_step_count # call the event term - term_cfg.func(self._env, valid_env_ids, **term_cfg.params) + term_cfg_func_obj(self._env, valid_env_ids, **term_cfg.params) else: # call the event term - term_cfg.func(self._env, env_ids, **term_cfg.params) + term_cfg_func_obj(self._env, env_ids, **term_cfg.params) """ Operations - Term settings. 
diff --git a/source/isaaclab_assets/isaaclab_assets/robots/shadow_hand.py b/source/isaaclab_assets/isaaclab_assets/robots/shadow_hand.py new file mode 100644 index 000000000000..0cbf214a8145 --- /dev/null +++ b/source/isaaclab_assets/isaaclab_assets/robots/shadow_hand.py @@ -0,0 +1,84 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configuration for the dexterous hand from Shadow Robot. + +The following configurations are available: + +* :obj:`SHADOW_HAND_CFG`: Shadow Hand with implicit actuator model. + +Reference: + +* https://www.shadowrobot.com/dexterous-hand-series/ + +""" + +import isaaclab.sim as sim_utils +from isaaclab.actuators import ImplicitActuatorCfg +from isaaclab.assets.articulation import ArticulationCfg +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR + +## +# Configuration +## + +SHADOW_HAND_CFG = ArticulationCfg( + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/ShadowRobot/ShadowHand/shadow_hand_instanceable.usd", + activate_contact_sensors=False, + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=True, + retain_accelerations=True, + max_depenetration_velocity=1000.0, + ), + articulation_props=sim_utils.ArticulationRootPropertiesCfg( + enabled_self_collisions=True, + solver_position_iteration_count=8, + solver_velocity_iteration_count=0, + sleep_threshold=0.005, + stabilization_threshold=0.0005, + ), + # collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0), + joint_drive_props=sim_utils.JointDrivePropertiesCfg(drive_type="force"), + # fixed_tendons_props=sim_utils.FixedTendonPropertiesCfg(limit_stiffness=30.0, damping=0.1), + ), + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, 0.0, 0.5), + rot=(1.0, 0.0, 0.0, 0.0), + joint_pos={".*": 0.0}, + ), + actuators={ + "fingers": ImplicitActuatorCfg( + 
joint_names_expr=["robot0_WR.*", "robot0_(FF|MF|RF|LF|TH)J(3|2|1)", "robot0_(LF|TH)J4", "robot0_THJ0"], + effort_limit_sim={ + "robot0_WRJ1": 4.785, + "robot0_WRJ0": 2.175, + "robot0_(FF|MF|RF|LF)J1": 0.7245, + "robot0_FFJ(3|2)": 0.9, + "robot0_MFJ(3|2)": 0.9, + "robot0_RFJ(3|2)": 0.9, + "robot0_LFJ(4|3|2)": 0.9, + "robot0_THJ4": 2.3722, + "robot0_THJ3": 1.45, + "robot0_THJ(2|1)": 0.99, + "robot0_THJ0": 0.81, + }, + stiffness={ + "robot0_WRJ.*": 5.0, + "robot0_(FF|MF|RF|LF|TH)J(3|2|1)": 1.0, + "robot0_(LF|TH)J4": 1.0, + "robot0_THJ0": 1.0, + }, + damping={ + "robot0_WRJ.*": 0.5, + "robot0_(FF|MF|RF|LF|TH)J(3|2|1)": 0.1, + "robot0_(LF|TH)J4": 0.1, + "robot0_THJ0": 0.1, + }, + ), + }, + soft_joint_pos_limit_factor=1.0, +) +"""Configuration of Shadow Hand robot.""" diff --git a/source/isaaclab_newton/isaaclab_newton/actuators/actuator_base.py b/source/isaaclab_newton/isaaclab_newton/actuators/actuator_base.py index 24a28ae5f2c1..048823c4a586 100644 --- a/source/isaaclab_newton/isaaclab_newton/actuators/actuator_base.py +++ b/source/isaaclab_newton/isaaclab_newton/actuators/actuator_base.py @@ -155,6 +155,16 @@ def joint_mask(self) -> wp.array: """Articulation's masked indices that denote which joints are part of the group.""" return self._joint_mask + @property + def joint_indices(self) -> slice | torch.Tensor: + """Articulation's joint indices that are part of the group. + + Note: + If :obj:`slice(None)` is returned, then the group contains all the joints in the articulation. + We do this to avoid unnecessary indexing of the joints for performance reasons. + """ + return self._joint_indices + """ Operations. 
""" @@ -230,7 +240,7 @@ def _parse_joint_parameter(self, cfg_value: float | dict[str, float] | None, ori elif isinstance(cfg_value, dict): # if dict, then parse the regular expression indices, _, values = string_utils.resolve_matching_names_values(cfg_value, self.joint_names) - indices_global = [self._joint_indices[i] for i in indices] + indices_global = [self._joint_indices[i].cpu() for i in indices] wp.launch( update_array2D_with_array1D_indexed, dim=(self._num_envs, len(indices_global)), diff --git a/source/isaaclab_newton/isaaclab_newton/assets/articulation/articulation.py b/source/isaaclab_newton/isaaclab_newton/assets/articulation/articulation.py index 289416be7fde..d9007b23c79a 100644 --- a/source/isaaclab_newton/isaaclab_newton/assets/articulation/articulation.py +++ b/source/isaaclab_newton/isaaclab_newton/assets/articulation/articulation.py @@ -1041,8 +1041,7 @@ def write_joint_damping_to_sim( def write_joint_position_limit_to_sim( self, - lower_limits: wp.array | float, - upper_limits: wp.array | float, + limits: wp.array | float, joint_ids: Sequence[int] | None = None, env_ids: Sequence[int] | None = None, joint_mask: wp.array | None = None, @@ -1059,13 +1058,17 @@ def write_joint_position_limit_to_sim( env_mask: The environment mask. Shape is (num_instances,). """ # Resolve indices into mask, convert from partial data to complete data, handles the conversion to warp. 
-        if isinstance(upper_limits, torch.Tensor): +        if isinstance(limits, torch.Tensor): if self._temp_joint_pos is None: self._temp_joint_pos = wp.zeros( (self.num_instances, self.num_joints), dtype=wp.float32, device=self.device ) upper_limits = make_complete_data_from_torch_dual_index( - upper_limits, + limits[:,0,:,0], self.num_instances, self.num_joints, env_ids, joint_ids, joint_mask, device=self.device, out=self._temp_joint_pos, ) @@ -1074,13 +1077,17 @@ -        if isinstance(lower_limits, torch.Tensor): +        if isinstance(limits, torch.Tensor): if self._temp_joint_vel is None: self._temp_joint_vel = wp.zeros( (self.num_instances, self.num_joints), dtype=wp.float32, device=self.device ) lower_limits = make_complete_data_from_torch_dual_index( - lower_limits, + limits[:,0,:,1], self.num_instances, self.num_joints, env_ids, joint_ids, joint_mask, device=self.device, - out=self._temp_joint_pos, + out=self._temp_joint_vel, ) @@ -2290,6 +2297,11 @@ def _process_actuators_cfg(self): f"No joints found for actuator group: {actuator_name} with joint name expression:" f" {actuator_cfg.joint_names_expr}." 
) + + if len(joint_names) == self.num_joints: + joint_indices = slice(None) + else: + joint_indices = torch.tensor(joint_indices, device=self.device) # create actuator collection # note: for efficiency avoid indexing when over all indices actuator: ActuatorBase = actuator_cfg.class_type( diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/inhand_manipulation/inhand_manipulation_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/inhand_manipulation/inhand_manipulation_env.py index 0417d46ce2a7..df1c1b7e336a 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/inhand_manipulation/inhand_manipulation_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/inhand_manipulation/inhand_manipulation_env.py @@ -335,33 +335,33 @@ def compute_full_observations(self): obs[inds] = 0.0 return obs - # def compute_full_state(self): - # states = torch.cat( - # ( - # # hand - # unscale(self.hand_dof_pos, self.hand_dof_lower_limits, self.hand_dof_upper_limits), - # self.cfg.vel_obs_scale * self.hand_dof_vel, - # # object - # self.object_pos, - # self.object_rot, - # self.object_linvel, - # self.cfg.vel_obs_scale * self.object_angvel, - # # goal - # self.in_hand_pos, - # self.goal_rot, - # quat_mul(self.object_rot, quat_conjugate(self.goal_rot)), - # # fingertips - # self.fingertip_pos.view(self.num_envs, self.num_fingertips * 3), - # self.fingertip_rot.view(self.num_envs, self.num_fingertips * 4), - # self.fingertip_velocities.view(self.num_envs, self.num_fingertips * 6), - # self.cfg.force_torque_obs_scale - # * self.fingertip_force_sensors.view(self.num_envs, self.num_fingertips * 6), - # # actions - # self.actions, - # ), - # dim=-1, - # ) - # return states + def compute_full_state(self): + states = torch.cat( + ( + # hand + unscale(self.hand_dof_pos, self.hand_dof_lower_limits, self.hand_dof_upper_limits), + self.cfg.vel_obs_scale * self.hand_dof_vel, + # object + self.object_pos, + self.object_rot, + self.object_linvel, + self.cfg.vel_obs_scale * self.object_angvel, 
+ # goal + self.in_hand_pos, + self.goal_rot, + quat_mul(self.object_rot, quat_conjugate(self.goal_rot)), + # fingertips + self.fingertip_pos.view(self.num_envs, self.num_fingertips * 3), + self.fingertip_rot.view(self.num_envs, self.num_fingertips * 4), + self.fingertip_velocities.view(self.num_envs, self.num_fingertips * 6), + # self.cfg.force_torque_obs_scale * + # self.fingertip_force_sensors.view(self.num_envs, self.num_fingertips * 6), + # actions + self.actions, + ), + dim=-1, + ) + return states @torch.jit.script diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/__init__.py new file mode 100644 index 000000000000..4d2f0f3eee50 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/__init__.py @@ -0,0 +1,78 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Shadow Hand environment. +""" + +import gymnasium as gym + +from . import agents + +## +# Register Gym environments. 
+## + +inhand_task_entry = "isaaclab_tasks.direct.inhand_manipulation" + +gym.register( + id="Isaac-Repose-Cube-Shadow-Direct-v0", + entry_point=f"{inhand_task_entry}.inhand_manipulation_env:InHandManipulationEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.shadow_hand_env_cfg:ShadowHandEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Repose-Cube-Shadow-OpenAI-FF-Direct-v0", + entry_point=f"{inhand_task_entry}.inhand_manipulation_env:InHandManipulationEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.shadow_hand_env_cfg:ShadowHandOpenAIEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_ff_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandAsymFFPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ff_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Repose-Cube-Shadow-OpenAI-LSTM-Direct-v0", + entry_point=f"{inhand_task_entry}.inhand_manipulation_env:InHandManipulationEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.shadow_hand_env_cfg:ShadowHandOpenAIEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_lstm_cfg.yaml", + }, +) + +# ------- +# Vision +# ------- + +gym.register( + id="Isaac-Repose-Cube-Shadow-Vision-Direct-v0", + entry_point=f"{__name__}.shadow_hand_vision_env:ShadowHandVisionEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.shadow_hand_vision_env:ShadowHandVisionEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandVisionFFPPORunnerCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_vision_cfg.yaml", + }, +) + +gym.register( + 
id="Isaac-Repose-Cube-Shadow-Vision-Direct-Play-v0", + entry_point=f"{__name__}.shadow_hand_vision_env:ShadowHandVisionEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.shadow_hand_vision_env:ShadowHandVisionEnvPlayCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandVisionFFPPORunnerCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_vision_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/ skrl_ff_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/ skrl_ff_ppo_cfg.yaml new file mode 100644 index 000000000000..0831dd7b4125 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/ skrl_ff_ppo_cfg.yaml @@ -0,0 +1,85 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [400, 400, 200, 100] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [512, 512, 256, 128] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html 
+agent: + class: PPO + rollouts: 16 + learning_epochs: 4 + mini_batches: 4 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.01 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.005 + value_loss_scale: 1.0 + kl_threshold: 0.0 + rewards_shaper_scale: 1.0 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "shadow_hand_openai_ff" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 160000 + environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/__init__.py new file mode 100644 index 000000000000..460a30569089 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_cfg.yaml new file mode 100644 index 000000000000..30b3b0b012f4 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_cfg.yaml @@ -0,0 +1,91 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). 
+# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +params: + seed: 42 + + # environment wrapper clipping + env: + # added to the wrapper + clip_observations: 5.0 + # can make custom wrapper? + clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + # doesn't have this fine grained control but made it close + network: + name: actor_critic + separate: False + space: + continuous: + mu_activation: None + sigma_activation: None + + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [512, 512, 256, 128] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: shadow_hand + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + value_bootstrap: True + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 0.01 + normalize_advantage: True + gamma: 0.99 + tau : 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + schedule_type: standard + kl_threshold: 0.016 + score_to_win: 100000 + max_epochs: 5000 + save_best_after: 100 + save_frequency: 200 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 16 + minibatch_size: 32768 + mini_epochs: 5 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 + + player: + deterministic: True + games_num: 100000 + print_stats: True diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_ff_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_ff_cfg.yaml new file mode 100644 index 000000000000..6724046a9a59 --- /dev/null +++ 
b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_ff_cfg.yaml @@ -0,0 +1,112 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +params: + seed: 42 + + # environment wrapper clipping + env: + # added to the wrapper + clip_observations: 5.0 + # can make custom wrapper? + clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + + space: + continuous: + mu_activation: None + sigma_activation: None + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [400, 400, 200, 100] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: shadow_hand_openai_ff + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + num_actors: -1 + reward_shaper: + scale_value: 0.01 + normalize_advantage: True + gamma: 0.998 + tau: 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + schedule_type: standard + kl_threshold: 0.016 + score_to_win: 100000 + max_epochs: 10000 + save_best_after: 100 + save_frequency: 200 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 16 + minibatch_size: 16384 + mini_epochs: 4 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 + + central_value_config: + minibatch_size: 32768 + mini_epochs: 4 + learning_rate: 5e-4 + lr_schedule: adaptive + schedule_type: standard + kl_threshold: 0.016 + clip_value: True + normalize_input: True + truncate_grads: True + + network: + name: 
actor_critic + central_value: True + mlp: + units: [512, 512, 256, 128] + activation: elu + d2rl: False + initializer: + name: default + regularizer: + name: None + + player: + deterministic: True + games_num: 100000 + print_stats: True diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_lstm_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_lstm_cfg.yaml new file mode 100644 index 000000000000..0aea26e9cde6 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_lstm_cfg.yaml @@ -0,0 +1,123 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +params: + seed: 42 + + # environment wrapper clipping + env: + # added to the wrapper + clip_observations: 5.0 + # can make custom wrapper? + clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + + space: + continuous: + mu_activation: None + sigma_activation: None + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [512] + activation: relu + d2rl: False + + initializer: + name: default + regularizer: + name: None + rnn: + name: lstm + units: 1024 + layers: 1 + before_mlp: True + layer_norm: True + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: shadow_hand_openai_lstm + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + num_actors: -1 # configured from the script (based on num_envs) + reward_shaper: + scale_value: 0.01 + normalize_advantage: True + gamma: 0.998 + tau: 0.95 + learning_rate: 1e-4 + lr_schedule: 
adaptive + schedule_type: standard + kl_threshold: 0.016 + score_to_win: 100000 + max_epochs: 10000 + save_best_after: 100 + save_frequency: 200 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 16 + minibatch_size: 16384 + mini_epochs: 4 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 + + central_value_config: + minibatch_size: 32768 + mini_epochs: 4 + learning_rate: 1e-4 + kl_threshold: 0.016 + clip_value: True + normalize_input: True + truncate_grads: True + + network: + name: actor_critic + central_value: True + mlp: + units: [512] + activation: relu + d2rl: False + initializer: + name: default + regularizer: + name: None + rnn: + name: lstm + units: 1024 + layers: 1 + before_mlp: True + layer_norm: True + zero_rnn_on_done: False + + player: + deterministic: True + games_num: 100000 + print_stats: True diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_vision_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_vision_cfg.yaml new file mode 100644 index 000000000000..43fa1d20fb88 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rl_games_ppo_vision_cfg.yaml @@ -0,0 +1,112 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +params: + seed: 42 + + # environment wrapper clipping + env: + # added to the wrapper + clip_observations: 5.0 + # can make custom wrapper? 
+ clip_actions: 1.0 + + algo: + name: a2c_continuous + + model: + name: continuous_a2c_logstd + + network: + name: actor_critic + separate: False + + space: + continuous: + mu_activation: None + sigma_activation: None + mu_init: + name: default + sigma_init: + name: const_initializer + val: 0 + fixed_sigma: True + mlp: + units: [1024, 512, 512, 256, 128] + activation: elu + d2rl: False + + initializer: + name: default + regularizer: + name: None + + load_checkpoint: False # flag which sets whether to load the checkpoint + load_path: '' # path to the checkpoint to load + + config: + name: shadow_hand_vision + env_name: rlgpu + device: 'cuda:0' + device_name: 'cuda:0' + multi_gpu: False + ppo: True + mixed_precision: False + normalize_input: True + normalize_value: True + num_actors: -1 + reward_shaper: + scale_value: 0.01 + normalize_advantage: True + gamma: 0.99 + tau: 0.95 + learning_rate: 5e-4 + lr_schedule: adaptive + schedule_type: standard + kl_threshold: 0.01 + score_to_win: 100000 + max_epochs: 50000 + save_best_after: 100 + save_frequency: 200 + print_stats: True + grad_norm: 1.0 + entropy_coef: 0.0 + truncate_grads: True + e_clip: 0.2 + horizon_length: 64 + minibatch_size: 19600 + mini_epochs: 5 + critic_coef: 4 + clip_value: True + seq_length: 4 + bounds_loss_coef: 0.0001 + + central_value_config: + minibatch_size: 19600 + mini_epochs: 5 + learning_rate: 5e-4 + lr_schedule: adaptive + schedule_type: standard + kl_threshold: 0.01 + clip_value: True + normalize_input: True + truncate_grads: True + + network: + name: actor_critic + central_value: True + mlp: + units: [1024, 512, 512, 256, 128] + activation: elu + d2rl: False + initializer: + name: default + regularizer: + name: None + + player: + deterministic: True + games_num: 100000 + print_stats: True diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py new file mode 100644 index 
000000000000..6ab4c9e56f5a --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py @@ -0,0 +1,98 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg + + +@configclass +class ShadowHandPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 16 + max_iterations = 10000 + save_interval = 250 + experiment_name = "shadow_hand" + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, + actor_hidden_dims=[512, 512, 256, 128], + critic_hidden_dims=[512, 512, 256, 128], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=5.0e-4, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.016, + max_grad_norm=1.0, + ) + + +@configclass +class ShadowHandAsymFFPPORunnerCfg(RslRlOnPolicyRunnerCfg): + num_steps_per_env = 16 + max_iterations = 10000 + save_interval = 250 + experiment_name = "shadow_hand_openai_ff" + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, + actor_hidden_dims=[400, 400, 200, 100], + critic_hidden_dims=[512, 512, 256, 128], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=4, + num_mini_batches=4, + learning_rate=5.0e-4, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) + + +@configclass +class ShadowHandVisionFFPPORunnerCfg(RslRlOnPolicyRunnerCfg): + 
num_steps_per_env = 64 + max_iterations = 50000 + save_interval = 250 + experiment_name = "shadow_hand_vision" + policy = RslRlPpoActorCriticCfg( + init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, + actor_hidden_dims=[1024, 512, 512, 256, 128], + critic_hidden_dims=[1024, 512, 512, 256, 128], + activation="elu", + ) + algorithm = RslRlPpoAlgorithmCfg( + value_loss_coef=1.0, + use_clipped_value_loss=True, + clip_param=0.2, + entropy_coef=0.005, + num_learning_epochs=5, + num_mini_batches=4, + learning_rate=5.0e-4, + schedule="adaptive", + gamma=0.99, + lam=0.95, + desired_kl=0.01, + max_grad_norm=1.0, + ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ppo_cfg.yaml new file mode 100644 index 000000000000..9bba87a54552 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ppo_cfg.yaml @@ -0,0 +1,85 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause + +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: False + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [512, 512, 256, 128] + activations: elu + output: ACTIONS + value: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [512, 512, 256, 128] + activations: elu + output: ONE + + +# Rollout memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: -1 # automatically determined (same as agent:rollouts) + + +# PPO agent configuration (field names are from PPO_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ppo.html +agent: + class: PPO + rollouts: 16 + learning_epochs: 5 + mini_batches: 4 + discount_factor: 0.99 + lambda: 0.95 + learning_rate: 5.0e-04 + learning_rate_scheduler: KLAdaptiveLR + learning_rate_scheduler_kwargs: + kl_threshold: 0.016 + state_preprocessor: RunningStandardScaler + state_preprocessor_kwargs: null + value_preprocessor: RunningStandardScaler + value_preprocessor_kwargs: null + random_timesteps: 0 + learning_starts: 0 + grad_norm_clip: 1.0 + ratio_clip: 0.2 + value_clip: 0.2 + clip_predicted_values: True + entropy_loss_scale: 0.0 + value_loss_scale: 2.0 + kl_threshold: 0.0 + rewards_shaper_scale: 0.01 + time_limit_bootstrap: False + # logging and checkpoint + experiment: + directory: "shadow_hand" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 80000 + 
# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause

import glob
import os
import torch
import torch.nn as nn
import torchvision

from isaaclab.sensors import save_images_to_file
from isaaclab.utils import configclass


class FeatureExtractorNetwork(nn.Module):
    """CNN used to regress keypoint positions of the in-hand cube from image data.

    Input is a channel-last ``(N, 120, 120, 7)`` stack: RGB (channels 0-2), depth (channel 3)
    and segmentation (channels 4-6). Output is a flat ``(N, 27)`` pose/keypoint vector.
    """

    def __init__(self):
        super().__init__()
        num_channel = 7
        # NOTE: the LayerNorm shapes below are tied to a 120x120 input resolution.
        self.cnn = nn.Sequential(
            nn.Conv2d(num_channel, 16, kernel_size=6, stride=2, padding=0),
            nn.ReLU(),
            nn.LayerNorm([16, 58, 58]),
            nn.Conv2d(16, 32, kernel_size=4, stride=2, padding=0),
            nn.ReLU(),
            nn.LayerNorm([32, 28, 28]),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0),
            nn.ReLU(),
            nn.LayerNorm([64, 13, 13]),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=0),
            nn.ReLU(),
            nn.LayerNorm([128, 6, 6]),
            nn.AvgPool2d(6),
        )

        self.linear = nn.Sequential(
            nn.Linear(128, 27),
        )

        # ImageNet statistics, applied to the two RGB-like 3-channel groups (RGB and segmentation).
        self.data_transforms = torchvision.transforms.Compose([
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the CNN on a channel-last image stack.

        Args:
            x: Image tensor of shape (N, H, W, 7).

        Returns:
            Predicted pose/keypoint tensor of shape (N, 27).
        """
        # channel-last -> channel-first; clone so the in-place normalization below does not
        # write back into the caller's buffer through the permuted view (bug fix).
        x = x.permute(0, 3, 1, 2).clone()
        x[:, 0:3, :, :] = self.data_transforms(x[:, 0:3, :, :])
        x[:, 4:7, :, :] = self.data_transforms(x[:, 4:7, :, :])
        cnn_x = self.cnn(x)
        out = self.linear(cnn_x.view(-1, 128))
        return out


@configclass
class FeatureExtractorCfg:
    """Configuration for the feature extractor model."""

    train: bool = True
    """If True, the feature extractor model is trained during the rollout process. Default is True."""

    load_checkpoint: bool = False
    """If True, the feature extractor model is loaded from a checkpoint. Default is False."""

    write_image_to_file: bool = False
    """If True, the images from the camera sensor are written to file. Default is False."""


class FeatureExtractor:
    """Class for extracting features from image data.

    It uses a CNN to regress keypoint positions from normalized RGB, depth, and segmentation images.
    If the train flag is set to True, the CNN is trained during the rollout process.
    """

    def __init__(self, cfg: FeatureExtractorCfg, device: str, log_dir: str | None = None):
        """Initialize the feature extractor model.

        Args:
            cfg: Configuration for the feature extractor model.
            device: Device to run the model on.
            log_dir: Directory to save checkpoints. Default is None, which uses the local
                "logs" folder resolved relative to this file.

        Raises:
            FileNotFoundError: If ``cfg.load_checkpoint`` is True but no ``*.pth`` file exists
                in the log directory.
        """
        self.cfg = cfg
        self.device = device

        # Feature extractor model
        self.feature_extractor = FeatureExtractorNetwork()
        self.feature_extractor.to(self.device)

        self.step_count = 0
        if log_dir is not None:
            self.log_dir = log_dir
        else:
            self.log_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "logs")
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)

        if self.cfg.load_checkpoint:
            list_of_files = glob.glob(self.log_dir + "/*.pth")
            # fail with a clear message instead of max()'s opaque ValueError on an empty list
            if not list_of_files:
                raise FileNotFoundError(f"No feature extractor checkpoint (*.pth) found in '{self.log_dir}'.")
            latest_file = max(list_of_files, key=os.path.getctime)
            checkpoint = os.path.join(self.log_dir, latest_file)
            print(f"[INFO]: Loading feature extractor checkpoint from {checkpoint}")
            self.feature_extractor.load_state_dict(torch.load(checkpoint, weights_only=True))

        if self.cfg.train:
            self.optimizer = torch.optim.Adam(self.feature_extractor.parameters(), lr=1e-4)
            self.l2_loss = nn.MSELoss()
            self.feature_extractor.train()
        else:
            self.feature_extractor.eval()

    def _preprocess_images(
        self, rgb_img: torch.Tensor, depth_img: torch.Tensor, segmentation_img: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Preprocesses the input images.

        NOTE: the depth tensor is modified in place (the caller passes a fresh sensor copy).

        Args:
            rgb_img (torch.Tensor): RGB image tensor. Shape: (N, H, W, 3).
            depth_img (torch.Tensor): Depth image tensor. Shape: (N, H, W, 1).
            segmentation_img (torch.Tensor): Segmentation image tensor. Shape: (N, H, W, 3)

        Returns:
            tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Preprocessed RGB, depth, and segmentation
        """
        rgb_img = rgb_img / 255.0
        # process depth image: zero out invalid (infinite) readings, then normalize
        depth_img[depth_img == float("inf")] = 0
        depth_img /= 5.0
        # guard against an all-zero depth image, which would otherwise yield NaNs (bug fix)
        max_depth = torch.max(depth_img)
        if max_depth > 0:
            depth_img /= max_depth
        # process segmentation image: scale to [0, 1] and remove the per-image mean
        segmentation_img = segmentation_img / 255.0
        mean_tensor = torch.mean(segmentation_img, dim=(1, 2), keepdim=True)
        segmentation_img -= mean_tensor
        return rgb_img, depth_img, segmentation_img

    def _save_images(self, rgb_img: torch.Tensor, depth_img: torch.Tensor, segmentation_img: torch.Tensor):
        """Writes image buffers to file.

        Args:
            rgb_img (torch.Tensor): RGB image tensor. Shape: (N, H, W, 3).
            depth_img (torch.Tensor): Depth image tensor. Shape: (N, H, W, 1).
            segmentation_img (torch.Tensor): Segmentation image tensor. Shape: (N, H, W, 3).
        """
        save_images_to_file(rgb_img, "shadow_hand_rgb.png")
        save_images_to_file(depth_img, "shadow_hand_depth.png")
        save_images_to_file(segmentation_img, "shadow_hand_segmentation.png")

    def step(
        self, rgb_img: torch.Tensor, depth_img: torch.Tensor, segmentation_img: torch.Tensor, gt_pose: torch.Tensor
    ) -> tuple[torch.Tensor | None, torch.Tensor]:
        """Extracts the features using the images and trains the model if the train flag is set to True.

        Args:
            rgb_img (torch.Tensor): RGB image tensor. Shape: (N, H, W, 3).
            depth_img (torch.Tensor): Depth image tensor. Shape: (N, H, W, 1).
            segmentation_img (torch.Tensor): Segmentation image tensor. Shape: (N, H, W, 3).
            gt_pose (torch.Tensor): Ground truth pose tensor (position and corners). Shape: (N, 27).

        Returns:
            tuple[torch.Tensor | None, torch.Tensor]: Pose loss (None in inference mode) and predicted pose.
        """
        rgb_img, depth_img, segmentation_img = self._preprocess_images(rgb_img, depth_img, segmentation_img)

        if self.cfg.write_image_to_file:
            self._save_images(rgb_img, depth_img, segmentation_img)

        if self.cfg.train:
            # re-enable autograd: the environment may step the sim under inference mode
            with torch.enable_grad():
                with torch.inference_mode(False):
                    img_input = torch.cat((rgb_img, depth_img, segmentation_img), dim=-1)
                    self.optimizer.zero_grad()

                    predicted_pose = self.feature_extractor(img_input)
                    pose_loss = self.l2_loss(predicted_pose, gt_pose.clone()) * 100

                    pose_loss.backward()
                    self.optimizer.step()

                    # periodically checkpoint the CNN weights, tagging the file with the current loss
                    if self.step_count % 50000 == 0:
                        torch.save(
                            self.feature_extractor.state_dict(),
                            os.path.join(self.log_dir, f"cnn_{self.step_count}_{pose_loss.detach().cpu().numpy()}.pth"),
                        )

                    self.step_count += 1

                    return pose_loss, predicted_pose
        else:
            # pure inference: no loss is computed and no gradients are needed
            with torch.no_grad():
                img_input = torch.cat((rgb_img, depth_img, segmentation_img), dim=-1)
                predicted_pose = self.feature_extractor(img_input)
            return None, predicted_pose
# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause


from isaaclab_assets.robots.shadow_hand import SHADOW_HAND_CFG

import isaaclab.envs.mdp as mdp
import isaaclab.sim as sim_utils
from isaaclab.assets import ArticulationCfg, RigidObjectCfg
from isaaclab.envs import DirectRLEnvCfg
from isaaclab.managers import EventTermCfg as EventTerm
from isaaclab.managers import SceneEntityCfg
from isaaclab.markers import VisualizationMarkersCfg
from isaaclab.scene import InteractiveSceneCfg
from isaaclab.sim import SimulationCfg
from isaaclab.sim._impl.newton_manager_cfg import NewtonCfg
from isaaclab.sim._impl.solvers_cfg import MJWarpSolverCfg
from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg
from isaaclab.utils import configclass
from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
from isaaclab.utils.noise import GaussianNoiseCfg, NoiseModelWithAdditiveBiasCfg


@configclass
class EventCfg:
    """Configuration for randomization.

    The commented-out terms are randomizations that are currently disabled but kept
    as documented toggles for experimentation.
    """

    # -- robot
    # robot_physics_material = EventTerm(
    #     func=mdp.randomize_rigid_body_material,
    #     mode="reset",
    #     min_step_count_between_reset=720,
    #     params={
    #         "asset_cfg": SceneEntityCfg("robot"),
    #         "static_friction_range": (0.7, 1.3),
    #         "dynamic_friction_range": (1.0, 1.0),
    #         "restitution_range": (1.0, 1.0),
    #         "num_buckets": 250,
    #     },
    # )
    robot_joint_stiffness_and_damping = EventTerm(
        func=mdp.randomize_actuator_gains,
        min_step_count_between_reset=720,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("robot", joint_names=".*", joint_ids=list(range(20))),
            "stiffness_distribution_params": (0.75, 1.5),
            "damping_distribution_params": (0.3, 3.0),
            "operation": "scale",
            "distribution": "log_uniform",
        },
    )
    robot_joint_pos_limits = EventTerm(
        func=mdp.randomize_joint_parameters,
        min_step_count_between_reset=720,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("robot", joint_names=".*"),
            "lower_limit_distribution_params": (0.00, 0.01),
            "upper_limit_distribution_params": (0.00, 0.01),
            "operation": "add",
            "distribution": "gaussian",
        },
    )
    # robot_tendon_properties = EventTerm(
    #     func=mdp.randomize_fixed_tendon_parameters,
    #     min_step_count_between_reset=720,
    #     mode="reset",
    #     params={
    #         "asset_cfg": SceneEntityCfg("robot", fixed_tendon_names=".*"),
    #         "stiffness_distribution_params": (0.75, 1.5),
    #         "damping_distribution_params": (0.3, 3.0),
    #         "operation": "scale",
    #         "distribution": "log_uniform",
    #     },
    # )

    # -- object
    # object_physics_material = EventTerm(
    #     func=mdp.randomize_rigid_body_material,
    #     min_step_count_between_reset=720,
    #     mode="reset",
    #     params={
    #         "asset_cfg": SceneEntityCfg("object"),
    #         "static_friction_range": (0.7, 1.3),
    #         "dynamic_friction_range": (1.0, 1.0),
    #         "restitution_range": (1.0, 1.0),
    #         "num_buckets": 250,
    #     },
    # )
    object_scale_mass = EventTerm(
        func=mdp.randomize_rigid_body_mass,
        min_step_count_between_reset=720,
        mode="reset",
        params={
            "asset_cfg": SceneEntityCfg("object"),
            "mass_distribution_params": (0.5, 1.5),
            "operation": "scale",
            "distribution": "uniform",
        },
    )

    # -- scene
    # reset_gravity = EventTerm(
    #     func=mdp.randomize_physics_scene_gravity,
    #     mode="interval",
    #     is_global_time=True,
    #     interval_range_s=(36.0, 36.0),  # time_s = num_steps * (decimation * dt)
    #     params={
    #         "gravity_distribution_params": ([0.0, 0.0, 0.0], [0.0, 0.0, 0.4]),
    #         "operation": "add",
    #         "distribution": "gaussian",
    #     },
    # )


@configclass
class ShadowHandEnvCfg(DirectRLEnvCfg):
    """Base configuration for the shadow-hand in-hand cube manipulation task."""

    # env
    decimation = 2
    episode_length_s = 10.0
    action_space = 20
    observation_space = 157  # (full)
    state_space = 0
    asymmetric_obs = False
    obs_type = "full"

    # physics solver (MuJoCo Warp via Newton)
    solver_cfg = MJWarpSolverCfg(
        solver="newton",
        integrator="implicit",
        njmax=80,
        nconmax=70,
        impratio=10.0,
        cone="elliptic",
        update_data_interval=2,
        iterations=100,
        ls_iterations=15,
        ls_parallel=True,
        # save_to_mjcf="AllegroHand.xml",
    )

    # simulation
    newton_cfg = NewtonCfg(
        solver_cfg=solver_cfg,
        num_substeps=2,
        debug_mode=False,
    )

    sim: SimulationCfg = SimulationCfg(
        dt=1 / 120,
        render_interval=decimation,
        physics_material=RigidBodyMaterialCfg(
            static_friction=1.0,
            dynamic_friction=1.0,
        ),
        newton_cfg=newton_cfg,
    )
    # robot
    robot_cfg: ArticulationCfg = SHADOW_HAND_CFG.replace(prim_path="/World/envs/env_.*/Robot").replace(
        init_state=ArticulationCfg.InitialStateCfg(
            pos=(0.0, 0.0, 1.0),
            rot=(0.0, 0.0, 1.0, 0.0),
            joint_pos={".*": 0.0},
        )
    )
    actuated_joint_names = [
        "robot0_WRJ1",
        "robot0_WRJ0",
        "robot0_FFJ3",
        "robot0_FFJ2",
        "robot0_FFJ1",
        "robot0_MFJ3",
        "robot0_MFJ2",
        "robot0_MFJ1",
        "robot0_RFJ3",
        "robot0_RFJ2",
        "robot0_RFJ1",
        "robot0_LFJ4",
        "robot0_LFJ3",
        "robot0_LFJ2",
        "robot0_LFJ1",
        "robot0_THJ4",
        "robot0_THJ3",
        "robot0_THJ2",
        "robot0_THJ1",
        "robot0_THJ0",
    ]
    fingertip_body_names = [
        "robot0_ffdistal",
        "robot0_mfdistal",
        "robot0_rfdistal",
        "robot0_lfdistal",
        "robot0_thdistal",
    ]

    # in-hand object
    object_cfg: ArticulationCfg = ArticulationCfg(
        prim_path="/World/envs/env_.*/object",
        spawn=sim_utils.UsdFileCfg(
            usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
            mass_props=sim_utils.MassPropertiesCfg(density=200.0),
            scale=(0.9, 0.9, 0.9),
        ),
        init_state=ArticulationCfg.InitialStateCfg(pos=(0.0, 0.4, 1.05), rot=(1.0, 0.0, 0.0, 0.0)),
        actuators={},
        articulation_root_prim_path="",
    )
    # goal object
    goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg(
        prim_path="/Visuals/goal_marker",
        markers={
            "goal": sim_utils.UsdFileCfg(
                usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
                scale=(1.0, 1.0, 1.0),
            )
        },
    )
    # scene
    scene: InteractiveSceneCfg = InteractiveSceneCfg(
        num_envs=8192, env_spacing=0.75, replicate_physics=True, clone_in_fabric=True
    )

    # reset
    reset_position_noise = 0.01  # range of position at reset
    reset_dof_pos_noise = 0.2  # range of dof pos at reset
    reset_dof_vel_noise = 0.0  # range of dof vel at reset
    # reward scales
    dist_reward_scale = -10.0
    rot_reward_scale = 1.0
    rot_eps = 0.1
    action_penalty_scale = -0.0002
    reach_goal_bonus = 250
    fall_penalty = 0
    fall_dist = 0.24
    vel_obs_scale = 0.2
    success_tolerance = 0.1
    max_consecutive_success = 0
    av_factor = 0.1
    act_moving_average = 1.0
    force_torque_obs_scale = 10.0


@configclass
class ShadowHandOpenAIEnvCfg(ShadowHandEnvCfg):
    """Variant following the OpenAI "Learning Dexterity" setup (asymmetric observations,
    domain randomization, observation/action noise).

    Only values that differ from :class:`ShadowHandEnvCfg` are overridden here; everything
    else is inherited unchanged.
    """

    # env
    decimation = 3
    episode_length_s = 8.0
    observation_space = 42
    state_space = 187
    asymmetric_obs = True
    obs_type = "openai"
    # simulation: this variant needs a larger constraint buffer (njmax)
    solver_cfg = MJWarpSolverCfg(
        solver="newton",
        integrator="implicit",
        njmax=150,
        nconmax=70,
        impratio=10.0,
        cone="elliptic",
        update_data_interval=2,
        iterations=100,
        ls_iterations=15,
        ls_parallel=True,
        # save_to_mjcf="AllegroHand.xml",
    )

    newton_cfg = NewtonCfg(
        solver_cfg=solver_cfg,
        num_substeps=2,
        debug_mode=False,
    )

    # re-declared so that render_interval tracks the new decimation
    sim: SimulationCfg = SimulationCfg(
        dt=1 / 120,
        render_interval=decimation,
        physics_material=RigidBodyMaterialCfg(
            static_friction=1.0,
            dynamic_friction=1.0,
        ),
        newton_cfg=newton_cfg,
    )
    # reward scales that differ from the base task
    fall_penalty = -50
    success_tolerance = 0.4
    max_consecutive_success = 50
    act_moving_average = 0.3
    # domain randomization config
    events: EventCfg = EventCfg()
    # at every time-step add gaussian noise + bias. The bias is a gaussian sampled at reset
    action_noise_model: NoiseModelWithAdditiveBiasCfg = NoiseModelWithAdditiveBiasCfg(
        noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.05, operation="add"),
        bias_noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.015, operation="abs"),
    )
    # at every time-step add gaussian noise + bias. The bias is a gaussian sampled at reset
    observation_noise_model: NoiseModelWithAdditiveBiasCfg = NoiseModelWithAdditiveBiasCfg(
        noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.002, operation="add"),
        bias_noise_cfg=GaussianNoiseCfg(mean=0.0, std=0.0001, operation="abs"),
    )
+# +# SPDX-License-Identifier: BSD-3-Clause + + +from __future__ import annotations + +import torch + +from PIL import Image + +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation, RigidObject +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sensors import TiledCamera, TiledCameraCfg +from isaaclab.utils import configclass +from isaaclab.utils.math import quat_apply + +from isaaclab_tasks.direct.inhand_manipulation.inhand_manipulation_env import InHandManipulationEnv, unscale + +from .feature_extractor import FeatureExtractor, FeatureExtractorCfg +from .shadow_hand_env_cfg import ShadowHandEnvCfg + + +@configclass +class ShadowHandVisionEnvCfg(ShadowHandEnvCfg): + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=1225, env_spacing=2.0, replicate_physics=True) + + # camera + tiled_camera: TiledCameraCfg = TiledCameraCfg( + renderer_type="newton_warp", + prim_path="/World/envs/env_.*/Camera", + offset=TiledCameraCfg.OffsetCfg(pos=(0, 0.4, 2), rot=(0, 1, 0, 0), convention="opengl"), + data_types=["rgb", "depth", "semantic_segmentation"], + spawn=sim_utils.PinholeCameraCfg( + focal_length=24.0, focus_distance=400.0, horizontal_aperture=20.955, clipping_range=(0.1, 20.0) + ), + width=120, + height=120, + ) + feature_extractor = FeatureExtractorCfg() + + # env + observation_space = 164 + 27 # state observation + vision CNN embedding + state_space = 187 + 27 # asymmetric states + vision CNN embedding + + +@configclass +class ShadowHandVisionEnvPlayCfg(ShadowHandVisionEnvCfg): + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=64, env_spacing=2.0, replicate_physics=True) + # inference for CNN + feature_extractor = FeatureExtractorCfg(train=False, load_checkpoint=True) + + +class ShadowHandVisionEnv(InHandManipulationEnv): + cfg: ShadowHandVisionEnvCfg + + def __init__(self, cfg: ShadowHandVisionEnvCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) + # Use 
the log directory from the configuration + self.feature_extractor = FeatureExtractor(self.cfg.feature_extractor, self.device, self.cfg.log_dir) + # hide goal cubes + self.goal_pos[:, :] = torch.tensor([-0.2, 0.1, 0.6], device=self.device) + # keypoints buffer + self.gt_keypoints = torch.ones(self.num_envs, 8, 3, dtype=torch.float32, device=self.device) + self.goal_keypoints = torch.ones(self.num_envs, 8, 3, dtype=torch.float32, device=self.device) + + def _setup_scene(self): + # add hand, in-hand object, and goal object + self.hand = Articulation(self.cfg.robot_cfg) + self.object = Articulation(self.cfg.object_cfg) + self._tiled_camera = TiledCamera(self.cfg.tiled_camera) + # clone and replicate (no need to filter for this environment) + self.scene.clone_environments(copy_from_source=False) + # add articulation to scene - we must register to scene to randomize with EventManager + self.scene.articulations["robot"] = self.hand + self.scene.articulations["object"] = self.object + self.scene.sensors["tiled_camera"] = self._tiled_camera + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _compute_image_observations(self): + # generate ground truth keypoints for in-hand cube + compute_keypoints(pose=torch.cat((self.object_pos, self.object_rot), dim=1), out=self.gt_keypoints) + + object_pose = torch.cat([self.object_pos, self.gt_keypoints.view(-1, 24)], dim=-1) + + # train CNN to regress on keypoint positions + pose_loss, embeddings = self.feature_extractor.step( + self._tiled_camera.data.output["rgb"], + self._tiled_camera.data.output["depth"], + self._tiled_camera.data.output["semantic_segmentation"][..., :3], + object_pose, + ) + + self.embeddings = embeddings.clone().detach() + # compute keypoints for goal cube + compute_keypoints( + pose=torch.cat((torch.zeros_like(self.goal_pos), self.goal_rot), dim=-1), out=self.goal_keypoints + ) + + obs = torch.cat( + ( + 
self.embeddings, + self.goal_keypoints.view(-1, 24), + ), + dim=-1, + ) + + # log pose loss from CNN training + if "log" not in self.extras: + self.extras["log"] = dict() + self.extras["log"]["pose_loss"] = pose_loss + + return obs + + def _compute_proprio_observations(self): + """Proprioception observations from physics.""" + obs = torch.cat( + ( + # hand + unscale(self.hand_dof_pos, self.hand_dof_lower_limits, self.hand_dof_upper_limits), + self.cfg.vel_obs_scale * self.hand_dof_vel, + # goal + self.in_hand_pos, + self.goal_rot, + # fingertips + self.fingertip_pos.view(self.num_envs, self.num_fingertips * 3), + self.fingertip_rot.view(self.num_envs, self.num_fingertips * 4), + self.fingertip_velocities.view(self.num_envs, self.num_fingertips * 6), + # actions + self.actions, + ), + dim=-1, + ) + return obs + + def _compute_states(self): + """Asymmetric states for the critic.""" + sim_states = self.compute_full_state() + state = torch.cat((sim_states, self.embeddings), dim=-1) + return state + + def _get_observations(self) -> dict: + # proprioception observations + state_obs = self._compute_proprio_observations() + # vision observations from CMM + image_obs = self._compute_image_observations() + obs = torch.cat((state_obs, image_obs), dim=-1) + # asymmetric critic states + # TODO: implement once mewton extended state attributes are in + # self.fingertip_force_sensors = self.hand.root_physx_view.get_link_incoming_joint_force()[:, self.finger_bodies] + state = self._compute_states() + + observations = {"policy": obs, "critic": state} + return observations + + +@torch.jit.script +def compute_keypoints( + pose: torch.Tensor, + num_keypoints: int = 8, + size: tuple[float, float, float] = (2 * 0.03, 2 * 0.03, 2 * 0.03), + out: torch.Tensor | None = None, +): + """Computes positions of 8 corner keypoints of a cube. + + Args: + pose: Position and orientation of the center of the cube. Shape is (N, 7) + num_keypoints: Number of keypoints to compute. 
Default = 8 + size: Length of X, Y, Z dimensions of cube. Default = [0.06, 0.06, 0.06] + out: Buffer to store keypoints. If None, a new buffer will be created. + """ + num_envs = pose.shape[0] + if out is None: + out = torch.ones(num_envs, num_keypoints, 3, dtype=torch.float32, device=pose.device) + else: + out[:] = 1.0 + for i in range(num_keypoints): + # which dimensions to negate + n = [((i >> k) & 1) == 0 for k in range(3)] + corner_loc = ([(1 if n[k] else -1) * s / 2 for k, s in enumerate(size)],) + corner = torch.tensor(corner_loc, dtype=torch.float32, device=pose.device) * out[:, i, :] + # express corner position in the world frame + out[:, i, :] = pose[:, :3] + quat_apply(pose[:, 3:7], corner) + + return out