Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,8 @@ To generate the locomanipulation dataset, use the following command:
--navigate_step 130 \
--enable_pinocchio \
--output_file ./datasets/generated_dataset_g1_locomanipulation_sdg.hdf5 \
--enable_cameras
--enable_cameras \
--visualizer kit

.. note::

Expand Down
27 changes: 20 additions & 7 deletions scripts/imitation_learning/locomanipulation_sdg/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
import isaaclab.sim as sim_utils
from isaaclab.utils import configclass
from isaaclab.utils.datasets import EpisodeData, HDF5DatasetFileHandler
from isaaclab.utils.math import convert_quat

import isaaclab_mimic.locomanipulation_sdg.envs # noqa: F401
from isaaclab_mimic.locomanipulation_sdg.data_classes import LocomanipulationSDGOutputData
Expand Down Expand Up @@ -285,7 +286,8 @@ def setup_navigation_scene(
parent=env.get_end_fixture(),
)
base_goal_approach = RelativePose(
relative_pose=torch.tensor([-approach_distance, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]), parent=base_goal
relative_pose=torch.tensor([-approach_distance, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0], device=env.device),
parent=base_goal,
)

# Plan navigation path
Expand Down Expand Up @@ -321,7 +323,7 @@ def handle_grasp_state(
# Set control targets - robot stays stationary during grasping
output_data.data_generation_state = int(LocomanipulationSDGDataGenerationState.GRASP_OBJECT)
output_data.recording_step = recording_step
output_data.base_velocity_target = torch.tensor([0.0, 0.0, 0.0])
output_data.base_velocity_target = torch.tensor([0.0, 0.0, 0.0], device=env.device)

# Transform hand poses relative to object
output_data.left_hand_pose_target = transform_relative_pose(
Expand Down Expand Up @@ -369,7 +371,7 @@ def handle_lift_state(
# Set control targets - robot stays stationary during lifting
output_data.data_generation_state = int(LocomanipulationSDGDataGenerationState.LIFT_OBJECT)
output_data.recording_step = recording_step
output_data.base_velocity_target = torch.tensor([0.0, 0.0, 0.0])
output_data.base_velocity_target = torch.tensor([0.0, 0.0, 0.0], device=env.device)

# Transform hand poses relative to base
output_data.left_hand_pose_target = transform_relative_pose(
Expand Down Expand Up @@ -428,7 +430,7 @@ def handle_navigate_state(
# Set control targets
output_data.data_generation_state = int(LocomanipulationSDGDataGenerationState.NAVIGATE)
output_data.recording_step = recording_step
output_data.base_velocity_target = torch.tensor([linear_velocity, 0.0, angular_velocity])
output_data.base_velocity_target = torch.tensor([linear_velocity, 0.0, angular_velocity], device=env.device)

# Transform hand poses relative to base
output_data.left_hand_pose_target = transform_relative_pose(
Expand Down Expand Up @@ -482,7 +484,7 @@ def handle_approach_state(
# Set control targets
output_data.data_generation_state = int(LocomanipulationSDGDataGenerationState.APPROACH)
output_data.recording_step = recording_step
output_data.base_velocity_target = torch.tensor([linear_velocity, 0.0, angular_velocity])
output_data.base_velocity_target = torch.tensor([linear_velocity, 0.0, angular_velocity], device=env.device)

# Transform hand poses relative to base
output_data.left_hand_pose_target = transform_relative_pose(
Expand Down Expand Up @@ -543,7 +545,7 @@ def handle_drop_off_state(
# Set control targets
output_data.data_generation_state = int(LocomanipulationSDGDataGenerationState.DROP_OFF_OBJECT)
output_data.recording_step = recording_step
output_data.base_velocity_target = torch.tensor([linear_velocity, 0.0, angular_velocity])
output_data.base_velocity_target = torch.tensor([linear_velocity, 0.0, angular_velocity], device=env.device)

# Transform hand poses relative to end fixture
output_data.left_hand_pose_target = transform_relative_pose(
Expand Down Expand Up @@ -640,7 +642,9 @@ def replay(
"""

# Initialize environment to starting state
env.reset_to(state=input_episode_data.get_initial_state(), env_ids=torch.tensor([0]), is_relative=True)
env.reset_to(
state=input_episode_data.get_initial_state(), env_ids=torch.tensor([0], device=env.device), is_relative=True
)

# Create navigation control configuration
config = LocomanipulationSDGControlConfig(
Expand Down Expand Up @@ -742,6 +746,7 @@ def replay(
# Load input data
input_dataset_file_handler = HDF5DatasetFileHandler()
input_dataset_file_handler.open(args_cli.dataset)
is_legacy_quat_format = input_dataset_file_handler.is_legacy_quaternion_format()

for i in range(args_cli.num_runs):
if args_cli.demo is None:
Expand All @@ -751,6 +756,14 @@ def replay(

input_episode_data = input_dataset_file_handler.load_episode(demo, args_cli.device)

# Convert action quaternions from legacy WXYZ to XYZW format if needed.
# load_episode() auto-converts root_pose state keys, but not action data.
# The action format is: [left_pos(3), left_quat(4), right_pos(3), right_quat(4), joints(14)]
if is_legacy_quat_format and input_episode_data is not None and "actions" in input_episode_data.data:
actions = input_episode_data.data["actions"]
actions[:, 3:7] = convert_quat(actions[:, 3:7], to="xyzw") # left hand quat
actions[:, 10:14] = convert_quat(actions[:, 10:14], to="xyzw") # right hand quat

replay(
env=env,
input_episode_data=input_episode_data,
Expand Down
16 changes: 8 additions & 8 deletions source/isaaclab/isaaclab/scene/interactive_scene.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,15 +454,15 @@ def reset_to(
for asset_name, articulation in self._articulations.items():
asset_state = state["articulation"][asset_name]
# root state
root_pose = asset_state["root_pose"].clone()
root_pose = asset_state["root_pose"].clone().to(self.device)
if is_relative:
root_pose[:, :3] += self.env_origins[env_ids]
root_velocity = asset_state["root_velocity"].clone()
root_velocity = asset_state["root_velocity"].clone().to(self.device)
articulation.write_root_pose_to_sim(root_pose, env_ids=env_ids)
articulation.write_root_velocity_to_sim(root_velocity, env_ids=env_ids)
# joint state
joint_position = asset_state["joint_position"].clone()
joint_velocity = asset_state["joint_velocity"].clone()
joint_position = asset_state["joint_position"].clone().to(self.device)
joint_velocity = asset_state["joint_velocity"].clone().to(self.device)
articulation.write_joint_state_to_sim(joint_position, joint_velocity, env_ids=env_ids)
# FIXME: This is not generic as it assumes PD control over the joints.
# This assumption does not hold for effort controlled joints.
Expand All @@ -471,19 +471,19 @@ def reset_to(
# deformable objects
for asset_name, deformable_object in self._deformable_objects.items():
asset_state = state["deformable_object"][asset_name]
nodal_position = asset_state["nodal_position"].clone()
nodal_position = asset_state["nodal_position"].clone().to(self.device)
if is_relative:
nodal_position[:, :3] += self.env_origins[env_ids]
nodal_velocity = asset_state["nodal_velocity"].clone()
nodal_velocity = asset_state["nodal_velocity"].clone().to(self.device)
deformable_object.write_nodal_pos_to_sim(nodal_position, env_ids=env_ids)
deformable_object.write_nodal_velocity_to_sim(nodal_velocity, env_ids=env_ids)
# rigid objects
for asset_name, rigid_object in self._rigid_objects.items():
asset_state = state["rigid_object"][asset_name]
root_pose = asset_state["root_pose"].clone()
root_pose = asset_state["root_pose"].clone().to(self.device)
if is_relative:
root_pose[:, :3] += self.env_origins[env_ids]
root_velocity = asset_state["root_velocity"].clone()
root_velocity = asset_state["root_velocity"].clone().to(self.device)
rigid_object.write_root_pose_to_sim(root_pose, env_ids=env_ids)
rigid_object.write_root_velocity_to_sim(root_velocity, env_ids=env_ids)
# surface grippers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,16 +160,17 @@ def load_input_data(self, episode_data: EpisodeData, step: int) -> Locomanipulat

object_pose = dataset_state["rigid_object"]["object"]["root_pose"]

base_pose = episode_data.get_initial_state()["articulation"]["robot"]["root_pose"]
data = LocomanipulationSDGInputData(
left_hand_pose_target=dataset_action[0:7],
right_hand_pose_target=dataset_action[7:14],
left_hand_joint_positions_target=dataset_action[14:21],
right_hand_joint_positions_target=dataset_action[21:28],
base_pose=episode_data.get_initial_state()["articulation"]["robot"]["root_pose"],
base_pose=base_pose,
object_pose=object_pose,
fixture_pose=torch.tensor(
[0.0, 0.55, -0.3, 1.0, 0.0, 0.0, 0.0]
), # Table pose is not recorded for this env.
[0.0, 0.55, -0.3, 0.0, 0.0, 0.0, 1.0], device=base_pose.device
), # Table pose is not recorded for this env. Quaternion in XYZW format (identity).
)

return data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

import numpy as np
import torch
import warp as wp

import isaaclab.utils.math as math_utils
from isaaclab.sim.views import XformPrimView

from .occupancy_map_utils import OccupancyMap, intersect_occupancy_maps
from .transform_utils import transform_mul
Expand Down Expand Up @@ -83,7 +85,8 @@ def __init__(self, scene, entity_name: str, body_name: str):

def get_pose(self):
"""Get the 3D pose of the entity."""
pose = self.scene[self.entity_name].data.body_link_state_w[
body_link_state_w = wp.to_torch(self.scene[self.entity_name].data.body_link_state_w)
pose = body_link_state_w[
:,
self.scene[self.entity_name].data.body_names.index(self.body_name),
:7,
Expand All @@ -98,16 +101,25 @@ def __init__(self, scene, entity_name: str):
self.scene = scene
self.entity_name = entity_name

def _get_xform_view(self) -> XformPrimView:
    """Return the XformPrimView for this asset, refreshing it if prims were not yet cloned."""
    view = self.scene[self.entity_name]
    if view.count != 0:
        return view
    # The cached view was created before environment cloning, so it matched zero
    # prims. Rebuild it against the same prim path now that the prims exist, and
    # store the refreshed view back into the scene's extras cache.
    refreshed = XformPrimView(view._prim_path, device=view.device)
    self.scene.extras[self.entity_name] = refreshed
    return refreshed

def get_pose(self):
    """Get the 3D pose of the entity.

    Returns the world-frame position concatenated with the orientation along
    the last dimension, as obtained from ``XformPrimView.get_world_poses()``.
    """
    # NOTE(review): the first assignment looks like leftover from an earlier
    # revision -- it is immediately overwritten by the refreshed view below.
    xform_prim = self.scene[self.entity_name]
    xform_prim = self._get_xform_view()
    position, orientation = xform_prim.get_world_poses()
    pose = torch.cat([position, orientation], dim=-1)
    return pose

def set_pose(self, pose: torch.Tensor):
    """Set the 3D pose of the entity.

    Args:
        pose: Tensor whose last dimension packs the position in the first three
            components and the orientation in the remaining components.
    """
    # NOTE(review): the first assignment appears to be stale -- it is
    # immediately replaced by the refreshed view on the next line.
    xform_prim = self.scene[self.entity_name]
    xform_prim = self._get_xform_view()
    position = pose[..., :3]
    orientation = pose[..., 3:]
    xform_prim.set_world_poses(position, orientation, None)
Expand All @@ -124,8 +136,9 @@ def get_pose(self):
"""Get the 3D pose of the entity."""

parent_pose = self.parent.get_pose()
relative_pose = self.relative_pose.to(parent_pose.device)

pose = transform_mul(parent_pose, self.relative_pose)
pose = transform_mul(parent_pose, relative_pose)

return pose

Expand All @@ -146,6 +159,10 @@ def get_occupancy_map(self):
)

transform = self.get_transform_2d().detach().cpu().numpy()
# get_world_poses() may return a batched (num_envs, 3, 3) or empty (0, 3, 3) tensor.
# For a fixed background asset placed at the world origin, fall back to identity when empty.
if transform.ndim == 3:
transform = transform[0] if transform.shape[0] > 0 else np.eye(3)

occupancy_map = local_occupancy_map.transformed(transform)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,8 @@ def transform_inv(transform: torch.Tensor) -> torch.Tensor:

def transform_relative_pose(world_pose: torch.Tensor, src_frame_pose: torch.Tensor, dst_frame_pose: torch.Tensor):
    """Compute the relative pose with respect to a source frame, and apply this relative pose to a destination frame."""
    # Move the inputs onto the destination frame's device so all transform math
    # happens on a single device.
    target_device = dst_frame_pose.device
    # Express the world pose in the source frame, then re-apply it in the
    # destination frame.
    pose_in_src = transform_mul(
        transform_inv(src_frame_pose.to(target_device)),
        world_pose.to(target_device),
    )
    return transform_mul(dst_frame_pose, pose_in_src)
Loading