diff --git a/autonomy/simulation/Humanoid_Wato/arm_assembly/fingertip_ik.py b/autonomy/simulation/Humanoid_Wato/arm_assembly/fingertip_ik.py
new file mode 100644
index 0000000..072ee6e
--- /dev/null
+++ b/autonomy/simulation/Humanoid_Wato/arm_assembly/fingertip_ik.py
@@ -0,0 +1,265 @@
+"""
+Standalone gradient-based IK for arm_assembly (6-DOF arm + 15-DOF hand).
+Solves for joint angles so that 5 fingertips reach desired world positions
+using damped least-squares (no dependencies on other scripts in this repo).
+
+Joint 0 (shoulder_flexion_extension) issue:
+    MuJoCo's URDF importer can set continuous joints to range [0,0], which would
+    lock joint 0 at zero. We bypass that in clip_qpos_to_limits(): when
+    lower == upper we skip clipping so the IK can move that joint. See
+    diagnose_jacobian_joint0() and print_joint_limits_urdf() to inspect.
+"""
+import os
+import mujoco
+import numpy as np
+from pathlib import Path
+
+# Path to URDF (script next to arm_assembly folder or repo root)
+_SCRIPT_DIR = Path(__file__).resolve().parent
+URDF_PATH = _SCRIPT_DIR / "arm_assembly.urdf"
+
+# Fingertip body names (distal link of each finger in arm_assembly.urdf)
+FINGERTIP_BODIES = [
+    "IP_THUMB_v1_1",    # thumb
+    "DIP_INDEX_v1_1",   # index
+    "DIP_MIDDLE_v1_1",  # middle
+    "DIP_RING_v1_1",    # ring
+    "DIP_PINKY_v1_1",   # pinky
+]
+
+# First N DOFs are arm (shoulder, elbow, wrist); rest are hand. Used for weighted IK.
+NUM_ARM_DOFS = 6
+
+
+def load_model(urdf_path=None):
+    """Load the arm_assembly URDF as a MuJoCo model, resolving mesh paths."""
+    path = Path(urdf_path or URDF_PATH).resolve()
+    if not path.exists():
+        raise FileNotFoundError(f"URDF not found: {path}")
+    mesh_dir = path.parent / "meshes"
+    if not mesh_dir.is_dir():
+        raise FileNotFoundError(f"Mesh directory not found: {mesh_dir}")
+    # MuJoCo's URDF importer keeps only mesh basename and opens from cwd.
+    # Use mesh filenames without "meshes/" and chdir to meshes so they are found.
+    xml = path.read_text()
+    xml = xml.replace('filename="meshes/', 'filename="')
+    old_cwd = os.getcwd()
+    try:
+        os.chdir(mesh_dir)
+        return mujoco.MjModel.from_xml_string(xml)
+    finally:
+        os.chdir(old_cwd)
+
+
+def body_name_to_id(model: mujoco.MjModel, name: str) -> int:
+    """Return the body id for ``name``; re-raise with the list of valid names."""
+    try:
+        return model.body(name).id
+    except KeyError:
+        # List bodies for debugging
+        names = [model.body(i).name for i in range(model.nbody)]
+        raise KeyError(f"Body '{name}' not in model. Available: {names}")
+
+
+def print_joint_limits_urdf(model: mujoco.MjModel) -> None:
+    """Print joint position limits from the MuJoCo model (from URDF).
+    Use this to compare with USD joint limits (e.g. in Isaac Sim).
+    """
+    print("[joint limits] From URDF (MuJoCo model.jnt_range):")
+    for i in range(model.njnt):
+        jnt_type = model.jnt_type[i]
+        if jnt_type not in (mujoco.mjtJoint.mjJNT_HINGE, mujoco.mjtJoint.mjJNT_SLIDE):
+            continue
+        name = mujoco.mj_id2name(model, mujoco.mjtObj.mjOBJ_JOINT, i) or f"joint_{i}"
+        qposadr = model.jnt_qposadr[i]
+        if qposadr < 0:
+            continue
+        lo, hi = model.jnt_range[i, 0], model.jnt_range[i, 1]
+        finite = np.isfinite(lo) or np.isfinite(hi)
+        marker = " <-- joint 0 (shoulder_flexion)" if qposadr == 0 else ""
+        if finite:
+            print(f"  [qpos {qposadr}] {name}: lower={lo:.4f}, upper={hi:.4f}{marker}")
+            if qposadr == 0 and lo == hi:
+                print("      ^ LOCKED at one value (lower=upper). MuJoCo URDF import may have set continuous joints to [0,0]. Fix URDF or USD so this joint has a real range.")
+        else:
+            print(f"  [qpos {qposadr}] {name}: (no limits / continuous){marker}")
+
+
+def clip_qpos_to_limits(model: mujoco.MjModel, data: mujoco.MjData) -> None:
+    """Clip data.qpos to joint limits (only for limited joints).
+    Bypass: if lower == upper (e.g. MuJoCo imported continuous as [0,0]), skip clipping
+    so the IK can move that joint instead of locking it.
+ """ + for i in range(model.njnt): + jnt_type = model.jnt_type[i] + if jnt_type not in (mujoco.mjtJoint.mjJNT_HINGE, mujoco.mjtJoint.mjJNT_SLIDE): + continue + qposadr = model.jnt_qposadr[i] + if qposadr < 0: + continue + lo = model.jnt_range[i, 0] + hi = model.jnt_range[i, 1] + if lo == hi: + continue # bypass: treat as unlimited (e.g. continuous imported as [0,0]) + if np.isfinite(lo) or np.isfinite(hi): + data.qpos[qposadr] = np.clip(data.qpos[qposadr], lo, hi) + + +def solve_fingertip_ik( + model: mujoco.MjModel, + data: mujoco.MjData, + targets: dict[str, np.ndarray], + *, + damping: float = 1e-4, + step: float = 0.5, + max_iter: int = 200, + tol: float = 1e-4, + clip_limits: bool = True, + arm_dofs: int = NUM_ARM_DOFS, + finger_weight: float = 4.0, + joint0_weight: float = 0.4, +) -> tuple[np.ndarray, bool, float]: + """ + Run damped least-squares IK so that each fingertip reaches its target. + + Weighted regularization: arm DOFs (first arm_dofs) are penalized less than + finger DOFs (finger_weight > 1), so the solver prefers moving the arm to reach. + joint0_weight < 1 (default 0.4) further prefers joint 0 (shoulder_flexion) when + error has a Y/Z component, so the arm uses it to reduce large Y (or Y+Z) error. + + Joint 0 rotates about X so it cannot help with pure X error; for Y/Z error it can. + targets: dict mapping body name -> (3,) world position. + Returns: (qpos, converged, final_max_error). + """ + # Build list of (body_name, target_pos) in consistent order + tip_names = [b for b in FINGERTIP_BODIES if b in targets] + if not tip_names: + raise ValueError("targets must contain at least one of %s" % FINGERTIP_BODIES) + target_list = [targets[b] for b in tip_names] + + # Diagonal regularization: joint 0 preferred when < 1, other arm 1, fingers finger_weight. 
+ nv = model.nv + arm_dofs = min(arm_dofs, nv) + W = np.ones(nv, dtype=np.float64) + W[0] = max(1e-3, float(joint0_weight)) + W[arm_dofs:] = finger_weight + + for _ in range(max_iter): + mujoco.mj_forward(model, data) + + J_list = [] + err_list = [] + + for body_name, target in zip(tip_names, target_list): + body_id = body_name_to_id(model, body_name) + pos = data.xpos[body_id].copy() + err = target - pos + jacp = np.zeros((3, model.nv)) + jacr = np.zeros((3, model.nv)) + mujoco.mj_jacBody(model, data, jacp, jacr, body_id) + J_list.append(jacp) + err_list.append(err) + + J = np.vstack(J_list) + err = np.concatenate(err_list) + max_err = np.max(np.abs(err)) + + if max_err < tol: + return data.qpos.copy(), True, float(max_err) + + # Weighted damped least squares + A = J.T @ J + damping * np.diag(W) + rhs = J.T @ err + dq = np.linalg.solve(A, rhs) + + # All joints in this model are 1-DOF; qpos layout matches dofs. + data.qpos[:nv] += step * dq + + if clip_limits: + clip_qpos_to_limits(model, data) + + return data.qpos.copy(), False, float(max_err) + + +def get_fingertip_positions(model: mujoco.MjModel, data: mujoco.MjData) -> dict[str, np.ndarray]: + """Return current world positions of all 5 fingertips.""" + mujoco.mj_forward(model, data) + out = {} + for name in FINGERTIP_BODIES: + try: + bid = body_name_to_id(model, name) + out[name] = data.xpos[bid].copy() + except KeyError: + pass + return out + + +def diagnose_jacobian_joint0( + model: mujoco.MjModel, + data: mujoco.MjData, + err: np.ndarray | None = None, +) -> None: + """ + At current qpos, build the 15x21 task Jacobian J and explain why joint 0 + (shoulder_flexion_extension) often gets dq[0] ≈ 0. Joint 0 rotates about X, + so J's column 0 has zero X components; pure-X position error contributes + nothing to (J.T @ err)[0]. 
+ + If err is provided (15-D task error: [ex,ey,ez] x 5 fingertips), also prints + error by axis and (J.T @ err) for first 6 joints so you can see if joint 0 + gets a non-zero rhs when error is in Y. + """ + mujoco.mj_forward(model, data) + J_list = [] + for name in FINGERTIP_BODIES: + bid = body_name_to_id(model, name) + jacp = np.zeros((3, model.nv)) + jacr = np.zeros((3, model.nv)) + mujoco.mj_jacBody(model, data, jacp, jacr, bid) + J_list.append(jacp) + J = np.vstack(J_list) + j0 = J[:, 0] + x_rows = [0, 3, 6, 9, 12] + j0_x = j0[x_rows] + j0_yz = np.delete(j0, x_rows) + print("Joint 0 (shoulder_flexion_extension) Jacobian column (15-D task):") + print(" ||J[:,0]|| =", np.linalg.norm(j0)) + print(" X components (rows 0,3,6,9,12):", j0_x, " -> should be ~0 (joint rotates about X)") + print(" Y,Z components norm:", np.linalg.norm(j0_yz)) + err_x = np.zeros(15) + err_x[0::3] = 1.0 + rhs0_x = (J.T @ err_x)[0] + print(" (J.T @ err_pure_X)[0] =", rhs0_x, " -> 0 means joint 0 gets no update for forward (X) reach") + err_y = np.zeros(15) + err_y[1::3] = 1.0 + rhs0_y = (J.T @ err_y)[0] + rhs1_y = (J.T @ err_y)[1] + print(" (J.T @ err_pure_Y)[0] =", rhs0_y, " [1] =", rhs1_y, " -> joint 0 can help for Y error") + print(" So dq[0] stays 0 when error is mostly in world X.") + + if err is not None and err.size == 15: + rhs = J.T @ err + err_x_sum = np.sum(np.abs(err[0::3])) + err_y_sum = np.sum(np.abs(err[1::3])) + err_z_sum = np.sum(np.abs(err[2::3])) + print(" --- With current task error ---") + print(" |error| by axis (sum over 5 tips): X =", err_x_sum, " Y =", err_y_sum, " Z =", err_z_sum) + print(" (J.T @ err)[0:6] (arm joints):", rhs[:6]) + print(" -> If Y error is large but rhs[0] ~ 0, other joints are covering Y; if rhs[0] != 0, joint 0 could step.") + + +def main(): + model = load_model() + data = mujoco.MjData(model) + + # --- Joint limits from URDF (MuJoCo) --- + print("========== Joint limits (URDF / MuJoCo) ==========") + print_joint_limits_urdf(model) + print() 
+
+    data.qpos[:] = 0.0
+    mujoco.mj_forward(model, data)
+    default_positions = get_fingertip_positions(model, data)
+    print(f"[sanity] found {len(default_positions)}/5 fingertip bodies in MuJoCo model")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/autonomy/simulation/Humanoid_Wato/fingertip_ik_isaac_sim.py b/autonomy/simulation/Humanoid_Wato/fingertip_ik_isaac_sim.py
new file mode 100644
index 0000000..4f4c8d6
--- /dev/null
+++ b/autonomy/simulation/Humanoid_Wato/fingertip_ik_isaac_sim.py
@@ -0,0 +1,246 @@
+import argparse
+from isaaclab.app import AppLauncher
+
+parser = argparse.ArgumentParser(description="Fingertip IK in Isaac Sim: 5 EE targets, 21-DOF arm+hand")
+AppLauncher.add_app_launcher_args(parser)
+args_cli = parser.parse_args()
+
+# The app must be launched before any other isaaclab import.
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+import torch
+import numpy as np
+import isaaclab.sim as sim_utils
+from isaaclab.assets import AssetBaseCfg
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.scene import InteractiveScene, InteractiveSceneCfg
+from isaaclab.utils import configclass
+from isaaclab.markers import VisualizationMarkers, VisualizationMarkersCfg
+
+from HumanoidRL.HumanoidRLPackage.HumanoidRLSetup.modelCfg.humanoid import ARM_CFG
+
+from arm_assembly.fingertip_ik import (
+    load_model,
+    solve_fingertip_ik,
+    get_fingertip_positions,
+    FINGERTIP_BODIES,
+)
+
+@configclass
+class FingertipIKSceneCfg(InteractiveSceneCfg):
+    ground = AssetBaseCfg(
+        prim_path="/World/defaultGroundPlane",
+        spawn=sim_utils.GroundPlaneCfg(),
+        init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
+    )
+    dome_light = AssetBaseCfg(
+        prim_path="/World/Light",
+        spawn=sim_utils.DomeLightCfg(intensity=3000.0, color=(0.75, 0.75, 0.75)),
+    )
+    robot = ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
+
+
+def print_joint_limits_from_usd(robot):
+    """Print joint position limits from the USD-loaded articulation (Isaac Sim).
+    Use this to check if joint 0 (shoulder_flexion_extension) has limits that could block motion.
+    """
+    names = getattr(robot.data, "joint_names", None)
+    # FIX: `tensor_a or tensor_b` raises "Boolean value of Tensor with more than one
+    # element is ambiguous" for multi-element torch tensors, so the fallback to
+    # joint_pos_limits must be an explicit None check, not `or`.
+    limits = getattr(robot.data, "default_joint_pos_limits", None)
+    if limits is None:
+        limits = getattr(robot.data, "joint_pos_limits", None)
+    if names is None or limits is None:
+        print("[joint limits] joint_names or (default_)joint_pos_limits not found on robot.data")
+        return
+    # limits shape: (num_instances, num_joints, 2) or (num_joints, 2) -> [lower, upper]
+    lim = limits.cpu().numpy()
+    if lim.ndim == 3:
+        lim = lim[0]
+    low, high = lim[:, 0], lim[:, 1]
+    print("[joint limits] From USD (robot.data.default_joint_pos_limits or joint_pos_limits):")
+    for i, name in enumerate(names):
+        marker = " <-- joint 0 (shoulder_flexion)" if i == 0 else ""
+        print(f"  [{i}] {name}: lower={low[i]:.4f}, upper={high[i]:.4f}{marker}")
+    if hasattr(robot.data, "soft_joint_pos_limits") and robot.data.soft_joint_pos_limits is not None:
+        soft = robot.data.soft_joint_pos_limits[0].cpu().numpy()
+        if soft.ndim == 2:
+            print("[joint limits] soft_joint_pos_limits (first 6):", soft[:6])
+
+
+def run_simulator(
+    sim: sim_utils.SimulationContext,
+    scene: InteractiveScene,
+    pose_sequence: list[tuple[np.ndarray, np.ndarray, float]],
+    hold_seconds: float = 1.5,
+):
+    robot = scene["robot"]
+    # resolve() is called for its side effect of binding the cfg to the scene.
+    robot_entity_cfg = SceneEntityCfg("robot", joint_names=[".*"], body_names=[".*"])
+    robot_entity_cfg.resolve(scene)
+
+    sim_dt = sim.get_physics_dt()
+    steps_per_pose = max(1, int(hold_seconds / sim_dt))
+    joint_vel = robot.data.default_joint_vel.clone()
+
+    # Markers for the 5 target EE positions (spheres in Isaac)
+    target_marker_cfg = VisualizationMarkersCfg(
+        prim_path="/Visuals/fingertip_targets",
+        markers={
+            "target": sim_utils.SphereCfg(
+                radius=0.015,
+                visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(1.0, 0.2, 0.2)),
+            ),
+        },
+    )
+    target_markers = VisualizationMarkers(target_marker_cfg)
+
+    # Markers for the 5 current fingertip EE positions (tracking
visualization) + current_marker_cfg = VisualizationMarkersCfg( + prim_path="/Visuals/fingertip_currents", + markers={ + "current": sim_utils.SphereCfg( + radius=0.012, + visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(0.1, 1.0, 0.1)), + ), + }, + ) + current_markers = VisualizationMarkers(current_marker_cfg) + + # Map fingertip body names (from URDF) to Isaac body indices once. + fingertip_body_ids: list[int | None] = [] + for name in FINGERTIP_BODIES: + ids, _ = robot.find_bodies(name, preserve_order=True) + fingertip_body_ids.append(ids[0] if ids else None) + + pose_idx = 0 + step_count = 0 + n_poses = len(pose_sequence) + print(f"pose 1/{n_poses}: IK err {pose_sequence[0][2]:.4f} m") + + while simulation_app.is_running(): + # Advance to next pose when we've held long enough + if step_count >= steps_per_pose: + step_count = 0 + pose_idx = (pose_idx + 1) % n_poses + ik_err = pose_sequence[pose_idx][2] + print(f"pose {pose_idx + 1}/{n_poses}: IK err {ik_err:.4f} m") + + # Interpolate between current and next pose so motion is smooth (no snap/reset) + next_idx = (pose_idx + 1) % n_poses + alpha = step_count / steps_per_pose if steps_per_pose > 0 else 1.0 + alpha = min(alpha, 1.0) + + qpos_cur, targets_cur, _ = pose_sequence[pose_idx] + qpos_next, targets_next, _ = pose_sequence[next_idx] + qpos_interp = (1.0 - alpha) * qpos_cur + alpha * qpos_next + targets_interp = (1.0 - alpha) * targets_cur + alpha * targets_next + + # Write interpolated state every frame so robot and markers stay in sync (no PD lag) + joint_pos_tensor = torch.tensor(qpos_interp, dtype=torch.float32, device=sim.device) + robot.write_joint_state_to_sim(joint_pos_tensor, joint_vel) + targets_world = torch.tensor(targets_interp, dtype=torch.float32, device=sim.device) + scene.env_origins[0] + target_markers.visualize(translations=targets_world) + + # Visualize current fingertip locations (world frame) next to targets + current_positions = [] + for body_id in fingertip_body_ids: + if 
body_id is None: + current_positions.append([0.0, 0.0, 0.0]) + else: + pos_w = robot.data.body_pos_w[0, body_id, :] # world frame + current_positions.append(pos_w) + current_positions_tensor = torch.stack( + [p if torch.is_tensor(p) else torch.tensor(p, device=sim.device) for p in current_positions], + dim=0, + ).to(device=sim.device, dtype=torch.float32) + current_markers.visualize(translations=current_positions_tensor) + + scene.write_data_to_sim() + sim.step() + scene.update(sim_dt) + step_count += 1 + + # After the first sim step of each pose, print per-finger errors in Isaac world frame. + if step_count == 1: + targets_np = targets_world.cpu().numpy() + max_finger_err = 0.0 + print("[Isaac] Per-finger position error at current pose (m):") + for j, (name, body_id) in enumerate(zip(FINGERTIP_BODIES, fingertip_body_ids)): + if body_id is None: + print(f" {name}: (body not found)") + continue + tip_pos = robot.data.body_pos_w[0, body_id, :].cpu().numpy() + target_pos = targets_np[j] + err_vec = tip_pos - target_pos + err_norm = float(np.linalg.norm(err_vec)) + max_finger_err = max(max_finger_err, err_norm) + print(f" {name}: err={err_norm:.4f}, tip={tip_pos}, target={target_pos}") + print(f"[Isaac] Max fingertip error this pose: {max_finger_err:.4f} m") + + +def main(): + import mujoco + + # ---- Run IK for several 5-point target sets ---- + model = load_model() + data = mujoco.MjData(model) + data.qpos[:] = 0.0 + default_positions = get_fingertip_positions(model, data) + if len(default_positions) != 5: + raise RuntimeError("Could not get all 5 fingertip positions from model.") + + # Build a list of (dx, dy, dz) offsets in meters; each motion goes there and back. + n = 10 # steps per half (out or back) + range_m = 0.14 # motion amplitude + + def lin_there_back(start, end): + out = np.linspace(start, end, n, dtype=np.float64) + back = np.linspace(end, start, n, dtype=np.float64) + return np.concatenate([out, back[1:]]) + + # 1. 
Vertical: Z down then up + z_off = lin_there_back(0.0, -range_m) + offsets_vertical = np.column_stack([np.zeros_like(z_off), np.zeros_like(z_off), z_off]) + + # 2. Horizontal X: move in X then back + x_off = lin_there_back(0.0, -range_m) + offsets_x = np.column_stack([x_off, np.zeros_like(x_off), np.zeros_like(x_off)]) + + # 3. Horizontal Y: move in Y then back + y_off = lin_there_back(0.0, range_m) + offsets_y = np.column_stack([np.zeros_like(y_off), y_off, np.zeros_like(y_off)]) + + # 4. Diagonal: X and Z together + d_off = lin_there_back(0.0, -range_m * 0.8) + offsets_diag = np.column_stack([d_off, np.zeros_like(d_off), d_off]) + + all_offsets = np.vstack([offsets_vertical, offsets_x, offsets_y, offsets_diag]) + + pose_sequence = [] + n_poses = len(all_offsets) + for i, offset in enumerate(all_offsets): + targets_mjc = {name: pos + offset for name, pos in default_positions.items()} + data.qpos[:] = 0.0 + qpos, converged, max_err = solve_fingertip_ik( + model, data, targets_mjc, + damping=5e-4, step=0.25, max_iter=300, tol=5e-3, + ) + target_positions_isaac = np.stack([targets_mjc[name] for name in FINGERTIP_BODIES], axis=0).astype(np.float32) + pose_sequence.append((qpos.copy(), target_positions_isaac, max_err)) + print(f"pose {i + 1}/{n_poses}: IK err {max_err:.4f} m, qpos={qpos}") + + # ---- Isaac Sim ---- + sim_cfg = sim_utils.SimulationCfg(dt=0.01, device=args_cli.device) + sim = sim_utils.SimulationContext(sim_cfg) + sim.set_camera_view([2.5, 2.5, 2.5], [0.0, 0.0, 0.0]) + + scene_cfg = FingertipIKSceneCfg(num_envs=1, env_spacing=2.0) + scene = InteractiveScene(scene_cfg) + sim.reset() + print("[INFO] Isaac Sim ready. 
Press Play: targets move vertical, then X, then Y, then diagonal; fingers should follow.") + print(f"[INFO] env_origins[0] (marker offset) = {scene.env_origins[0].cpu().numpy()}") + + run_simulator(sim, scene, pose_sequence, hold_seconds=0.08) + + +if __name__ == "__main__": + main() + simulation_app.close() + +# PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p fingertip_ik_isaac_sim.py \ No newline at end of file diff --git a/autonomy/simulation/Teleop/wato_hand/data/configs/dex-retargeting/wato_hand_right_dexpilot.yml b/autonomy/simulation/Teleop/wato_hand/data/configs/dex-retargeting/wato_hand_right_dexpilot.yml index 3aad4da..fc7390b 100644 --- a/autonomy/simulation/Teleop/wato_hand/data/configs/dex-retargeting/wato_hand_right_dexpilot.yml +++ b/autonomy/simulation/Teleop/wato_hand/data/configs/dex-retargeting/wato_hand_right_dexpilot.yml @@ -1,8 +1,3 @@ -# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - retargeting: finger_tip_link_names: - IP_THUMB_v1_1 @@ -29,6 +24,5 @@ retargeting: - mcp_thumb - ip_thumb type: DexPilot - # Overwritten at runtime by wato_dex_retargeting_utils to Humanoid_Wato/arm_assembly/arm_assembly.urdf - urdf_path: /tmp/wato_arm_assembly.urdf + urdf_path: /tmp/wato_dex_arp_odej.urdf wrist_link_name: PALM_GAVIN_1DoF_Hinge_v2_1 diff --git a/autonomy/simulation/Teleop/wato_hand/run_dex_retargeting_test.py b/autonomy/simulation/Teleop/wato_hand/run_dex_retargeting_test.py new file mode 100644 index 0000000..ec65ab1 --- /dev/null +++ b/autonomy/simulation/Teleop/wato_hand/run_dex_retargeting_test.py @@ -0,0 +1,172 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers. +# SPDX-License-Identifier: BSD-3-Clause +""" +Test the dex_retargeting library (WatoHandDexRetargeting) for the custom Wato arm/hand. 
+No Isaac Lab / XR required — only dex_retargeting, torch, wato_dex_retargeting_utils, and arm_assembly URDF. + +Usage: + cd /path/to/autonomy/simulation/Teleop/wato_hand + python run_dex_retargeting_test.py + +Requires: torch, dex_retargeting, scipy, pyyaml (e.g. conda env with isaac lab deps). +Uses synthetic OpenXR-style hand poses to run left/right retargeting and print 15-DOF output. +""" + +from __future__ import annotations + +import cv2 +import numpy as np + +# Wato arm_assembly hand joint names (15 DOF per hand) +WATO_HAND_JOINT_NAMES = [ + "mcp_index", "pip_index", "dip_index", + "mcp_middle", "pip_middle", "dip_middle", + "mcp_ring", "pip_ring", "dip_ring", + "mcp_pinky", "pip_pinky", "dip_pinky", + "cmc_thumb", "mcp_thumb", "ip_thumb", +] + + +def make_fake_openxr_hand_poses(seed: int = 0) -> dict: + """Build fake OpenXR-style hand pose dict for one hand (26 joints + wrist). + + convert_hand_joints expects list(hand_poses.values()) to be ordered so index i = joint i. + So build with keys 0,1,...,25 then "wrist". Each joint value is [x,y,z]; wrist is [x,y,z, w,qx,qy,qz]. + """ + rng = np.random.default_rng(seed) + out = {} + for i in range(26): + out[i] = np.array([0.0, 0.0, 0.05 + (i * 0.005)], dtype=np.float64) + out[0] = np.array([0.0, 0.0, 0.0], dtype=np.float64) + out[1] = np.array([0.0, 0.0, 0.0], dtype=np.float64) + out["wrist"] = np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0], dtype=np.float64) + return out + + +def build_right_openxr_pose_from_mediapipe( + mediapipe_joint_pos: np.ndarray, # (21, 3), wrist-centered (wrist at 0) + wrist_rot: np.ndarray, # (3, 3) rotation matrix +) -> dict: + """Convert MediaPipe 21-joint world pose to OpenXR-style 26-joint + wrist. + + Your dex-retargeting Wato wrapper selects a subset of OpenXR joints defined by: + _OPENXR_HAND_JOINT_INDICES = [1,2,3,4,5,7,...,25] (21 joints) + so we fill those OpenXR indices from MediaPipe finger joints, including thumb/index/middle/ring/pinky. 
+ """ + from scipy.spatial.transform import Rotation as R + + assert mediapipe_joint_pos.shape == (21, 3), mediapipe_joint_pos.shape + assert wrist_rot.shape == (3, 3), wrist_rot.shape + + # OpenXR-style pose dict expects keys 0..25 + "wrist". + hand_poses: dict[int | str, np.ndarray] = {} + + # MediaPipe joint index reference: + # 0 wrist, 1 thumb_cmc, 2 thumb_mcp, 3 thumb_ip, 4 thumb_tip + # 5 index_mcp, 6 index_pip, 7 index_dip, 8 index_tip + # 9 middle_mcp, 10 middle_pip, 11 middle_dip, 12 middle_tip + # 13 ring_mcp, 14 ring_pip, 15 ring_dip, 16 ring_tip + # 17 pinky_mcp, 18 pinky_pip, 19 pinky_dip, 20 pinky_tip + mp = mediapipe_joint_pos + + # Wrist (OpenXR joint index 1 is used by your wrapper subset) + wrist_xyz = mp[0] + hand_poses[1] = wrist_xyz.copy() + + # Thumb (OpenXR 2-5 correspond to thumb proximal->tip; mp 1-4) + hand_poses[2] = mp[1].copy() + hand_poses[3] = mp[2].copy() + hand_poses[4] = mp[3].copy() + hand_poses[5] = mp[4].copy() + + # Index (OpenXR 7-10 correspond; mp 5-8) + hand_poses[7] = mp[5].copy() + hand_poses[8] = mp[6].copy() + hand_poses[9] = mp[7].copy() + hand_poses[10] = mp[8].copy() + + # Middle (OpenXR 12-15 correspond; mp 9-12) + hand_poses[12] = mp[9].copy() + hand_poses[13] = mp[10].copy() + hand_poses[14] = mp[11].copy() + hand_poses[15] = mp[12].copy() + + # Ring (OpenXR 17-20 correspond; mp 13-16) + hand_poses[17] = mp[13].copy() + hand_poses[18] = mp[14].copy() + hand_poses[19] = mp[15].copy() + hand_poses[20] = mp[16].copy() + + # Pinky (OpenXR 22-25 correspond; mp 17-20) + hand_poses[22] = mp[17].copy() + hand_poses[23] = mp[18].copy() + hand_poses[24] = mp[19].copy() + hand_poses[25] = mp[20].copy() + + # Fill missing OpenXR indices with zeros (wrapper only uses a subset, but convert_hand_joints + # iterates values() in key order when building 21 joints). 
+ for i in range(26): + if i not in hand_poses: + hand_poses[i] = np.zeros(3, dtype=np.float64) + + # Wrist quaternion: wrapper expects hand_poses["wrist"] = [x,y,z, w,qx,qy,qz] + quat_xyzw = R.from_matrix(wrist_rot).as_quat() # (x,y,z,w) + qx, qy, qz, qw = quat_xyzw + hand_poses["wrist"] = np.array([wrist_xyz[0], wrist_xyz[1], wrist_xyz[2], qw, qx, qy, qz], dtype=np.float64) + + return hand_poses + + +def main(): + from wato_dex_retargeting_utils import WatoHandDexRetargeting + from single_hand_detector import SingleHandDetector + + print("Testing dex_retargeting library (WatoHandDexRetargeting) for custom arm/hand ...") + dex = WatoHandDexRetargeting(hand_joint_names=WATO_HAND_JOINT_NAMES) + + detector = SingleHandDetector(hand_type="Right", selfie=False) + + cap = cv2.VideoCapture(0) + if not cap.isOpened(): + raise RuntimeError("Failed to open webcam (cv2.VideoCapture(0)).") + + print("Running live webcam retargeting. Press 'q' in the preview window to quit.") + while True: + ok, bgr = cap.read() + if not ok: + continue + + rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) + _, joint_pos, keypoint_2d, wrist_rot = detector.detect(rgb) + + if joint_pos is None: + cv2.imshow("wato_dex_retargeting_live_right", bgr) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + continue + + # Visualize landmarks if you want quick feedback. + if keypoint_2d is not None: + bgr = detector.draw_skeleton_on_image(bgr, keypoint_2d, style="default") + + # Build OpenXR-style pose dict and retarget. 
+ right_poses = build_right_openxr_pose_from_mediapipe( + mediapipe_joint_pos=joint_pos, + wrist_rot=wrist_rot, + ) + right_q = dex.compute_right(right_poses) + + print("Right retargeted (15 DOF hand):", right_q) + cv2.imshow("wato_dex_retargeting_live_right", bgr) + if cv2.waitKey(1) & 0xFF == ord("q"): + break + + cap.release() + cv2.destroyAllWindows() + + print("Joint names:", dex.get_right_joint_names()) + print("Dex retargeting live test done.") + + +if __name__ == "__main__": + main() diff --git a/autonomy/simulation/Teleop/wato_hand/single_hand_detector.py b/autonomy/simulation/Teleop/wato_hand/single_hand_detector.py new file mode 100644 index 0000000..3a0a0ec --- /dev/null +++ b/autonomy/simulation/Teleop/wato_hand/single_hand_detector.py @@ -0,0 +1,153 @@ +# Adapted from the dex-retargeting library at dex-retargeting/example/vector_retargeting/single_hand_detector.py + +import mediapipe as mp +import numpy as np +from typing import Any + +OPERATOR2MANO_RIGHT = np.array( + [ + [0, 0, -1], + [-1, 0, 0], + [0, 1, 0], + ] +) + +OPERATOR2MANO_LEFT = np.array( + [ + [0, 0, -1], + [1, 0, 0], + [0, -1, 0], + ] +) + + +class SingleHandDetector: + def __init__( + self, + hand_type="Right", + min_detection_confidence=0.8, + min_tracking_confidence=0.8, + selfie=False, + ): + self.hand_detector = mp.solutions.hands.Hands( + static_image_mode=False, + max_num_hands=1, + min_detection_confidence=min_detection_confidence, + min_tracking_confidence=min_tracking_confidence, + ) + self.selfie = selfie + self.operator2mano = ( + OPERATOR2MANO_RIGHT if hand_type == "Right" else OPERATOR2MANO_LEFT + ) + inverse_hand_dict = {"Right": "Left", "Left": "Right"} + self.detected_hand_type = hand_type if selfie else inverse_hand_dict[hand_type] + + @staticmethod + def draw_skeleton_on_image( + image, keypoint_2d: Any, style="white" + ): + if style == "default": + mp.solutions.drawing_utils.draw_landmarks( + image, + keypoint_2d, + mp.solutions.hands.HAND_CONNECTIONS, + 
mp.solutions.drawing_styles.get_default_hand_landmarks_style(), + mp.solutions.drawing_styles.get_default_hand_connections_style(), + ) + elif style == "white": + landmark_style = {} + for landmark in mp.solutions.hands.HandLandmark: + landmark_style[landmark] = mp.solutions.drawing_utils.DrawingSpec( + color=(255, 48, 48), circle_radius=4, thickness=-1 + ) + connections = mp.solutions.hands_connections.HAND_CONNECTIONS + connection_style = {} + for pair in connections: + connection_style[pair] = mp.solutions.drawing_utils.DrawingSpec(thickness=2) + + mp.solutions.drawing_utils.draw_landmarks( + image, + keypoint_2d, + mp.solutions.hands.HAND_CONNECTIONS, + landmark_style, + connection_style, + ) + + return image + + def detect(self, rgb): + results = self.hand_detector.process(rgb) + if not results.multi_hand_landmarks: + return 0, None, None, None + + desired_hand_num = -1 + for i in range(len(results.multi_hand_landmarks)): + label = results.multi_handedness[i].ListFields()[0][1][0].label + if label == self.detected_hand_type: + desired_hand_num = i + break + if desired_hand_num < 0: + return 0, None, None, None + + keypoint_3d = results.multi_hand_world_landmarks[desired_hand_num] + keypoint_2d = results.multi_hand_landmarks[desired_hand_num] + num_box = len(results.multi_hand_landmarks) + + keypoint_3d_array = self.parse_keypoint_3d(keypoint_3d) + keypoint_3d_array = keypoint_3d_array - keypoint_3d_array[0:1, :] + mediapipe_wrist_rot = self.estimate_frame_from_hand_points(keypoint_3d_array) + joint_pos = keypoint_3d_array @ mediapipe_wrist_rot @ self.operator2mano + return num_box, joint_pos, keypoint_2d, mediapipe_wrist_rot + + @staticmethod + def parse_keypoint_3d( + keypoint_3d: Any, + ) -> np.ndarray: + keypoint = np.empty([21, 3]) + for i in range(21): + keypoint[i][0] = keypoint_3d.landmark[i].x + keypoint[i][1] = keypoint_3d.landmark[i].y + keypoint[i][2] = keypoint_3d.landmark[i].z + return keypoint + + @staticmethod + def parse_keypoint_2d( + 
keypoint_2d: Any, img_size + ) -> np.ndarray: + keypoint = np.empty([21, 2]) + for i in range(21): + keypoint[i][0] = keypoint_2d.landmark[i].x + keypoint[i][1] = keypoint_2d.landmark[i].y + keypoint = keypoint * np.array([img_size[1], img_size[0]])[None, :] + return keypoint + + @staticmethod + def estimate_frame_from_hand_points(keypoint_3d_array: np.ndarray) -> np.ndarray: + """ + Compute the 3D coordinate frame (orientation only) from detected 3d key points + :param points: keypoint3 detected from MediaPipe detector. Order: [wrist, index, middle, pinky] + :return: the coordinate frame of wrist in MANO convention + """ + assert keypoint_3d_array.shape == (21, 3) + points = keypoint_3d_array[[0, 5, 9], :] + + # Compute vector from palm to the first joint of middle finger + x_vector = points[0] - points[2] + + # Normal fitting with SVD + points = points - np.mean(points, axis=0, keepdims=True) + u, s, v = np.linalg.svd(points) + + normal = v[2, :] + + # Gram–Schmidt Orthonormalize + x = x_vector - np.sum(x_vector * normal) * normal + x = x / np.linalg.norm(x) + z = np.cross(x, normal) + + # We assume that the vector from pinky to index is similar the z axis in MANO convention + if np.sum(z * (points[1] - points[2])) < 0: + normal *= -1 + z *= -1 + frame = np.stack([x, normal, z], axis=1) + return frame \ No newline at end of file diff --git a/autonomy/simulation/Teleop/wato_hand/wato_dex_retargeting_utils.py b/autonomy/simulation/Teleop/wato_hand/wato_dex_retargeting_utils.py index 6cb2351..2a7ff1c 100644 --- a/autonomy/simulation/Teleop/wato_hand/wato_dex_retargeting_utils.py +++ b/autonomy/simulation/Teleop/wato_hand/wato_dex_retargeting_utils.py @@ -5,6 +5,8 @@ import logging import os +import re +import tempfile import numpy as np import torch @@ -52,6 +54,32 @@ _LEFT_HAND_JOINT_NAMES = _WATO_HAND_JOINT_NAMES _RIGHT_HAND_JOINT_NAMES = _WATO_HAND_JOINT_NAMES +# dex_retargeting's RobotWrapper requires model.nq == model.nv. 
+# Pinocchio models
+# continuous joints as nq=2, nv=1, so we pass a URDF with continuous -> revolute + limits.
+# NOTE(review): the committed limit line and regex below were angle-bracket-stripped
+# (HTML-sanitized) into whitespace; reconstructed here. Limit values ±2π with generous
+# effort/velocity keep the former continuous shoulder joints usable — confirm against USD.
+_DEX_LIMIT_LINE = '    <limit lower="-6.28318530718" upper="6.28318530718" effort="100.0" velocity="10.0"/>'
+
+
+def _urdf_continuous_to_revolute_for_dex(source_urdf_path: str) -> str:
+    """Write a temp URDF with continuous joints replaced by revolute + limits so nq==nv."""
+    with open(source_urdf_path) as f:
+        content = f.read()
+    content = re.sub(r'\btype="continuous"', 'type="revolute"', content)
+    # Add limit for joints that had no limit (the two former continuous ones)
+    for joint_name in ("shoulder_flexion_extension", "shoulder_rotation"):
+        # Match the named <joint ...> element up to its closing tag so the
+        # reconstructed <limit/> line is inserted just before </joint>.
+        pattern = (
+            r'(<joint\s+name="' + re.escape(joint_name) + r'"[^>]*>.*?)'
+            r'\s*(</joint>)'
+        )
+        content = re.sub(pattern, r'\1\n' + _DEX_LIMIT_LINE + r'\n  \2', content, count=1, flags=re.DOTALL)
+    fd, path = tempfile.mkstemp(suffix=".urdf", prefix="wato_dex_")
+    try:
+        with os.fdopen(fd, "w") as f:
+            f.write(content)
+    except Exception:
+        os.unlink(path)
+        raise
+    return path
+
 
 class WatoHandDexRetargeting:
     """Hand retargeting for Wato arm_assembly hand.
@@ -83,11 +111,16 @@ def __init__(
         local_left_urdf_path = left_hand_urdf_path if left_hand_urdf_path else default_urdf
         local_right_urdf_path = right_hand_urdf_path if right_hand_urdf_path else default_urdf
 
+        # dex_retargeting RobotWrapper requires nq==nv; Pinocchio gives nq!=nv for continuous joints.
+        # Use a temp URDF with continuous -> revolute + limits.
+        self._dex_left_urdf_path = _urdf_continuous_to_revolute_for_dex(local_left_urdf_path)
+        self._dex_right_urdf_path = _urdf_continuous_to_revolute_for_dex(local_right_urdf_path)
+
         left_config_path = os.path.join(config_dir, left_hand_config_filename)
         right_config_path = os.path.join(config_dir, right_hand_config_filename)
 
-        self._update_yaml_with_urdf_path(left_config_path, local_left_urdf_path)
-        self._update_yaml_with_urdf_path(right_config_path, local_right_urdf_path)
+        self._update_yaml_with_urdf_path(left_config_path, self._dex_left_urdf_path)
+        self._update_yaml_with_urdf_path(right_config_path, self._dex_right_urdf_path)
 
         self._dex_left_hand = RetargetingConfig.load_from_file(left_config_path).build()
         self._dex_right_hand = RetargetingConfig.load_from_file(right_config_path).build()
@@ -187,12 +220,15 @@ def compute_one_hand(
             indices=retargeting.optimizer.target_link_human_indices,
             retargeting_type=retargeting.optimizer.retargeting_type,
         )
+        # Arm joints (non-hand) are fixed during hand retargeting; use robot neutral for them.
+        robot = retargeting.optimizer.robot
+        fixed_qpos = np.array(robot.q0[retargeting.optimizer.idx_pin2fixed], dtype=np.float64)
         # Enable gradient calculation and inference mode in case some other script has disabled it
         # This is necessary for the retargeting to work since it uses gradient features that
         # are not available in inference mode
         with torch.enable_grad():
             with torch.inference_mode(False):
-                return retargeting.retarget(ref_value)
+                return retargeting.retarget(ref_value, fixed_qpos=fixed_qpos)
 
     def get_joint_names(self) -> list[str]:
         """Returns list of all joint names."""