diff --git a/.gitignore b/.gitignore index cbe06491..040955d9 100644 --- a/.gitignore +++ b/.gitignore @@ -179,7 +179,8 @@ materials embodichain/toolkits/outputs/* embodichain/toolkits/outputs/* -embodichain/database/* +embodichain/database/tmp/* +embodichain/database/train/* 3rdparty/ Log/ diff --git a/configs/gym/agent/pour_water_agent/agent_config.json b/configs/gym/agent/pour_water_agent/agent_config.json new file mode 100644 index 00000000..b54f90c1 --- /dev/null +++ b/configs/gym/agent/pour_water_agent/agent_config.json @@ -0,0 +1,31 @@ +{ "TaskAgent": { + "prompt_name": "one_stage_prompt" + }, + "CodeAgent": { + "prompt_name": "one_stage_prompt_according_to_task_plan" + }, + "Agent": { + "prompt_kwargs": { + "task_prompt": { + "type": "text", + "name": "task_prompt.txt" + }, + "basic_background": { + "type": "text", + "name": "basic_background.txt" + }, + "atom_actions": { + "type": "text", + "name": "atom_actions.txt" + }, + "code_prompt": { + "type": "text", + "name": "code_prompt.txt" + }, + "code_example": { + "type": "text", + "name": "code_example.txt" + } + } + } +} \ No newline at end of file diff --git a/configs/gym/agent/pour_water_agent/agent_config_dual.json b/configs/gym/agent/pour_water_agent/agent_config_dual.json new file mode 100644 index 00000000..b243bb65 --- /dev/null +++ b/configs/gym/agent/pour_water_agent/agent_config_dual.json @@ -0,0 +1,31 @@ +{ "TaskAgent": { + "prompt_name": "one_stage_prompt" + }, + "CodeAgent": { + "prompt_name": "one_stage_prompt_according_to_task_plan" + }, + "Agent": { + "prompt_kwargs": { + "task_prompt": { + "type": "text", + "name": "task_prompt_dual.txt" + }, + "basic_background": { + "type": "text", + "name": "basic_background.txt" + }, + "atom_actions": { + "type": "text", + "name": "atom_actions.txt" + }, + "code_prompt": { + "type": "text", + "name": "code_prompt.txt" + }, + "code_example": { + "type": "text", + "name": "code_example.txt" + } + } + } +} \ No newline at end of file diff --git a/configs/gym/agent/pour_water_agent/fast_gym_config.json b/configs/gym/agent/pour_water_agent/fast_gym_config.json new file mode 100644 index 00000000..10a3d9cf --- /dev/null +++ b/configs/gym/agent/pour_water_agent/fast_gym_config.json @@ -0,0 +1,392 @@ +{ + "id": "PourWaterAgent-v3", + "max_episodes": 5, + "env": { + "events": { + "init_table_pose": { + "func": "randomize_rigid_object_pose", + "mode": "reset", + "params": { + "entity_cfg": {"uid": "table"}, + "position_range": [[0.0, 0.0, -0.04], [0.0, 0.0, 0.04]], + "relative_position": true + } + }, + "init_bottle_pose": { + "func": "randomize_rigid_object_pose", + "mode": "reset", + "params": { + "entity_cfg": {"uid": "bottle"}, + "position_range": [[-0.08, -0.12, 0.0], [0.08, 0.04, 0.0]], + "relative_position": true + } + }, + "init_cup_pose": { + "func": "randomize_rigid_object_pose", + "mode": "reset", + "params": { + "entity_cfg": {"uid": "cup"}, + "position_range": [[-0.08, -0.04, 0.0], [0.04, 0.06, 0.0]], + "relative_position": true + } + }, + "prepare_extra_attr": { + "func": "prepare_extra_attr", + "mode": "reset", + "params": { + "attrs": [ + { + "name": "object_lengths", + "mode": "callable", + "entity_uids": "all_objects", + "func_name": "compute_object_length", + "func_kwargs": { + "is_svd_frame": true, + "sample_points": 5000 + } + }, + { + "name": "grasp_pose_object", + "mode": "static", + "entity_cfg": { + "uid": "bottle" + }, + "value": [[ + [0.32243, 0.03245, 0.94604, 0.025], + [0.00706, -0.99947, 0.03188, -0.0 ], + [0.94657, -0.0036 , -0.32249, 0.0 ], + [0.0 , 0.0 , 
0.0 , 1.0 ] + ]] + }, + { + "name": "grasp_pose_object", + "mode": "static", + "entity_cfg": { + "uid": "cup" + }, + "value": [[ + [ 0.32039, -0.03227, 0.94673, 0.0 ], + [ 0.00675, -0.99932, -0.03635, 0.0 ], + [ 0.94726, 0.01803, -0.31996, 0.0 ], + [ 0.0 , 0.0 , 0.0 , 1.0 ] + ]] + }, + { + "name": "left_arm_base_pose", + "mode": "callable", + "entity_cfg": { + "uid": "CobotMagic" + }, + "func_name": "get_link_pose", + "func_kwargs": { + "link_name": "left_arm_base", + "to_matrix": true + } + }, + { + "name": "right_arm_base_pose", + "mode": "callable", + "entity_cfg": { + "uid": "CobotMagic" + }, + "func_name": "get_link_pose", + "func_kwargs": { + "link_name": "right_arm_base", + "to_matrix": true + } + } + ] + } + }, + "register_info_to_env": { + "func": "register_info_to_env", + "mode": "reset", + "params": { + "registry": [ + { + "entity_cfg": { + "uid": "bottle" + }, + "pose_register_params": { + "compute_relative": false, + "compute_pose_object_to_arena": true, + "to_matrix": true + } + }, + { + "entity_cfg": { + "uid": "cup" + }, + "pose_register_params": { + "compute_relative": false, + "compute_pose_object_to_arena": true, + "to_matrix": true + } + }, + { + "entity_cfg": { + "uid": "CobotMagic", + "control_parts": ["left_arm"] + }, + "attrs": ["left_arm_base_pose"], + "pose_register_params": { + "compute_relative": "cup", + "compute_pose_object_to_arena": false, + "to_matrix": true + }, + "prefix": false + }, + { + "entity_cfg": { + "uid": "CobotMagic", + "control_parts": ["right_arm"] + }, + "attrs": ["right_arm_base_pose"], + "pose_register_params": { + "compute_relative": "bottle", + "compute_pose_object_to_arena": false, + "to_matrix": true + }, + "prefix": false + } + ], + "registration": "affordance_datas", + "sim_update": true + } + }, + "random_table_material": { + "func": "randomize_visual_material", + "mode": "reset", + "interval_step": 2, + "params": { + "entity_cfg": {"uid": "table"}, + "random_texture_prob": 1.0, + "texture_path": "DexsimMaterials/WoodTable" + } + }, + "random_bottle_material": { + "func": "randomize_visual_material", + "mode": "reset", + "interval_step": 2, + "params": { + "entity_cfg": {"uid": "bottle"}, + "random_texture_prob": 1.0, + "texture_path": "DexsimMaterials/Bottle" + } + }, + "random_cup_material": { + "func": "randomize_visual_material", + "mode": "reset", + "interval_step": 2, + "params": { + "entity_cfg": {"uid": "cup"}, + "random_texture_prob": 1.0, + "texture_path": "DexsimMaterials/Cup" + } + }, + "random_robot_init_eef_pose": { + "func": "randomize_robot_eef_pose", + "mode": "reset", + "params": { + "entity_cfg": {"uid": "CobotMagic", "control_parts": ["left_arm", "right_arm"]}, + "position_range": [[-0.01, -0.01, -0.01], [0.01, 0.01, 0]] + } + }, + "record_camera": { + "func": "record_camera_data", + "mode": "interval", + "interval_step": 1, + "params": { + "name": "cam1", + "resolution": [320, 240], + "eye": [2, 0, 2], + "target": [0.5, 0, 1] + } + } + }, + "dataset": { + "lerobot": { + "func": "embodichain.lab.gym.envs.managers.datasets:LeRobotRecorder", + "mode": "save", + "params": { + "robot_meta": { + "arm_dofs": 12, + "control_freq": 25, + "control_parts": ["left_arm", "left_eef", "right_arm", "right_eef"], + "observation": { + "vision": {}, + "states": ["qpos"] + }, + "min_len_steps": 5 + }, + "instruction": { + "lang": "Pour water from the bottle into the mug." 
+ } + } + } + }, + "success_params": { + "strict": false + } + }, + "robot": { + "uid": "CobotMagic", + "robot_type": "CobotMagic", + "init_pos": [0.0, 0.0, 0.7775], + "init_qpos": [-0.3,0.3,1.0,1.0,-1.2,-1.2,0.0,0.0,0.6,0.6,0.0,0.0,0.05,0.05,0.05,0.05] + }, + "sensor": [ + { + "sensor_type": "StereoCamera", + "uid": "cam_high", + "width": 960, + "height": 540, + "enable_mask": false, + "enable_depth": false, + "left_to_right_pos": [0.059684025824163614, 0, 0], + "intrinsics": [453.851402686215, 453.8347628855552, 469.827725021235, 258.6656181845155], + "intrinsics_right": [453.4536601653505, 453.3306024582175, 499.13697412367776, 297.7176248477935], + "extrinsics": { + "eye": [0.35368482807598, 0.014695524383058989, 1.4517046071614774], + "target": [0.7186357573287919, -0.054534732904795505, 0.5232553674540066], + "up": [0.9306678549330372, -0.0005600064212467153, 0.3658647703553347] + } + }, + { + "sensor_type": "Camera", + "uid": "cam_right_wrist", + "width": 640, + "height": 480, + "enable_mask": false, + "intrinsics": [488.1665344238281, 488.1665344238281, 322.7323303222656, 213.17434692382812], + "extrinsics": { + "parent": "right_link6", + "pos": [-0.08, 0.0, 0.04], + "quat": [0.15304635, 0.69034543, -0.69034543, -0.15304635] + } + }, + { + "sensor_type": "Camera", + "uid": "cam_left_wrist", + "width": 640, + "height": 480, + "enable_mask": false, + "intrinsics": [488.1665344238281, 488.1665344238281, 322.7323303222656, 213.17434692382812], + "extrinsics": { + "parent": "left_link6", + "pos": [-0.08, 0.0, 0.04], + "quat": [0.15304635, 0.69034543, -0.69034543, -0.15304635] + } + }, + { + "sensor_type": "Camera", + "uid": "valid_cam_1", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [1.7, 0, 2.3], + "target": [0.6, 0, 0.8] + } + }, + { + "sensor_type": "Camera", + "uid": "valid_cam_2", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [2.0, 0, 1.8], + "target": [0.7, 0, 0.9] + } + }, + { + "sensor_type": "Camera", + "uid": "valid_cam_3", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [2.0, 0, 1.3], + "target": [0.7, 0, 0.9] + } + } + ], + "light": { + "direct": [ + { + "uid": "light_1", + "light_type": "point", + "color": [1.0, 1.0, 1.0], + "intensity": 100.0, + "init_pos": [2, 0, 2], + "radius": 20.0 + } + ] + }, + "background": [ + { + "uid": "table", + "shape": { + "shape_type": "Mesh", + "fpath": "CircleTableSimple/circle_table_simple.ply", + "compute_uv": true + }, + "attrs" : { + "mass": 10.0, + "static_friction": 0.95, + "dynamic_friction": 0.9, + "restitution": 0.01 + }, + "body_scale": [1, 1, 1], + "body_type": "kinematic", + "init_pos": [0.725, 0.0, 0.825], + "init_rot": [0, 90, 0] + } + ], + "rigid_object": [ + { + "uid":"cup", + "shape": { + "shape_type": "Mesh", + "fpath": "PaperCup/paper_cup.ply", + "compute_uv": true + }, + "attrs" : { + "mass": 0.01, + "contact_offset": 0.003, + "rest_offset": 0.001, + "restitution": 0.01, + "max_depenetration_velocity": 1e1, + "min_position_iters": 32, + "min_velocity_iters":8 + }, + "init_pos": [0.75, 0.1, 0.9], + "body_scale":[0.75, 0.75, 1.0], + "max_convex_hull_num": 8 + }, + { + "uid":"bottle", + "shape": { + "shape_type": "Mesh", + "fpath": "ScannedBottle/kashijia_processed.ply", + "compute_uv": true + }, + "attrs" : { + "mass": 0.01, + "contact_offset": 0.003, + "rest_offset": 0.001, + "restitution": 
0.01, + "max_depenetration_velocity": 1e1, + "min_position_iters": 32, + "min_velocity_iters":8 + }, + "init_pos": [0.75, -0.1, 0.932], + "body_scale":[1, 1, 1], + "max_convex_hull_num": 8 + } + ] +} \ No newline at end of file diff --git a/configs/gym/agent/pour_water_agent/task_prompt.txt b/configs/gym/agent/pour_water_agent/task_prompt.txt new file mode 100644 index 00000000..be7eb3b1 --- /dev/null +++ b/configs/gym/agent/pour_water_agent/task_prompt.txt @@ -0,0 +1,5 @@ +Task: +Use a single robotic arm to pour water from the bottle into the cup. +Position the bottle at an offset of [0.05, −0.10, 0.125] relative to the cup’s position during pouring. +After completing the pour, return the bottle to the location [0.7, −0.1]. +The cup should remain stationary throughout the task. \ No newline at end of file diff --git a/configs/gym/agent/pour_water_agent/task_prompt_dual.txt b/configs/gym/agent/pour_water_agent/task_prompt_dual.txt new file mode 100644 index 00000000..6b2205e2 --- /dev/null +++ b/configs/gym/agent/pour_water_agent/task_prompt_dual.txt @@ -0,0 +1,5 @@ +Task: +Use both arms to pour water from the bottle into the cup. +First, grasp the bottle with right arm and the cup with left arm simultaneously. Then lift the cup by 0.10 m, and then move it to [0.55, 0.05] to prepare for pouring. +Then hold the bottle at a relative offset of [0.05, −0.10, 0.125] with respect to the cup for starting pouring. +After pouring, place the bottle to [0.7, −0.1] and place the cup at [0.6, 0.1] simultaneously. \ No newline at end of file diff --git a/configs/gym/agent/rearrangement_agent_v3/agent_config.json b/configs/gym/agent/rearrangement_agent_v3/agent_config.json new file mode 100644 index 00000000..6dbe2f5e --- /dev/null +++ b/configs/gym/agent/rearrangement_agent_v3/agent_config.json @@ -0,0 +1,31 @@ +{ "TaskAgent": { + "prompt_name": "one_stage_prompt" + }, + "CodeAgent": { + "prompt_name": "one_stage_prompt_according_to_task_plan" + }, + "Agent": { + "prompt_kwargs": { + "task_prompt": { + "type": "text", + "name": "task_prompt.txt" + }, + "basic_background": { + "type": "text", + "name": "basic_background.txt" + }, + "atom_actions": { + "type": "text", + "name": "atom_actions.txt" + }, + "code_prompt": { + "type": "text", + "name": "code_prompt.txt" + }, + "code_example": { + "type": "text", + "name": "code_example.txt" + } + } + } +} diff --git a/configs/gym/agent/rearrangement_agent_v3/fast_gym_config.json b/configs/gym/agent/rearrangement_agent_v3/fast_gym_config.json new file mode 100644 index 00000000..5cc6daad --- /dev/null +++ b/configs/gym/agent/rearrangement_agent_v3/fast_gym_config.json @@ -0,0 +1,384 @@ +{ + "id": "RearrangementAgent-v3", + "max_episodes": 10, + "env": { + "events": { + "init_table_pose": { + "func": "randomize_rigid_object_pose", + "mode": "reset", + "params": { + "entity_cfg": {"uid": "table"}, + "position_range": [[0.0, 0.0, -0.04], [0.0, 0.0, 0.04]], + "relative_position": true + } + }, + "init_fork_pose": { + "func": "randomize_rigid_object_pose", + "mode": "reset", + "params": { + "entity_cfg": {"uid": "fork"}, + "position_range": [[0.0, -0.05, 0.0], [0.1, 0.05, 0.0]], + "rotation_range": [[0, 0, -45], [0, 0, 45]], + "relative_position": true, + "relative_rotation": true + } + }, + "init_spoon_pose": { + "func": "randomize_rigid_object_pose", + "mode": "reset", + "params": { + "entity_cfg": {"uid": "spoon"}, + "position_range": [[0.0, -0.05, 0.0], [0.1, 0.05, 0.0]], + "rotation_range": [[0, 0, -45], [0, 0, 45]], + "relative_position": true, + 
"relative_rotation": true + } + }, + "prepare_extra_attr": { + "func": "prepare_extra_attr", + "mode": "reset", + "params": { + "attrs": [ + { + "name": "object_lengths", + "mode": "callable", + "entity_uids": "all_objects", + "func_name": "compute_object_length", + "func_kwargs": { + "is_svd_frame": true, + "sample_points": 5000 + } + }, + { + "name": "grasp_pose_object", + "mode": "static", + "entity_uids": ["fork", "spoon"], + "value": [[ + [1.0, 0.0, 0.0, 0.0], + [0.0, -1.0, 0.0, 0.0], + [0.0, 0.0, -1.0, 0.0], + [0.0, 0.0, 0.0, 1.0] + ]] + } + ] + } + }, + "register_info_to_env": { + "func": "register_info_to_env", + "mode": "reset", + "params": { + "registry": [ + { + "entity_cfg": { + "uid": "plate" + }, + "pose_register_params": { + "compute_relative": false, + "compute_pose_object_to_arena": true, + "to_matrix": true + } + }, + { + "entity_cfg": { + "uid": "fork" + }, + "pose_register_params": { + "compute_relative": false, + "compute_pose_object_to_arena": true, + "to_matrix": true + } + }, + { + "entity_cfg": { + "uid": "spoon" + }, + "pose_register_params": { + "compute_relative": false, + "compute_pose_object_to_arena": true, + "to_matrix": true + } + }, + { + "entity_cfg": { + "uid": "CobotMagic", + "control_parts": ["left_arm"] + }, + "attrs": ["left_arm_base_pose"], + "pose_register_params": { + "compute_relative": "spoon", + "compute_pose_object_to_arena": false, + "to_matrix": true + }, + "prefix": false + }, + { + "entity_cfg": { + "uid": "CobotMagic", + "control_parts": ["right_arm"] + }, + "attrs": ["right_arm_base_pose"], + "pose_register_params": { + "compute_relative": "fork", + "compute_pose_object_to_arena": false, + "to_matrix": true + }, + "prefix": false + } + ], + "registration": "affordance_datas", + "sim_update": true + } + }, + "random_table_material": { + "func": "randomize_visual_material", + "mode": "reset", + "interval_step": 2, + "params": { + "entity_cfg": {"uid": "table"}, + "random_texture_prob": 1.0, + "texture_path": "DexsimMaterials/WoodTable" + } + }, + "random_plate_material": { + "func": "randomize_visual_material", + "mode": "reset", + "interval_step": 2, + "params": { + "entity_cfg": {"uid": "plate"}, + "random_texture_prob": 1.0, + "texture_path": "DexsimMaterials/Plate" + } + }, + "random_fork_material": { + "func": "randomize_visual_material", + "mode": "reset", + "interval_step": 2, + "params": { + "entity_cfg": {"uid": "fork"}, + "random_texture_prob": 1.0, + "texture_path": "DexsimMaterials/Spoon" + } + }, + "random_spoon_material": { + "func": "randomize_visual_material", + "mode": "reset", + "interval_step": 2, + "params": { + "entity_cfg": {"uid": "spoon"}, + "random_texture_prob": 1.0, + "texture_path": "DexsimMaterials/Spoon" + } + }, + "random_robot_init_eef_pose": { + "func": "randomize_robot_eef_pose", + "mode": "reset", + "params": { + "entity_cfg": {"uid": "CobotMagic", "control_parts": ["left_arm", "right_arm"]}, + "position_range": [[-0.01, -0.01, -0.01], [0.01, 0.01, 0]] + } + }, + "record_camera": { + "func": "record_camera_data", + "mode": "interval", + "interval_step": 1, + "params": { + "name": "cam1", + "resolution": [320, 240], + "eye": [2, 0, 2], + "target": [0.5, 0, 1] + } + } + }, + "dataset": { + "lerobot": { + "func": "embodichain.lab.gym.envs.managers.datasets:LeRobotRecorder", + "mode": "save", + "params": { + "robot_meta": { + "arm_dofs": 12, + "control_freq": 25, + "control_parts": ["left_arm", "left_eef", "right_arm", "right_eef"], + "observation": { + "vision": {}, + "states": ["qpos"] + }, + 
"min_len_steps": 125 + }, + "instruction": { + "lang": "Place the spoon and fork neatly into the plate on the table." + } + } + } + }, + "success_params": { + "strict": false + } + }, + "robot": { + "uid": "CobotMagic", + "robot_type": "CobotMagic", + "init_pos": [0.0, 0.0, 0.7775], + "init_qpos": [-0.3,0.3,1.0,1.0,-1.2,-1.2,0.0,0.0,0.6,0.6,0.0,0.0,0.05,0.05,0.05,0.05] + }, + "sensor": [ + { + "sensor_type": "StereoCamera", + "uid": "cam_high", + "width": 960, + "height": 540, + "enable_mask": false, + "enable_depth": false, + "left_to_right_pos": [0.059684025824163614, 0, 0], + "intrinsics": [453.851402686215, 453.8347628855552, 469.827725021235, 258.6656181845155], + "intrinsics_right": [453.4536601653505, 453.3306024582175, 499.13697412367776, 297.7176248477935], + "extrinsics": { + "eye": [0.35368482807598, 0.014695524383058989, 1.4517046071614774], + "target": [0.7186357573287919, -0.054534732904795505, 0.5232553674540066], + "up": [0.9306678549330372, -0.0005600064212467153, 0.3658647703553347] + } + }, + { + "sensor_type": "Camera", + "uid": "cam_right_wrist", + "width": 640, + "height": 480, + "enable_mask": false, + "intrinsics": [488.1665344238281, 488.1665344238281, 322.7323303222656, 213.17434692382812], + "extrinsics": { + "parent": "right_link6", + "pos": [-0.08, 0.0, 0.04], + "quat": [0.15304635, 0.69034543, -0.69034543, -0.15304635] + } + }, + { + "sensor_type": "Camera", + "uid": "cam_left_wrist", + "width": 640, + "height": 480, + "enable_mask": false, + "intrinsics": [488.1665344238281, 488.1665344238281, 322.7323303222656, 213.17434692382812], + "extrinsics": { + "parent": "left_link6", + "pos": [-0.08, 0.0, 0.04], + "quat": [0.15304635, 0.69034543, -0.69034543, -0.15304635] + } + }, + { + "sensor_type": "Camera", + "uid": "valid_cam_1", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [1.7, 0, 2.3], + "target": [0.6, 0, 0.8] + } + }, + { + "sensor_type": "Camera", + "uid": "valid_cam_2", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [2.0, 0, 1.8], + "target": [0.7, 0, 0.9] + } + }, + { + "sensor_type": "Camera", + "uid": "valid_cam_3", + "width": 1280, + "height": 960, + "enable_mask": false, + "intrinsics": [1400, 1400, 640, 480], + "extrinsics": { + "eye": [2.0, 0, 1.3], + "target": [0.7, 0, 0.9] + } + } + ], + "light": { + "direct": [ + { + "uid": "light_1", + "light_type": "point", + "color": [1.0, 1.0, 1.0], + "intensity": 100.0, + "init_pos": [2, 0, 2], + "radius": 20.0 + } + ] + }, + "background": [ + { + "uid": "table", + "shape": { + "shape_type": "Mesh", + "fpath": "CircleTableSimple/circle_table_simple.ply", + "compute_uv": true + }, + "attrs" : { + "mass": 10.0, + "static_friction": 2.0, + "dynamic_friction": 1.5, + "restitution": 0.1 + }, + "body_scale": [1, 1, 1], + "body_type": "kinematic", + "init_pos": [0.725, 0.0, 0.691], + "init_rot": [0, 90, 0] + } + ], + "rigid_object": [ + { + "uid": "plate", + "shape": { + "shape_type": "Mesh", + "fpath": "TableWare/tableware/plate/3_center.ply", + "compute_uv": true + }, + "body_scale": [0.001, 0.001, 0.001], + "init_pos": [0.5, 0.0, 1.0], + "init_rot": [180, 0, 0] + }, + { + "uid": "fork", + "shape": { + "shape_type": "Mesh", + "fpath": "TableWare/tableware/fork/standard_fork_scale.ply", + "compute_uv": true + }, + "body_scale": [1.0, 1.0, 1.0], + "init_pos": [0.5, 0.21, 1.0], + "attrs" : { + "mass": 0.01, + "static_friction": 0.95, + "dynamic_friction": 0.9, 
+ "restitution": 0.05, + "min_position_iters": 16, + "min_velocity_iters": 4 + } + }, + { + "uid": "spoon", + "shape": { + "shape_type": "Mesh", + "fpath": "TableWare/tableware/spoon/standard_spoon_a_rescale.ply", + "compute_uv": true + }, + "body_scale": [1.0, 1.0, 1.0], + "init_pos": [0.5, -0.21, 1.0], + "attrs" : { + "mass": 0.01, + "static_friction": 0.95, + "dynamic_friction": 0.9, + "restitution": 0.05, + "min_position_iters": 16, + "min_velocity_iters": 4 + } + } + ] +} diff --git a/configs/gym/agent/rearrangement_agent_v3/task_prompt.txt b/configs/gym/agent/rearrangement_agent_v3/task_prompt.txt new file mode 100644 index 00000000..280bd227 --- /dev/null +++ b/configs/gym/agent/rearrangement_agent_v3/task_prompt.txt @@ -0,0 +1,8 @@ +Task: +Use both arms to rearrange a fork and a spoon on opposite sides of a plate. Perform the following steps **simultaneously**. + +1. Grasp the fork with the left arm and the spoon with the right arm. + +2. Reorient both end-effectors to a downward-facing pose. + +3. Place the fork at y = +0.16 and the spoon at y = −0.16 relative to the plate’s center. \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 3aac3af3..6e392b52 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -9,6 +9,10 @@ import os import sys +os.environ.setdefault( + "AZURE_OPENAI_ENDPOINT", "https://mock-endpoint.openai.azure.com/" +) +os.environ.setdefault("AZURE_OPENAI_API_KEY", "mock-api-key-for-docs-build") sys.path.insert(0, os.path.abspath("../..")) diff --git a/docs/source/features/agents.md b/docs/source/features/agents.md new file mode 100644 index 00000000..7cb2356d --- /dev/null +++ b/docs/source/features/agents.md @@ -0,0 +1,166 @@ +# EmbodiAgent + +EmbodiAgent is a hierarchical multi-agent system that enables robots to perform complex manipulation tasks through closed-loop planning, code generation, and validation. The system combines vision-language models (VLMs) and large language models (LLMs) to translate high-level goals into executable robot actions. + +## Quick Start + +### Prerequisites +Ensure you have access to Azure OpenAI or a compatible LLM endpoint. + +```bash +# Set environment variables +export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/" +export AZURE_OPENAI_API_KEY="your-api-key" +``` + +### Using Different LLM/VLM APIs + +The system uses LangChain's `AzureChatOpenAI` by default. To use different LLM/VLM providers, you can modify the `create_llm` function in `embodichain/agents/hierarchy/llm.py`. 
+ +#### Azure OpenAI +```bash +export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/" +export AZURE_OPENAI_API_KEY="your-api-key" +export OPENAI_API_VERSION="2024-10-21" # Optional, defaults to "2024-10-21" +``` + +#### OpenAI +To use OpenAI directly instead of Azure, modify `llm.py`: +```python +from langchain_openai import ChatOpenAI + +def create_llm(*, temperature=0.0, model="gpt-4o"): + return ChatOpenAI( + temperature=temperature, + model=model, + api_key=os.getenv("OPENAI_API_KEY"), + ) +``` + +Then set: +```bash +export OPENAI_API_KEY="your-api-key" +``` + +#### Other Providers +You can use other LangChain-compatible providers by modifying the `create_llm` function, for example: + +**Anthropic Claude:** +```python +from langchain_anthropic import ChatAnthropic + +def create_llm(*, temperature=0.0, model="claude-3-opus-20240229"): + return ChatAnthropic( + temperature=temperature, + model=model, + anthropic_api_key=os.getenv("ANTHROPIC_API_KEY"), + ) +``` + +**Google Gemini:** +```python +from langchain_google_genai import ChatGoogleGenerativeAI + +def create_llm(*, temperature=0.0, model="gemini-pro"): + return ChatGoogleGenerativeAI( + temperature=temperature, + model=model, + google_api_key=os.getenv("GOOGLE_API_KEY"), + ) +``` + +### Run the System + +Run the agent system with the following command: + +```bash +python embodichain/lab/scripts/run_agent.py \ + --task_name YourTask \ + --gym_config configs/gym/your_task/gym_config.json \ + --agent_config configs/gym/agent/your_agent/agent_config.json \ + --regenerate False +``` + +**Parameters:** +- `--task_name`: Name identifier for the task +- `--gym_config`: Path to the gym environment configuration file +- `--agent_config`: Path to the agent configuration file (defines prompts and agent behavior) +- `--regenerate`: If `True`, forces regeneration of plans/code even if cached + +## System Architecture + +The system operates on a closed-loop control cycle: + +- **Observe**: The `TaskAgent` perceives the environment via multi-view camera inputs. +- **Plan**: It decomposes the goal into natural language steps. +- **Code**: The `CodeAgent` translates steps into executable Python code using atomic actions. +- **Execute**: The code runs in the environment; runtime errors are caught immediately. +- **Validate**: The `ValidationAgent` analyzes the result images, selects the best camera angle, and judges success. +- **Refine**: If validation fails, feedback is sent back to the agents to regenerate the plan or code. + +--- + +## Core Components + +### TaskAgent +*Located in:* `embodichain/agents/hierarchy/task_agent.py` + +Responsible for high-level reasoning. It parses visual observations and outputs a structured plan. + +* For every step, it generates a specific condition (e.g., "The cup must be held by the gripper") which is used later by the ValidationAgent. +* Prompt Strategies: + * `one_stage_prompt`: Direct VLM-to-Plan generation. + * `two_stage_prompt`: Separates visual analysis from planning logic. + +### CodeAgent +*Located in:* `embodichain/agents/hierarchy/code_agent.py` + +Translates natural language plans into executable Python code using atomic actions from the action bank. 
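+The generated script may take either of the two forms that `act()` executes: a module-level sequence of `drive(...)` calls (the runtime `**kwargs` are injected into each call automatically), or a `create_agent_action_list(**kwargs)` function that returns the action list. A hypothetical example of the module-level form is sketched below; the real argument names come from the `atom_actions` documentation in the agent config, so the parameters shown here are purely illustrative:
+
+```python
+# Illustrative sketch only: drive() is provided by embodichain.toolkits.interfaces
+# at execution time, and its real parameters are defined by the atom_actions prompt.
+drive(action="pick", uid="bottle")
+drive(action="move", uid="bottle", offset=[0.05, -0.10, 0.125])
+drive(action="place", uid="bottle", target=[0.7, -0.1])
+```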
+ +* Generates Python code that follows strict coding guidelines (no loops, only provided APIs) +* Executes code in a sandboxed environment with immediate error detection +* Uses Abstract Syntax Tree (AST) parsing to ensure code safety and correctness +* Supports few-shot learning through code examples in the configuration + + +### ValidationAgent +*Located in:* `embodichain/agents/hierarchy/validation_agent.py` + +Closes the loop by verifying if the robot actually achieved what it planned. + +* Uses a specialized LLM call (`select_best_view_dir`) to analyze images from all cameras and pick the single best angle that proves the action's outcome, ignoring irrelevant views. +* If an error occurs (runtime or logic), it generates a detailed explanation which is fed back to the `TaskAgent` or `CodeAgent` for the next attempt. + +--- + +## Configuration Guide + +The `Agent` configuration block controls the context provided to the LLMs. Prompt files are resolved in the following order: + +1. **Config directory**: Task-specific prompt files in the same directory as the agent configuration file (e.g., `configs/gym/agent/pour_water_agent/`) +2. **Default prompts directory**: Reusable prompt templates in `embodichain/agents/prompts/` + +| Parameter | Description | Typical Use | +| :--- | :--- | :--- | +| `task_prompt` | Task-specific goal description | "Pour water from the red cup to the blue cup." | +| `basic_background` | Physical rules & constraints | World coordinate system definitions, safety rules. | +| `atom_actions` | API Documentation | List of available functions (e.g., `drive(action='pick', ...)`). | +| `code_prompt` | Coding guidelines | "Use provided APIs only. Do not use loops." | +| `code_example` | Few-shot examples | Previous successful code snippets to guide style. | + +--- + +## File Structure + +```text +embodichain/agents/ +├── hierarchy/ +│ ├── agent_base.py # Abstract base handling prompts & images +│ ├── task_agent.py # Plan generation logic +│ ├── code_agent.py # Code generation & AST execution engine +│ ├── validation_agent.py # Visual analysis & view selection +│ └── llm.py # LLM configuration and instances +├── mllm/ +│ └── prompt/ # Prompt templates (LangChain) +└── prompts/ # Agent prompt templates +``` diff --git a/docs/source/index.rst b/docs/source/index.rst index c0ac442e..f87cbd7d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -36,6 +36,7 @@ Table of Contents :caption: Features :glob: + features/agents.md features/workspace_analyzer/index* .. toctree:: diff --git a/embodichain/agents/hierarchy/__init__.py b/embodichain/agents/hierarchy/__init__.py new file mode 100644 index 00000000..30c31bc9 --- /dev/null +++ b/embodichain/agents/hierarchy/__init__.py @@ -0,0 +1,19 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +from langchain_openai import AzureChatOpenAI +from langchain_openai import ChatOpenAI +import os diff --git a/embodichain/agents/hierarchy/agent_base.py b/embodichain/agents/hierarchy/agent_base.py new file mode 100644 index 00000000..12ba2c1d --- /dev/null +++ b/embodichain/agents/hierarchy/agent_base.py @@ -0,0 +1,94 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from abc import ABCMeta +import os +from pathlib import Path +from embodichain.utils.utility import load_txt +import embodichain.agents.mllm.prompt as mllm_prompt +from embodichain.data import database_2d_dir + + +def _resolve_prompt_path(file_name: str, config_dir: str = None) -> str: + # If absolute path, use directly + if os.path.isabs(file_name): + if os.path.exists(file_name): + return file_name + raise FileNotFoundError(f"Prompt file not found: {file_name}") + + # Try config directory first (for task-specific prompts) + if config_dir: + config_path = os.path.join(config_dir, file_name) + if os.path.exists(config_path): + return config_path + + # Try agents/prompts directory (for reusable prompts) + agents_prompts_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "prompts" + ) + agents_path = os.path.join(agents_prompts_dir, file_name) + if os.path.exists(agents_path): + return agents_path + + # If still not found, raise error with search paths + searched_paths = [] + if config_dir: + searched_paths.append(f" - {config_dir}/{file_name}") + searched_paths.append(f" - {agents_prompts_dir}/{file_name}") + + raise FileNotFoundError( + f"Prompt file not found: {file_name}\n" + f"Searched in:\n" + "\n".join(searched_paths) + ) + + +class AgentBase(metaclass=ABCMeta): + def __init__(self, **kwargs) -> None: + + assert ( + "prompt_kwargs" in kwargs.keys() + ), "Key prompt_kwargs must exist in config." + + for key, value in kwargs.items(): + setattr(self, key, value) + + # Get config directory if provided + config_dir = kwargs.get("config_dir", None) + if config_dir: + config_dir = os.path.dirname(os.path.abspath(config_dir)) + + # Preload and store prompt contents inside self.prompt_kwargs + for key, val in self.prompt_kwargs.items(): + if val["type"] == "text": + file_path = _resolve_prompt_path(val["name"], config_dir) + val["content"] = load_txt(file_path) # ← store content here + else: + raise ValueError( + f"Now only support `text` type but {val['type']} is given." 
+ ) + + def generate(self, *args, **kwargs): + pass + + def act(self, *args, **kwargs): + pass + + def get_composed_observations(self, **kwargs): + ret = {"observations": kwargs.get("env").get_obs_for_agent()} + for key, val in self.prompt_kwargs.items(): + ret[key] = val["content"] + ret.update(kwargs) + return ret diff --git a/embodichain/agents/hierarchy/code_agent.py b/embodichain/agents/hierarchy/code_agent.py new file mode 100644 index 00000000..3aa999a0 --- /dev/null +++ b/embodichain/agents/hierarchy/code_agent.py @@ -0,0 +1,288 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from embodichain.agents.hierarchy.agent_base import AgentBase +from langchain_core.prompts import ChatPromptTemplate +import os +import numpy as np +from typing import Dict, Tuple +from embodichain.agents.mllm.prompt import CodePrompt +from embodichain.data import database_agent_prompt_dir +from pathlib import Path +import re +import importlib.util +from datetime import datetime + + +def format_execution_history(execution_history): + if not execution_history or len(execution_history) == 0: + return "None." + + return "\n\n".join(f"{i}. {entry}" for i, entry in enumerate(execution_history, 1)) + + +def extract_python_code_and_text(llm_response: str) -> Tuple[str, str]: + """ + Extract exactly ONE python code block from the LLM response, + and return: + - code: the content inside the python block + - text: all remaining explanation text (outside the code block) + + Raises ValueError if zero or multiple python blocks are found. + """ + + pattern = r"```python\s*(.*?)\s*```" + matches = list(re.finditer(pattern, llm_response, re.DOTALL)) + + if len(matches) == 0: + raise ValueError("No python code block found in LLM response.") + if len(matches) > 1: + raise ValueError("Multiple python code blocks found in LLM response.") + + match = matches[0] + code = match.group(1).strip() + + # Optional sanity check + if not code.startswith("#") and not code.startswith("drive("): + raise ValueError( + f"Invalid code block content. 
Expected `drive(...)` or `# TASK_COMPLETE`, got:\n{code}" + ) + + # Extract remaining text (before + after the code block) + text_before = llm_response[: match.start()].strip() + text_after = llm_response[match.end() :].strip() + + explanation_text = "\n\n".join(part for part in [text_before, text_after] if part) + + return code, explanation_text + + +def format_llm_response_md( + llm_analysis: str, # plain-text explanation (NO code) + extracted_code: str, # validated executable code + step_id: int = None, + execution_history: str = None, + obs_image_path: Path = None, + md_file_path: Path = None, +) -> str: + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + header = f"## Step: {step_id if step_id is not None else '-'} | {ts}\n\n" + + history_block = "" + if execution_history: + history_block = ( + "### Execution History (Input to LLM)\n\n" + "```\n" + f"{execution_history}\n" + "```\n\n" + ) + + image_block = "" + if obs_image_path is not None and md_file_path is not None: + try: + rel_path = obs_image_path.relative_to(md_file_path.parent) + except ValueError: + # Fallback: just use filename + rel_path = obs_image_path.name + + image_block = ( + "### Observation Image\n\n" f"![]({Path(rel_path).as_posix()})\n\n" + ) + + body = ( + image_block + history_block + "### LLM Analysis\n\n" + f"{llm_analysis.strip()}\n\n" + "### Executed Code\n\n" + "```python\n" + f"{extracted_code.strip()}\n" + "```\n\n" + "---\n\n" + ) + + return header + body + + +class CodeAgent(AgentBase): + query_prefix = "# " + query_suffix = "." + prompt: ChatPromptTemplate + prompt_kwargs: Dict[str, Dict] + + def __init__(self, llm, **kwargs) -> None: + super().__init__(**kwargs) + if llm is None: + raise ValueError( + "LLM is None. Please set the following environment variables:\n" + " - AZURE_OPENAI_ENDPOINT\n" + " - AZURE_OPENAI_API_KEY\n" + "Example:\n" + " export AZURE_OPENAI_ENDPOINT='https://your-endpoint.openai.azure.com/'\n" + " export AZURE_OPENAI_API_KEY='your-api-key'" + ) + self.llm = llm + + def generate(self, **kwargs): + log_dir = kwargs.get( + "log_dir", Path(database_agent_prompt_dir) / self.task_name + ) + file_path = log_dir / "agent_generated_code.py" + + # Check if the file already exists + if not kwargs.get("regenerate", False): + if file_path.exists(): + print(f"Code file already exists at {file_path}, skipping writing.") + return file_path, kwargs, None + + # Generate code via LLM + prompt = getattr(CodePrompt, self.prompt_name)( + **kwargs, + ) + + # insert feedback if exists + if len(kwargs.get("error_messages", [])) != 0: + # just use the last one + last_code = kwargs["generated_codes"][-1] + last_error = kwargs["error_messages"][-1] + last_observation = ( + kwargs.get("observation_feedbacks")[-1] + if kwargs.get("observation_feedbacks") + else None + ) + + # Add extra human message with feedback + feedback_msg = self.build_feedback_message( + last_code, last_error, last_observation + ) + prompt.messages.append(feedback_msg) + + llm_code = self.llm.invoke(prompt) + + # Normalize content + llm_code = getattr(llm_code, "content", str(llm_code)) + + print(f"\033[92m\nCode agent output:\n{llm_code}\n\033[0m") + + # Write the code to the file if it does not exist + match = re.search(r"```python\n(.*?)\n```", llm_code, re.DOTALL) + if match: + code_to_save = match.group(1).strip() + else: + code_to_save = llm_code.strip() + + file_path.parent.mkdir(parents=True, exist_ok=True) + with open(file_path, "w") as f: + f.write(code_to_save) + print(f"Generated function code saved to {file_path}") + + 
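+ # Return the saved code file path, the call kwargs, and the extracted code string.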
return file_path, kwargs, code_to_save + + def act(self, code_file_path, **kwargs): + """Execute generated code with proper execution environment. + + Supports two modes: + 1. If code defines 'create_agent_action_list' function, call it + 2. If code contains module-level drive() calls, execute them directly + """ + import ast + + # Read the generated code file + with open(code_file_path, "r") as f: + code_content = f.read() + + # Build execution namespace with necessary imports + ns = { + "__builtins__": __builtins__, + "__name__": "__main__", + "__file__": str(code_file_path), + "kwargs": kwargs, # Make kwargs available for injection + } + + # Import toolkit functions into namespace + try: + exec( + "from embodichain.toolkits.interfaces import *", + ns, + ns, + ) + except Exception as e: + raise RuntimeError( + "Failed to import embodichain.toolkits.interfaces" + ) from e + + # Parse code to check if it defines a function or contains module-level calls + tree = ast.parse(code_content) + + # Check if code defines create_agent_action_list function + has_function = any( + isinstance(node, ast.FunctionDef) + and node.name == "create_agent_action_list" + for node in tree.body + ) + + if has_function: + # Execute code (function will be defined in namespace) + exec(code_content, ns, ns) + + # Call the function if it exists + if "create_agent_action_list" in ns: + result = ns["create_agent_action_list"](**kwargs) + print("Function executed successfully.") + return result + else: + raise AttributeError( + "The function 'create_agent_action_list' was not found after execution." + ) + else: + # Code contains module-level drive() calls + # AST transformer to inject **kwargs into function calls + class InjectKwargs(ast.NodeTransformer): + def visit_Call(self, node): + self.generic_visit(node) + # Inject **kwargs if not present + has_kwargs = any( + kw.arg is None + and isinstance(kw.value, ast.Name) + and kw.value.id == "kwargs" + for kw in node.keywords + ) + if not has_kwargs: + node.keywords.append( + ast.keyword( + arg=None, value=ast.Name(id="kwargs", ctx=ast.Load()) + ) + ) + return node + + # Transform AST to inject kwargs + tree = InjectKwargs().visit(tree) + ast.fix_missing_locations(tree) + + # Compile and execute transformed code + compiled_code = compile(tree, filename=str(code_file_path), mode="exec") + exec(compiled_code, ns, ns) + + # Collect actions from drive() calls if they were executed + # drive() function stores actions in env._episode_action_list + if "env" in kwargs: + env = kwargs["env"] + if hasattr(env, "_episode_action_list") and env._episode_action_list: + print( + f"Collected {len(env._episode_action_list)} actions from module-level drive() calls." + ) + return env._episode_action_list + + print("Code executed successfully, but no actions were collected.") + return [] diff --git a/embodichain/agents/hierarchy/llm.py b/embodichain/agents/hierarchy/llm.py new file mode 100644 index 00000000..dce50532 --- /dev/null +++ b/embodichain/agents/hierarchy/llm.py @@ -0,0 +1,72 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +import os +from langchain_openai import AzureChatOpenAI + +# ------------------------------------------------------------------------------ +# Environment configuration +# ------------------------------------------------------------------------------ + +# Clear proxy if not needed (optional, can be set via environment variables) + +os.environ["ALL_PROXY"] = "" +os.environ["all_proxy"] = "" + +# Proxy configuration (optional, uncomment if needed) +# os.environ["HTTP_PROXY"] = "http://127.0.0.1:7890" +# os.environ["HTTPS_PROXY"] = "http://127.0.0.1:7890" + +# API version (optional, defaults to "2024-10-21" if not set) +# os.environ["OPENAI_API_VERSION"] = "2024-10-21" + +# Note: AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY must be set via environment variables +# Example in bash: +# export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/" +# export AZURE_OPENAI_API_KEY="your-api-key" + +# ------------------------------------------------------------------------------ +# LLM factory +# ------------------------------------------------------------------------------ + + +def create_llm(*, temperature=0.0, model="gpt-4o"): + return AzureChatOpenAI( + temperature=temperature, + model=model, + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), + api_key=os.getenv("AZURE_OPENAI_API_KEY"), + api_version=os.getenv("OPENAI_API_VERSION", "2024-10-21"), + ) + + +# ------------------------------------------------------------------------------ +# LLM instances +# ------------------------------------------------------------------------------ + + +# Initialize LLM instances, but handle errors gracefully for documentation builds +def _create_llm_safe(*, temperature=0.0, model="gpt-4o"): + try: + return create_llm(temperature=temperature, model=model) + except Exception: + return None + + +task_llm = _create_llm_safe(temperature=0.0, model="gpt-4o") +code_llm = _create_llm_safe(temperature=0.0, model="gpt-4o") +validation_llm = _create_llm_safe(temperature=0.0, model="gpt-4o") +view_selection_llm = _create_llm_safe(temperature=0.0, model="gpt-4o") diff --git a/embodichain/agents/hierarchy/task_agent.py b/embodichain/agents/hierarchy/task_agent.py new file mode 100644 index 00000000..5f7b5403 --- /dev/null +++ b/embodichain/agents/hierarchy/task_agent.py @@ -0,0 +1,157 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +from typing import List, Dict, Tuple +from embodichain.agents.hierarchy.agent_base import AgentBase +from langchain_core.prompts import ChatPromptTemplate +from embodichain.data import database_2d_dir +from embodichain.utils.utility import load_txt +from embodichain.agents.mllm.prompt import TaskPrompt +from embodichain.data import database_agent_prompt_dir +from pathlib import Path +import numpy as np +import time +import re + +USEFUL_INFO = """The error may be caused by: +1. You did not follow the basic background information, especially the world coordinate system with its xyz directions. +2. You did not take into account the NOTE given in the atom actions or in the example functions. +3. You did not follow the steps of the task descriptions.\n +""" + + +def extract_plan_and_validation(text: str) -> Tuple[str, List[str], List[str]]: + def get_section(src: str, name: str, next_name) -> str: + if next_name: + pat = re.compile( + rf"\[{name}\]\s*:\s*(.*?)\s*(?=\[{next_name}\]\s*:|\Z)", + re.DOTALL | re.IGNORECASE, + ) + else: + pat = re.compile( + rf"\[{name}\]\s*:\s*(.*?)\s*\Z", + re.DOTALL | re.IGNORECASE, + ) + m = pat.search(src) + return m.group(1).strip() if m else "" + + step_re = re.compile( + r"Step\s*\d+\s*:.*?(?=Step\s*\d+\s*:|\Z)", + re.DOTALL | re.IGNORECASE, + ) + + # ---- plans ---- + plans_raw = get_section(text, "PLANS", "VALIDATION_CONDITIONS") + plan_steps = [m.group(0).rstrip() for m in step_re.finditer(plans_raw)] + plan_str = "\n".join(plan_steps) + + # normalized plan list (strip "Step k:") + plan_list = [] + for step in plan_steps: + content = re.sub(r"^Step\s*\d+\s*:\s*", "", step, flags=re.IGNORECASE).strip() + if content: + plan_list.append(content) + + # ---- validations ---- + vals_raw = get_section(text, "VALIDATION_CONDITIONS", None) + validation_list = [] + for m in step_re.finditer(vals_raw): + content = re.sub( + r"^Step\s*\d+\s*:\s*", "", m.group(0), flags=re.IGNORECASE + ).strip() + if content: + validation_list.append(content) + + return plan_str, plan_list, validation_list + + +class TaskAgent(AgentBase): + prompt: ChatPromptTemplate + object_list: List[str] + target: np.ndarray + prompt_name: str + prompt_kwargs: Dict[str, Dict] + + def __init__(self, llm, **kwargs) -> None: + super().__init__(**kwargs) + if llm is None: + raise ValueError( + "LLM is None. 
Please set the following environment variables:\n" + " - AZURE_OPENAI_ENDPOINT\n" + " - AZURE_OPENAI_API_KEY\n" + "Example:\n" + " export AZURE_OPENAI_ENDPOINT='https://your-endpoint.openai.azure.com/'\n" + " export AZURE_OPENAI_API_KEY='your-api-key'" + ) + self.llm = llm + + def generate(self, **kwargs) -> str: + log_dir = kwargs.get( + "log_dir", Path(database_agent_prompt_dir) / self.task_name + ) + file_path = log_dir / "agent_generated_plan.txt" + + # Check if the file already exists + if not kwargs.get("regenerate", False): + if file_path.exists(): + print(f"Plan file already exists at {file_path}, skipping writing.") + return load_txt(file_path) + + # Generate query via LLM + prompts_ = getattr(TaskPrompt, self.prompt_name)(**kwargs) + if isinstance(prompts_, list): + # TODO: support two-stage prompts with feedback + start_time = time.time() + response = self.llm.invoke(prompts_[0]) + query = response.content + print( + f"\033[92m\nSystem tasks output ({np.round(time.time()-start_time, 4)}s):\n{query}\n\033[0m" + ) + for prompt in prompts_[1:]: + temp = prompt["kwargs"] + temp.update({"query": query}) + start_time = time.time() + response = self.llm.invoke(prompt["prompt"].invoke(temp)) + query = response.content + print( + f"\033[92m\nSystem tasks output({np.round(time.time()-start_time, 4)}s):\n{query}\n\033[0m" + ) + else: + # insert feedback if exists + if len(kwargs.get("error_messages", [])) != 0: + # just use the last one + last_plan = kwargs["generated_plans"][-1] + last_code = kwargs["generated_codes"][-1] + last_error = kwargs["error_messages"][-1] + + # Add extra human message with feedback + feedback_msg = self.build_feedback_message( + last_plan, last_code, last_error + ) + prompts_.messages.append(feedback_msg) + + response = self.llm.invoke(prompts_) + print(f"\033[92m\nTask agent output:\n{response.content}\n\033[0m") + + file_path.parent.mkdir(parents=True, exist_ok=True) + with open(file_path, "w") as f: + f.write(response.content) + print(f"Generated task plan saved to {file_path}") + + return response.content + + def act(self, *args, **kwargs): + return super().act(*args, **kwargs) diff --git a/embodichain/agents/hierarchy/validation_agent.py b/embodichain/agents/hierarchy/validation_agent.py new file mode 100644 index 00000000..98e5f159 --- /dev/null +++ b/embodichain/agents/hierarchy/validation_agent.py @@ -0,0 +1,241 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +import os +from langchain_core.messages import SystemMessage, HumanMessage +from abc import ABCMeta +from embodichain.utils.utility import encode_image_from_path +import glob +from embodichain.agents.hierarchy.llm import view_selection_llm + + +def save_obs_image(obs_image, save_dir, step_id=None): + """ + Save observation image using encode_image() and return its file path. 
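+ Returns None when obs_image is None; otherwise returns the pathlib.Path of the saved PNG file.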
+ """ + import base64 + from embodichain.utils.utility import encode_image + + if obs_image is None: + return None + + if isinstance(save_dir, str): + from pathlib import Path + + save_dir = Path(save_dir) + + save_dir.mkdir(parents=True, exist_ok=True) + + name = f"obs_step_{step_id}.png" if step_id is not None else "obs.png" + img_path = save_dir / name + + # Encode to base64 + base64_image = encode_image(obs_image) + + # Decode base64 → bytes + img_bytes = base64.b64decode(base64_image) + + # Write to file + with open(img_path, "wb") as f: + f.write(img_bytes) + + return img_path + + +def get_obj_position_info(env): + import json + + position_info = {} + obj_uids = env.sim.get_rigid_object_uid_list() + for obj_name in obj_uids: + target_obj = env.sim.get_rigid_object(obj_name) + target_obj_pose = target_obj.get_local_pose(to_matrix=True).squeeze(0)[:3, 3] + position_info[obj_name] = target_obj_pose.tolist() + return json.dumps(position_info, indent=4) + + +class ValidationAgent(metaclass=ABCMeta): + + def __init__(self, llm, **kwargs) -> None: + super().__init__() + for key, value in kwargs.items(): + setattr(self, key, value) + if llm is None: + raise ValueError( + "LLM is None. Please set the following environment variables:\n" + " - AZURE_OPENAI_ENDPOINT\n" + " - AZURE_OPENAI_API_KEY\n" + "Example:\n" + " export AZURE_OPENAI_ENDPOINT='https://your-endpoint.openai.azure.com/'\n" + " export AZURE_OPENAI_API_KEY='your-api-key'" + ) + self.llm = llm + + def validate(self, step_names, problematic_code, error_message, image_files): + # Construct the prompt + prompt = f""" + Analyze the execution of the following robot task: + + Task name: {self.task_name} + Task description: {self.task_description} + Basic background knowledge: {self.basic_background} + + You will be given images showing each step of the execution. For the step sequence: + {', '.join(step_names)} + + Provide the following analysis: + 1. Decide whether the full task succeeded or failed. + 2. If the task failed, provide a precise and detailed explanation. + + Below is a potentially problematic piece of code and the corresponding execution error: + + ```python + {problematic_code} + # Execution error: + {error_message} + Explain whether (and how) this code contributed to the observed failure. + """ + + # Prepare message content for API call + user_content = [] + + # Add textual prompt + user_content.append({"type": "text", "text": prompt}) + + # Add images and step names + for img_path in image_files: + filename = os.path.basename(img_path) + first_underscore_pos = filename.find("_") + if first_underscore_pos != -1: + step_name = filename[first_underscore_pos + 1 :].rsplit(".", 1)[0] + else: + step_name = filename.rsplit(".", 1)[0] + + # Add step name + user_content.append({"type": "text", "text": f"Step: {step_name}"}) + + # Add image as base64 + base64_image = encode_image_from_path(img_path) + user_content.append( + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{base64_image}"}, + } + ) + + messages = [ + SystemMessage( + content="You are a robot task execution analysis expert. Please analyze the provided image sequence." 
+ ), + HumanMessage(content=user_content), + ] + + response = self.llm.invoke(messages) + return response.content + + def select_best_view_dir( + self, img_dirs: dict, action_description: str, valid_condition: str + ): + """ + img_dirs: { + "cam_1": Path, + "cam_2": Path, + "cam_3": Path + } + """ + + # --- collect final images --- + last_images = {} + for cam_id, cam_dir in img_dirs.items(): + imgs = sorted( + glob.glob(os.path.join(cam_dir, "obs_step_*.png")), + key=lambda p: int(os.path.basename(p).split("_")[-1].split(".")[0]), + ) + if imgs: + last_images[cam_id] = imgs[-1] + + if not last_images: + raise ValueError("No images found in any camera directory.") + + # --- system prompt --- + system_prompt = ( + "You are a robot perception assistant specialized in VIEW SELECTION.\n\n" + "TASK:\n" + "- You are given ONE final observation image from EACH camera view.\n" + "- Your job is NOT to judge success or failure.\n" + "- Your job is ONLY to select the SINGLE camera view that is MOST SUITABLE\n" + " for OBJECT-LEVEL validation of the action result.\n\n" + "ACTION CONTEXT:\n" + "- The robot has just executed ONE atomic action.\n" + "- You are given the action intention and the expected object-level outcome\n" + " ONLY to help you decide which view best reveals that outcome.\n\n" + "SELECTION CRITERIA (PRIORITY ORDER):\n" + "- Prefer views with:\n" + " * the clearest visibility of the relevant object(s)\n" + " * minimal occlusion by the arm or environment\n" + " * the clearest evidence related to the expected object-level result\n" + " (e.g., contact, separation, support, stability)\n\n" + "STRICT CONSTRAINTS:\n" + "- Do NOT judge robot motion quality or execution accuracy.\n" + "- Do NOT reason about numeric values (distance, angle, offset).\n" + "- Do NOT decide whether the action succeeded or failed.\n" + "- If multiple views are acceptable, choose the clearest overall view.\n\n" + "OUTPUT FORMAT (STRICT):\n" + "Output EXACTLY ONE of the following tokens:\n" + "- cam_1\n" + "- cam_2\n" + "- cam_3\n" + ) + + # --- human content --- + human_content = [ + { + "type": "text", + "text": ( + "Select the best camera view for object-level validation.\n\n" + "--------------------------------------------------\n" + "ACTION DESCRIPTION (INTENT ONLY):\n" + f"{action_description}\n\n" + "EXPECTED OBJECT-LEVEL RESULT (REFERENCE ONLY):\n" + f"{valid_condition}\n" + "--------------------------------------------------" + ), + } + ] + + for cam_id, img_path in last_images.items(): + img_b64 = encode_image_from_path(img_path) + human_content.extend( + [ + {"type": "text", "text": f"View candidate: {cam_id}"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{img_b64}"}, + }, + ] + ) + + messages = [ + SystemMessage(content=system_prompt), + HumanMessage(content=human_content), + ] + + response = view_selection_llm.invoke(messages).content.strip() + + if response not in img_dirs: + raise ValueError(f"Invalid camera selection from LLM: {response}") + + return response diff --git a/embodichain/agents/mllm/prompt/__init__.py b/embodichain/agents/mllm/prompt/__init__.py new file mode 100644 index 00000000..55bc408b --- /dev/null +++ b/embodichain/agents/mllm/prompt/__init__.py @@ -0,0 +1,8 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# All rights reserved. 
+# ---------------------------------------------------------------------------- + +from .task_prompt import TaskPrompt +from .code_prompt import CodePrompt diff --git a/embodichain/agents/mllm/prompt/code_prompt.py b/embodichain/agents/mllm/prompt/code_prompt.py new file mode 100644 index 00000000..845064e5 --- /dev/null +++ b/embodichain/agents/mllm/prompt/code_prompt.py @@ -0,0 +1,149 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from langchain_core.messages import SystemMessage +from langchain_core.prompts import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, +) +from embodichain.utils.utility import encode_image + + +class CodePrompt: + @staticmethod + def one_stage_prompt(**kwargs) -> ChatPromptTemplate: + prompt = ChatPromptTemplate.from_messages( + [ + SystemMessage( + content="You are an AI assistant that can generate python code to execute robot arms." + ), + HumanMessagePromptTemplate.from_template( + [ + { + "type": "text", + "text": ( + "Generate a Python code snippet that accomplishes the following task:\n" + "{query}\n\n" + "You must strictly follow the rules and available functions described below:\n" + "{code_prompt}\n\n" + "Here are some reference examples of the expected output code:\n" + "{code_example}\n\n" + ), + } + ] + ), + ] + ) + return prompt.invoke(kwargs) + + @staticmethod + def unified_prompt(observations, **kwargs): + """ + Unified Vision→Code prompt: + - Model observes the image + - Understands the scene and the task goal + - Generates final executable Python code using atomic robot APIs + """ + + # Encode the image + observation = observations["rgb"] + kwargs.update({"observation": encode_image(observation)}) + + prompt = ChatPromptTemplate.from_messages( + [ + SystemMessage( + content=( + "You are a reliable Vision-Language-Code robot assistant. " + "You observe an image, understand the scene and the task goal, " + "and generate correct Python code using ONLY the allowed atomic robot actions. " + "Your final output must be a single Python code block." 
+ ) + ), + HumanMessagePromptTemplate.from_template( + [ + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64,{observation}", + }, + }, + { + "type": "text", + "text": ( + "### Task Goal\n" + "{task_prompt}\n\n" + "### Environment Background\n" + "{basic_background}\n\n" + "### Allowed Atomic Actions\n" + "{atom_actions}\n\n" + "### Code Rules\n" + "{code_prompt}\n\n" + "### Reference Code Examples\n" + "{code_example}\n\n" + "### Final Instructions\n" + "Understand the scene from the image and generate final executable Python code " + "that performs the task using ONLY the allowed atomic actions.\n\n" + "Your entire response must be EXACTLY one Python code block:\n" + "```python\n" + "# your solution code here\n" + "```\n" + ), + }, + ] + ), + ] + ) + + return prompt.invoke(kwargs) + + @staticmethod + def one_stage_prompt_according_to_task_plan(**kwargs) -> ChatPromptTemplate: + prompt = ChatPromptTemplate.from_messages( + [ + SystemMessage( + content=( + "You are a reliable robot control code generator.\n" + "Your task is to generate Python code that executes robot arm actions.\n\n" + "CRITICAL RULES:\n" + "- The TASK PLAN defines the available atomic actions, rules, and execution logic.\n" + "- You MUST strictly follow the TASK PLAN.\n" + "- The CONSTRAINTS section contains additional global constraints you must obey.\n" + "- Do NOT invent new actions, functions, parameters, or control flow.\n" + "- You MAY include Python comments (# ...) inside the code.\n" + "- Your ENTIRE response MUST be a single Python code block.\n" + "- The code block MUST be directly executable without modification.\n" + "- Do NOT include any text, explanation, or markdown outside the Python code block.\n" + ) + ), + HumanMessagePromptTemplate.from_template( + [ + { + "type": "text", + "text": ( + "TASK PLAN (atomic actions, rules, and intended behavior):\n" + "{task_plan}\n\n" + "GLOBAL CONSTRAINTS (must be satisfied):\n" + "{code_prompt}\n\n" + "REFERENCE CODE (style and structure only; do NOT copy logic):\n" + "{code_example}\n\n" + "Generate the corrected Python code now." + ), + } + ] + ), + ] + ) + return prompt.invoke(kwargs) diff --git a/embodichain/agents/mllm/prompt/task_prompt.py b/embodichain/agents/mllm/prompt/task_prompt.py new file mode 100644 index 00000000..746e0f18 --- /dev/null +++ b/embodichain/agents/mllm/prompt/task_prompt.py @@ -0,0 +1,144 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +import torch +from langchain_core.messages import SystemMessage +from langchain_core.prompts import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, +) +from embodichain.utils.utility import encode_image + + +class TaskPrompt: + @staticmethod + def one_stage_prompt(observations, **kwargs): + """ + Hybrid one-pass prompt: + Step 1: VLM analyzes the image and extracts object IDs. + Step 2: LLM generates task instructions using only those IDs. + """ + # Encode image + observation = ( + observations["rgb"].cpu().numpy() + if isinstance(observations["rgb"], torch.Tensor) + else observations["rgb"] + ) + kwargs.update({"observation": encode_image(observation)}) + + # Build hybrid prompt + prompt = ChatPromptTemplate.from_messages( + [ + SystemMessage( + content=( + "You are a precise and reliable robotic manipulation planner. " + "Given a camera observation and a task description, you must generate " + "a clear, step-by-step task plan for a robotic arm. " + "All actions must strictly use the provided atomic API functions, " + "and the plan must be executable without ambiguity." + ) + ), + HumanMessagePromptTemplate.from_template( + [ + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64,{observation}", + }, + }, + { + "type": "text", + "text": ( + "Here is the latest camera observation.\n" + "First, analyze the scene in the image.\n" + "Then, using the context below, produce an actionable task plan.\n\n" + "**Environment background:** \n{basic_background}\n\n" + '**Task goal:** \n"{task_prompt}"\n\n' + "**Available atomic actions:** \n{atom_actions}\n" + ), + }, + ] + ), + ] + ) + + # Return the prompt template and kwargs to be executed by the caller + return prompt.invoke(kwargs) + + @staticmethod + def two_stage_prompt(observations, **kwargs): + # for VLM generate image descriptions + prompt = ChatPromptTemplate.from_messages( + [ + SystemMessage( + content="You are a helpful assistant to operate a robotic arm with a camera to generate task plans according to descriptions." + ), + HumanMessagePromptTemplate.from_template( + [ + { + "type": "image_url", + "image_url": { + "url": "data:image/jpg;base64,{observation}", + }, + }, + { + "type": "text", + "text": "What is in the image? Return answer with their potential effects.", + }, + ] + ), + ] + ) + + observation = ( + observations["rgb"].cpu().numpy() + if isinstance(observations["rgb"], torch.Tensor) + else observations["rgb"] + ) + kwargs.update({"observation": encode_image(observation)}) + # for LLM generate task descriptions + prompt_query = ChatPromptTemplate.from_messages( + [ + SystemMessage( + content="You are a helpful assistant to operate a robotic arm with a camera to generate task plans according to descriptions." 
+ ), + HumanMessagePromptTemplate.from_template( + [ + { + "type": "image_url", + "image_url": { + "url": "data:image/jpg;base64,{observation}", + }, + }, + { + "type": "text", + "text": "Here is analysis for this image: {query}.", + }, + { + "type": "text", + "text": ( + "Using the context below, produce an actionable task plan.\n\n" + "**Environment background:** \n{basic_background}\n\n" + '**Task goal:** \n"{task_prompt}"\n\n' + "**Available atomic actions:** \n{atom_actions}\n" + ), + }, + ] + ), + ] + ) + + return [prompt.invoke(kwargs), {"prompt": prompt_query, "kwargs": kwargs}] diff --git a/embodichain/agents/prompts/atom_actions.txt b/embodichain/agents/prompts/atom_actions.txt new file mode 100644 index 00000000..257464c5 --- /dev/null +++ b/embodichain/agents/prompts/atom_actions.txt @@ -0,0 +1,136 @@ +### Atom Functions for Robot Arm Control +Each atomic function returns a list of joint-space trajectories (list[np.ndarray]). +All functions support an optional argument: + +force_valid: bool + If True, the system will automatically correct an invalid target pose by + projecting it to the nearest valid pose. Use this option carefully: + enable it only for actions where small spatial deviations are acceptable + and will not compromise task correctness. Default is False. + +Use the following functions exactly as defined. Do not invent new APIs or parameters. + +"grasp": + def grasp(robot_name: str, obj_name: str, pre_grasp_dis: float, **kwargs) -> list[np.ndarray] + + Moves the specified arm to the target object’s affordance-based grasp pose and executes a grasp by closing the gripper. + + The function plans a two-stage trajectory: + (1) from the current pose to a pre-grasp pose offset from the object, and + (2) from the pre-grasp pose to the final grasp pose, followed by gripper closure. + + Upon completion, the gripper is closed and the target object is expected to be stably held by the gripper. + + Example: + grasp(robot_name='right_arm', obj_name='bottle', pre_grasp_dis=0.10) # Moves the right arm to a pre-grasp pose 10 cm from the bottle, then to the grasp pose and closes the gripper to grasp the bottle. + +"place_on_table": + def place_on_table(robot_name: str, obj_name: str, x: float, y: float, pre_place_dis: float, **kwargs) -> list[np.ndarray] + + Moves the specified robot arm with the target object to the desired [x, y] location on the table and opens the gripper to place the object. + The z-coordinate is automatically adjusted based on the table height and the object’s dimensions. + This function assumes that the robot is already holding the object and that the task is to place it on the table at the specified coordinates. + Remember that when you need to place some objects on the table at specific coordinates, use this function without using other movement atom actions. + Otherwise, **if you need to place some objects relative to some place, then use "move_relative_to_object" first to move to the desired position, then use "open_gripper" to release the object.** + + Example: + place_on_table(robot_name='right_arm', obj_name='bottle', x=0.1, y=0.5, pre_place_dis=0.08) # Moves the right arm to a pre-place position 8 cm from the table, then places the bottle at the specified [0.1, 0.5] location on the table and opens the gripper. 
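+
+    Combined example (illustrative; 'bottle' and the target coordinates are placeholders):
+        grasp(robot_name='right_arm', obj_name='bottle', pre_grasp_dis=0.10) # Grasp the bottle with the right arm.
+        place_on_table(robot_name='right_arm', obj_name='bottle', x=0.1, y=0.5, pre_place_dis=0.08, force_valid=True) # Place it at [0.1, 0.5]; force_valid=True projects a slightly unreachable place pose to the nearest valid pose, acceptable here because a small positional deviation does not affect success.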
+ +"move_relative_to_object": + def move_relative_to_object(robot_name: str, obj_name: str, + x_offset=0, y_offset=0, z_offset=0, + **kwargs) -> list[np.ndarray] + Moves the end-effector to a pose defined relative to the target object: + target = object_position + [x_offset, y_offset, z_offset] + Orientation is preserved. + Example: + move_relative_to_object(robot_name='right_arm', obj_name='cup', + x_offset=0.05, y_offset=0.10, z_offset=0.10) # Moves the right arm’s end-effector to a spot located 5 cm forward, 10 cm to the left, and 10 cm above the cup, while preserving the current gripper orientation. + move_relative_to_object(robot_name='right_arm', obj_name='cup', + x_offset=-0.05, y_offset=-0.10, z_offset=0.10) # Moves the right arm’s end-effector to a spot located 5 cm backward, 10 cm to the right, and 10 cm above the cup, while preserving the current gripper orientation. + +"move_to_absolute_position": + def move_to_absolute_position(robot_name: str, + x=None, y=None, z=None, + **kwargs) -> list[np.ndarray] + Moves the end-effector to an absolute (x, y, z) position in world coordinates. + Any coordinate set to None remains unchanged. + Orientation is preserved. + Example: + move_to_absolute_position(robot_name='right_arm', x=0.10, y=0.10, z=None) # Moves the end-effector to the absolute world position (x=0.10 m, y=0.10 m) while leaving z unchanged, and preserves the orientation. + +"move_by_relative_offset": + def move_by_relative_offset(robot_name: str, + dx=0.0, dy=0.0, dz=0.0, mode='extrinsic', + **kwargs) -> list[np.ndarray] + Moves the end-effector by a relative translation: + new_position = current_position + [dx, dy, dz] + The offset is applied along the specified axes using the given mode, while preserving the original end-effector orientation. + Mode can be 'extrinsic' (world frame) or 'intrinsic' (end-effector frame). If you want to move along the world axes, use 'extrinsic'. If you want to move along the end-effector’s local axes, use "intrinsic". + Example: + move_by_relative_offset(robot_name='right_arm', dx=0.05, dy=-0.10, dz=0.20, mode='extrinsic') # Translates the end-effector by +5 cm in x (front), −10 cm in y (right), +20 cm in z (above) in the world coordinate, relative to its current position, with orientation preserved. + move_by_relative_offset(robot_name='right_arm', dx=0, dy=0, dz=0.1, mode='intrinsic') # Translates the end-effector by +10 cm in z (forward) in the EEF coordinate, meaning that it moves forward relative to its current facing direction, with orientation preserved. + +"rotate_eef" + def rotate_eef(robot_name: str, degree: float, **kwargs) -> list[np.ndarray] + Rotates the wrist roll joint (joint index 5) of the specified arm by the + given number of degrees. End-effector position is preserved. + Example: + rotate_eef(robot_name='right_arm', degree=-90) # Rotates the right arm’s wrist-roll joint by −45° (counterclockwise), while keeping the end-effector position unchanged. This is a joint-level rotation, not a full orientation override. + rotate_eef(robot_name='right_arm', degree=90) # Rotates the right arm’s wrist-roll joint by 45° (clockwise), while keeping the end-effector position unchanged. This is a joint-level rotation, not a full orientation override. + Typical use cases: + Pouring or tilting a grasped object. + Rotating the gripper around its forward axis without translating the end effector. + After rotating, you typically need to apply an opposite rotation back to return to the original pose. 
+ Usage notes: + Rotation sign convention: negative = counterclockwise, positive = clockwise, viewed along the end-effector forward axis. + For pouring with the right arm, a common pattern is: first apply a negative rotation to start pouring, then apply a positive rotation to return. + For the left arm, the sign convention is typically reversed. + +"orient_eef": + def orient_eef(robot_name: str, + direction: str = 'front', # 'front' or 'down' + **kwargs) -> list[np.ndarray] + Reorients the end-effector to a predefined canonical orientation in the + WORLD coordinate frame, while keeping the EE’s current position fixed. + This function replaces the entire 3×3 orientation matrix of the current + end-effector pose. + Usage notes: + This function should only be used when you explicitly need to override the end-effector’s full orientation. + This differs from rotate_eef(). orient_eef performs a full orientation override of the end-effector, not a single-joint rotation. For tasks like pouring, no need to use it. + For general wrist rotation, prefer using rotate_eef instead. + For aligning tasks, use "front" or "down" orientations as needed. + Supported orientations: + • 'front' : Align the end-effector so its direction faces forward. + • 'down' : Align the end-effector so its direction faces downward. + Example: + orient_eef(robot_name='right_arm', direction='front') # Reorients the right arm’s end-effector so it faces forward + +"back_to_initial_pose": + def back_to_initial_pose(robot_name: str, **kwargs) -> list[np.ndarray] + Returns the specified arm to its predefined initial joint configuration + stored in the environment. + Example: + back_to_initial_pose(robot_name='right_arm') # Returns the right arm back to its predefined initial joint configuration stored in the environment, regardless of its current pose. + +"close_gripper": + def close_gripper(robot_name: str, **kwargs) -> list[np.ndarray] + Closes the arm’s gripper using a short (10-step) gripper-only trajectory. + Example: + close_gripper(robot_name='right_arm') # Closes the right gripper using a short, smooth 10-step gripper-only trajectory. + +"open_gripper": + def open_gripper(robot_name: str, **kwargs) -> list[np.ndarray] + Opens the arm’s gripper using a short (10-step) gripper-only trajectory. + Example: + open_gripper(robot_name='right_arm') # Opens the right gripper using a 10-step gripper-only trajectory. + +### Drive Function (Trajectory Synchronization) +"drive": + def drive(left_arm_action=None, right_arm_action=None, **kwargs) -> list[torch.Tensor] + Wraps one or two arm trajectories into synchronized full-robot actions. + • If only one arm action is provided, the other arm stays idle. + • If both are provided, they are temporally aligned and executed together. + • The actions are obtained from the output of the above functions. + Example: + drive(left_arm_action=left_actions, right_arm_action=right_actions) \ No newline at end of file diff --git a/embodichain/agents/prompts/basic_background.txt b/embodichain/agents/prompts/basic_background.txt new file mode 100644 index 00000000..dc6d1c30 --- /dev/null +++ b/embodichain/agents/prompts/basic_background.txt @@ -0,0 +1,42 @@ +The environment uses a right-handed world coordinate system, where 1 unit equals 1 meter. +All robot poses are represented as 4×4 homogeneous transformation matrices. + +The robot base coordinate frame is the ONLY authoritative frame for all spatial reasoning, planning, and action generation. 
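+
+For reference, a pose matrix T follows the standard homogeneous layout (illustrative notation):
+
+    T = [[ r11 r12 r13 tx ],
+         [ r21 r22 r23 ty ],
+         [ r31 r32 r33 tz ],
+         [  0   0   0   1 ]]
+
+where the upper-left 3×3 block is the rotation and the last column holds the position [tx, ty, tz] in meters, so the position of an object or end-effector is T[:3, 3].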
+ +CAMERA AND IMAGE INTERPRETATION + +The camera is positioned in front of the robot, facing the robot arm and looking toward the robot base. +Because of this viewpoint, the rendered image is horizontally mirrored relative to the robot base frame. +This mirroring affects LEFT–RIGHT only. There is NO vertical or depth inversion. + +Mirror mapping (image → robot base frame): + +* Image left corresponds to robot right +* Image right corresponds to robot left +* Image up corresponds to robot up +* Image down corresponds to robot down + +REQUIRED REASONING PERSPECTIVE (NON-NEGOTIABLE) + +You must ignore the camera and rendered image orientation when reasoning. +All spatial reasoning must be performed as if you are physically located at the robot base, looking outward along the robot’s +x (forward) direction. + +Do NOT reason from the camera viewpoint. +Do NOT trust left/right as shown in the image. +Always remap image left/right before reasoning. + +ROBOT BASE COORDINATE DEFINITIONS + +All directions below are defined strictly in the robot base frame: + +* Moving forward increases x +* Moving backward decreases x +* Moving left increases y (appears as right in the image) +* Moving right decreases y (appears as left in the image) +* Moving up increases z +* Moving down decreases z + +ROBOT INITIALIZATION AND TERMINATION + +Both robot arms start in predefined initial configurations with their end-effectors open. +At task completion, both arms must be returned to their initial poses. \ No newline at end of file diff --git a/embodichain/agents/prompts/code_example.txt b/embodichain/agents/prompts/code_example.txt new file mode 100644 index 00000000..c2952fed --- /dev/null +++ b/embodichain/agents/prompts/code_example.txt @@ -0,0 +1,35 @@ +# Python scripts +# Use the right arm to grasp bottle, move to the target location (x=0.2, y=0.1), and then open the gripper to release the object. + +```python +# Step 1 — Reach and grasp the bottle +drive( + right_arm_action=grasp( + robot_name="right_arm", + obj_name="bottle", + ), +) + +# Step 2 — Move to target location +drive( + right_arm_action=move_to_absolute_position( + robot_name="right_arm", + x=0.2, + y=0.1, + ), +) + +# Step 3 — Open gripper to release the object +drive( + right_arm_action=open_gripper( + robot_name="right_arm", + ), +) + +# Step 4 — Return the arm to the initial pose +drive( + right_arm_action=back_to_initial_pose( + robot_name="right_arm", + ), +) +``` \ No newline at end of file diff --git a/embodichain/agents/prompts/code_prompt.txt b/embodichain/agents/prompts/code_prompt.txt new file mode 100644 index 00000000..3fadf1c9 --- /dev/null +++ b/embodichain/agents/prompts/code_prompt.txt @@ -0,0 +1,7 @@ +Constraints: +- Every atomic action MUST be executed via a single drive(...) call. +- Each drive(...) call must directly contain the atomic action(s); do NOT define actions separately and then pass them into drive. +- For single-arm execution: specify the active arm’s action and explicitly set the unused arm to None within the same drive(...) call. +- For dual-arm execution: both arms’ actions MUST be specified within the same drive(...) call. +- Use exactly one drive(...) call per step; no exceptions. +- Output MUST be executable Python code only: no explanations, no comments, no markdown, and no extra text. 
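+
+Illustrative single-arm step that satisfies the constraints above ('bottle' is a placeholder object):
+    drive(left_arm_action=None, right_arm_action=grasp(robot_name="right_arm", obj_name="bottle", pre_grasp_dis=0.10))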
\ No newline at end of file diff --git a/embodichain/data/__init__.py b/embodichain/data/__init__.py index 9e152ab9..fccbbc24 100644 --- a/embodichain/data/__init__.py +++ b/embodichain/data/__init__.py @@ -14,5 +14,13 @@ # limitations under the License. # ---------------------------------------------------------------------------- +import os + + +database_dir = os.path.dirname(os.path.abspath(__file__)).replace("data", "database") +database_2d_dir = os.path.join(database_dir, "2dasset") +database_agent_prompt_dir = os.path.join(database_dir, "agent_prompt") +database_demo_dir = os.path.join(database_dir, "demostration") + from . import assets from .dataset import * diff --git a/embodichain/data/enum.py b/embodichain/data/enum.py index cf758192..edf84c3c 100644 --- a/embodichain/data/enum.py +++ b/embodichain/data/enum.py @@ -34,6 +34,18 @@ class SemanticMask(IntEnum): ROBOT = 2 +class Modality(Enum): + STATES = "states" + STATE_INDICATOR = "state_indicator" + ACTIONS = "actions" + ACTION_INDICATOR = "action_indicator" + IMAGES = "images" + LANG = "lang" + LANG_INDICATOR = "lang_indicator" + GEOMAP = "geomap" # e.g., depth, point cloud, etc. + VISION_LANGUAGE = "vision_language" # e.g., image + lang + + class EndEffector(Enum): GRIPPER = "gripper" DEXTROUSHAND = "hand" @@ -53,6 +65,16 @@ class ControlParts(Enum): WAIST = "waist" +class ControlPartsMappingW1(Enum): + ANKLE_IN_TORSO = 0 + KNEE_IN_TORSO = 1 + BUTTOCK_IN_TORSO = 2 + WAIST_IN_TORSO = 3 + + NECK1_IN_HEAD = 0 + NECK2_IN_HEAD = 1 + + class Hints(Enum): EEF = ( ControlParts.LEFT_EEF.value, @@ -74,3 +96,33 @@ class EefType(Enum): class ActionMode(Enum): ABSOLUTE = "" RELATIVE = "delta_" # This indicates the action is relative change with respect to last state. + + +class PrivilegeType(Enum): + EXTEROCEPTION = "exteroception" + MASK = "mask" + STATE = "state" + PROGRESS = "progress" + + +SUPPORTED_PROPRIO_TYPES = [ + ControlParts.LEFT_ARM.value + EefType.POSE.value, + ControlParts.RIGHT_ARM.value + EefType.POSE.value, + ControlParts.LEFT_ARM.value + JointType.QPOS.value, + ControlParts.RIGHT_ARM.value + JointType.QPOS.value, + ControlParts.LEFT_EEF.value + EndEffector.DEXTROUSHAND.value, + ControlParts.RIGHT_EEF.value + EndEffector.DEXTROUSHAND.value, + ControlParts.LEFT_EEF.value + EndEffector.GRIPPER.value, + ControlParts.RIGHT_EEF.value + EndEffector.GRIPPER.value, + ControlParts.HEAD.value + JointType.QPOS.value, + ControlParts.WAIST.value + JointType.QPOS.value, +] +SUPPORTED_ACTION_TYPES = SUPPORTED_PROPRIO_TYPES + [ + ControlParts.LEFT_ARM.value + ActionMode.RELATIVE.value + JointType.QPOS.value, + ControlParts.RIGHT_ARM.value + ActionMode.RELATIVE.value + JointType.QPOS.value, +] +SUPPORTED_EXTRA_VISION_TYPES = [ + Modality.GEOMAP.value, + PrivilegeType.EXTEROCEPTION.value, + PrivilegeType.MASK.value, +] diff --git a/embodichain/lab/gym/envs/__init__.py b/embodichain/lab/gym/envs/__init__.py index 88257690..e4a16daa 100644 --- a/embodichain/lab/gym/envs/__init__.py +++ b/embodichain/lab/gym/envs/__init__.py @@ -26,6 +26,14 @@ PourWaterEnv, ) from embodichain.lab.gym.envs.tasks.tableware.scoop_ice import ScoopIce +from embodichain.lab.gym.envs.tasks.tableware.pour_water_v3 import ( + PourWaterEnv3, + PourWaterAgentEnv3, +) +from embodichain.lab.gym.envs.tasks.tableware.rearrangement_v3 import ( + RearrangementEnv3, + RearrangementAgentEnv3, +) # Reinforcement learning environments from embodichain.lab.gym.envs.tasks.rl.push_cube import PushCubeEnv diff --git a/embodichain/lab/gym/envs/action_bank/configurable_action.py 
b/embodichain/lab/gym/envs/action_bank/configurable_action.py index 3dd649e9..37c5117e 100644 --- a/embodichain/lab/gym/envs/action_bank/configurable_action.py +++ b/embodichain/lab/gym/envs/action_bank/configurable_action.py @@ -30,7 +30,7 @@ from embodichain.lab.sim.cfg import MarkerCfg from embodichain.lab.gym.utils.misc import resolve_env_params, data_key_to_control_part from embodichain.data.enum import Hints, EefExecute -from .utils import generate_affordance_from_src, get_init_affordance +from .utils import generate_affordance_from_src # https://stackoverflow.com/questions/41834530/how-to-make-python-decorators-work-like-a-tag-to-make-function-calls-by-tag @@ -580,10 +580,10 @@ def initialize_with_given_qpos(action_list, executor, executor_init_info, env): if given_qpos is None: log_warning( "No given_qpos is provided for initialize_with_given_qpos. Using {}.".format( - get_init_affordance(executor) + "{}_{}_qpos".format(executor, "init") ) ) - given_qpos = env.affordance_datas[get_init_affordance(executor)] + given_qpos = env.affordance_datas["{}_{}_qpos".format(executor, "init")] executor_qpos_dim = action_list[executor].shape[0] given_qpos = np.asarray(given_qpos) @@ -914,7 +914,7 @@ def _pad_or_trim_last(x, new_T): # After node initialization, check if env.affordance_datas contains updated initial value for each executor. for executor in scope.keys(): - init_node_name = get_init_affordance(executor) + init_node_name = "{}_{}_qpos".format(executor, "init") if ( not hasattr(env, "affordance_datas") or init_node_name not in env.affordance_datas diff --git a/embodichain/lab/gym/envs/action_bank/utils.py b/embodichain/lab/gym/envs/action_bank/utils.py index 255e5b8f..7ba44527 100644 --- a/embodichain/lab/gym/envs/action_bank/utils.py +++ b/embodichain/lab/gym/envs/action_bank/utils.py @@ -25,8 +25,26 @@ """Node Generation Utils""" -def get_init_affordance(scope: str, tag: str = "init") -> str: - return "{}_{}_qpos".format(scope, tag) +def get_control_part_joint_ids(env, key: str) -> List[int]: + from embodichain.data.enum import ( + ControlParts, + ControlPartsMappingW1, + ) + from embodichain.lab.gym.envs.action_bank.configurable_action import ( + get_control_part, + ) + + control_part = get_control_part(env, key) + if control_part == ControlParts.WAIST.value: + waist_joint_id = env.robot.get_joint_ids(name=ControlParts.TORSO.value)[ + ControlPartsMappingW1.WAIST_IN_TORSO.value + ] + if not isinstance(waist_joint_id, (list)): + return [waist_joint_id] + return waist_joint_id + + else: + return env.robot.get_joint_ids(name=control_part, remove_mimic=True) def generate_affordance_from_src( diff --git a/embodichain/lab/gym/envs/managers/dataset_manager.py b/embodichain/lab/gym/envs/managers/dataset_manager.py index a0ca1688..d1ea3049 100644 --- a/embodichain/lab/gym/envs/managers/dataset_manager.py +++ b/embodichain/lab/gym/envs/managers/dataset_manager.py @@ -87,20 +87,28 @@ def __init__(self, cfg: object, env: EmbodiedEnv): super().__init__(cfg, env) ## TODO: fix configurable_action.py to avoid getting env.metadata['dataset'] - # Extract robot_meta from first functor and add to env.metadata for backward compatibility + # Extract robot_meta and instruction from first functor and add to env.metadata for backward compatibility # This allows legacy code (like action_bank) to access robot_meta via env.metadata["dataset"]["robot_meta"] for mode_cfgs in self._mode_functor_cfgs.values(): for functor_cfg in mode_cfgs: - if "robot_meta" in functor_cfg.params: + if ( + "robot_meta" in 
functor_cfg.params + or "instruction" in functor_cfg.params + ): if not hasattr(env, "metadata"): env.metadata = {} if "dataset" not in env.metadata: env.metadata["dataset"] = {} - env.metadata["dataset"]["robot_meta"] = functor_cfg.params[ - "robot_meta" - ] + if "robot_meta" in functor_cfg.params: + env.metadata["dataset"]["robot_meta"] = functor_cfg.params[ + "robot_meta" + ] + if "instruction" in functor_cfg.params: + env.metadata["dataset"]["instruction"] = functor_cfg.params[ + "instruction" + ] logger.log_info( - "Added robot_meta to env.metadata for backward compatibility" + "Added robot_meta and instruction to env.metadata for backward compatibility" ) break else: diff --git a/embodichain/lab/gym/envs/tasks/tableware/base_agent_env.py b/embodichain/lab/gym/envs/tasks/tableware/base_agent_env.py new file mode 100644 index 00000000..e17557de --- /dev/null +++ b/embodichain/lab/gym/envs/tasks/tableware/base_agent_env.py @@ -0,0 +1,198 @@ +import torch +from embodichain.utils import logger + + +class BaseAgentEnv: + + def _init_agents(self, agent_config, task_name, agent_config_path=None): + from embodichain.agents.hierarchy.task_agent import TaskAgent + from embodichain.agents.hierarchy.code_agent import CodeAgent + from embodichain.agents.hierarchy.validation_agent import ValidationAgent + from embodichain.agents.hierarchy.llm import ( + task_llm, + code_llm, + validation_llm, + ) + + if agent_config.get("TaskAgent") is not None: + self.task_agent = TaskAgent( + task_llm, + **agent_config["Agent"], + **agent_config["TaskAgent"], + task_name=task_name, + config_dir=agent_config_path, + ) + self.code_agent = CodeAgent( + code_llm, + **agent_config["Agent"], + **agent_config.get("CodeAgent"), + task_name=task_name, + config_dir=agent_config_path, + ) + self.validation_agent = ValidationAgent( + validation_llm, + task_name=task_name, + task_description=self.code_agent.prompt_kwargs.get("task_prompt")[ + "content" + ], + basic_background=self.code_agent.prompt_kwargs.get("basic_background")[ + "content" + ], + atom_actions=self.code_agent.prompt_kwargs.get("atom_actions")["content"], + ) + + def get_states(self): + # TODO: only support num_env = 1 for now + # store robot states in each env.reset + self.init_qpos = self.robot.get_qpos().squeeze(0) + + self.left_arm_joints = self.robot.get_joint_ids(name="left_arm") + self.right_arm_joints = self.robot.get_joint_ids(name="right_arm") + self.left_eef_joints = self.robot.get_joint_ids(name="left_eef") + self.right_eef_joints = self.robot.get_joint_ids(name="right_eef") + + self.left_arm_init_qpos = self.init_qpos[self.left_arm_joints] + self.right_arm_init_qpos = self.init_qpos[self.right_arm_joints] + + self.left_arm_init_xpos = self.robot.compute_fk( + self.left_arm_init_qpos, name="left_arm", to_matrix=True + ).squeeze(0) + self.right_arm_init_xpos = self.robot.compute_fk( + self.right_arm_init_qpos, name="right_arm", to_matrix=True + ).squeeze(0) + + self.left_arm_current_qpos = self.left_arm_init_qpos + self.right_arm_current_qpos = self.right_arm_init_qpos + + self.left_arm_current_xpos = self.left_arm_init_xpos + self.right_arm_current_xpos = self.right_arm_init_xpos + + self.left_arm_base_pose = self.robot.get_control_part_base_pose( + "left_arm", to_matrix=True + ).squeeze(0) + self.right_arm_base_pose = self.robot.get_control_part_base_pose( + "right_arm", to_matrix=True + ).squeeze(0) + + self.open_state = torch.tensor([0.05]) + self.close_state = torch.tensor([0.0]) + self.left_arm_current_gripper_state = self.open_state + 
self.right_arm_current_gripper_state = self.open_state + + # store some useful obj information + init_obj_info = {} + obj_uids = self.sim.get_rigid_object_uid_list() + for obj_name in obj_uids: + obj = self.sim.get_rigid_object(obj_name) + obj_pose = obj.get_local_pose(to_matrix=True).squeeze(0) + obj_height = obj_pose[2, 3] # Extract the height (z-coordinate) + obj_grasp_pose = self.affordance_datas.get( + f"{obj_name}_grasp_pose_object", None + ) + init_obj_info[obj_name] = { + "pose": obj_pose, # Store the full pose (4x4 matrix) + "height": obj_height, # Store the height (z-coordinate) + "grasp_pose_obj": ( + obj_grasp_pose.squeeze(0) if obj_grasp_pose is not None else None + ), # Store the grasp pose if available + } + self.init_obj_info = init_obj_info + + # -------------------- Common getters / setters -------------------- + + def get_obs_for_agent(self): + obs = self.get_obs(get_valid_sensor_data=True) + rgb = obs["sensor"]["cam_high"]["color"].squeeze(0) + valid_rgb_1 = obs["sensor"]["valid_cam_1"]["color"].squeeze(0) + valid_rgb_2 = obs["sensor"]["valid_cam_2"]["color"].squeeze(0) + valid_rgb_3 = obs["sensor"]["valid_cam_3"]["color"].squeeze(0) + + # obs_image_path = save_obs_image(obs_image=self.get_obs_for_agent()["rgb_1"], save_dir='./', step_id=0) + + return { + "rgb": rgb, + "valid_rgb_1": valid_rgb_1, + "valid_rgb_2": valid_rgb_2, + "valid_rgb_3": valid_rgb_3, + } + + # depth = obs["sensor"]["cam_high"]["depth"].squeeze(0) + # mask = obs["sensor"]["cam_high"]["mask"].squeeze(0) + # semantic_mask = obs["sensor"]["cam_high"]["semantic_mask_l"].squeeze(0) + # return {"rgb": rgb, "depth": depth, "mask": mask, "semantic_mask": semantic_mask} + + def get_current_qpos_agent(self): + return self.left_arm_current_qpos, self.right_arm_current_qpos + + def set_current_qpos_agent(self, arm_qpos, is_left): + if is_left: + self.left_arm_current_qpos = arm_qpos + else: + self.right_arm_current_qpos = arm_qpos + + def get_current_xpos_agent(self): + return self.left_arm_current_xpos, self.right_arm_current_xpos + + def set_current_xpos_agent(self, arm_xpos, is_left): + if is_left: + self.left_arm_current_xpos = arm_xpos + else: + self.right_arm_current_xpos = arm_xpos + + def get_current_gripper_state_agent(self): + return self.left_arm_current_gripper_state, self.right_arm_current_gripper_state + + def set_current_gripper_state_agent(self, arm_gripper_state, is_left): + if is_left: + self.left_arm_current_gripper_state = arm_gripper_state + else: + self.right_arm_current_gripper_state = arm_gripper_state + + # -------------------- IK / FK -------------------- + def get_arm_ik(self, target_xpos, is_left, qpos_seed=None): + control_part = "left_arm" if is_left else "right_arm" + ret, qpos = self.robot.compute_ik( + name=control_part, pose=target_xpos, joint_seed=qpos_seed + ) + return ret.all().item(), qpos.squeeze(0) + + def get_arm_fk(self, qpos, is_left): + control_part = "left_arm" if is_left else "right_arm" + xpos = self.robot.compute_fk( + name=control_part, qpos=torch.as_tensor(qpos), to_matrix=True + ) + return xpos.squeeze(0) + + # -------------------- get only code for action list -------------------- + def generate_code_for_actions(self, regenerate=False, **kwargs): + logger.log_info( + f"Generate code for creating action list for {self.code_agent.task_name}.", + color="green", + ) + + # Task planning + print(f"\033[92m\nStart task planning.\n\033[0m") + + task_agent_input = self.task_agent.get_composed_observations( + env=self, regenerate=regenerate, **kwargs + ) + task_plan = 
self.task_agent.generate(**task_agent_input) + + # Code generation + print(f"\033[94m\nStart code generation.\n\033[0m") + code_agent_input = self.code_agent.get_composed_observations( + env=self, regenerate=regenerate, **kwargs + ) + code_agent_input["task_plan"] = task_plan + + code_file_path, kwargs, code = self.code_agent.generate(**code_agent_input) + return code_file_path, kwargs, code + + # -------------------- get action list -------------------- + def create_demo_action_list(self, regenerate=False): + code_file_path, kwargs, _ = self.generate_code_for_actions( + regenerate=regenerate + ) + action_list = self.code_agent.act(code_file_path, **kwargs) + self.action_length = len(action_list) + return action_list diff --git a/embodichain/lab/gym/envs/tasks/tableware/pour_water_v3.py b/embodichain/lab/gym/envs/tasks/tableware/pour_water_v3.py new file mode 100644 index 00000000..c4b469b6 --- /dev/null +++ b/embodichain/lab/gym/envs/tasks/tableware/pour_water_v3.py @@ -0,0 +1,78 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# All rights reserved. +# ---------------------------------------------------------------------------- + +import os +import torch +import numpy as np +from copy import deepcopy +from typing import Dict, Union, Optional, Sequence, Tuple, List + +from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg +from embodichain.lab.gym.utils.registration import register_env +from embodichain.utils import configclass, logger + +from embodichain.lab.gym.envs.tasks.tableware.base_agent_env import BaseAgentEnv + +__all__ = ["PourWaterEnv3"] + + +@register_env("PourWater-v3", max_episode_steps=600) +class PourWaterEnv3(EmbodiedEnv): + def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs): + super().__init__(cfg, **kwargs) + + action_config = kwargs.get("action_config", None) + if action_config is not None: + self.action_config = action_config + + def is_task_success(self, **kwargs) -> torch.Tensor: + """Determine if the task is successfully completed. This is mainly used in the data generation process + of the imitation learning. + + Args: + **kwargs: Additional arguments for task-specific success criteria. + + Returns: + torch.Tensor: A boolean tensor indicating success for each environment in the batch. 
+ """ + + bottle = self.sim.get_rigid_object("bottle") + cup = self.sim.get_rigid_object("cup") + + bottle_final_xpos = bottle.get_local_pose(to_matrix=True) + cup_final_xpos = cup.get_local_pose(to_matrix=True) + + bottle_ret = self._is_fall(bottle_final_xpos) + cup_ret = self._is_fall(cup_final_xpos) + + return ~(bottle_ret | cup_ret) + + def _is_fall(self, pose: torch.Tensor) -> torch.Tensor: + # Extract z-axis from rotation matrix (last column, first 3 elements) + pose_rz = pose[:, :3, 2] + world_z_axis = torch.tensor([0, 0, 1], dtype=pose.dtype, device=pose.device) + + # Compute dot product for each batch element + dot_product = torch.sum(pose_rz * world_z_axis, dim=-1) # Shape: (batch_size,) + + # Clamp to avoid numerical issues with arccos + dot_product = torch.clamp(dot_product, -1.0, 1.0) + + # Compute angle and check if fallen + angle = torch.arccos(dot_product) + return angle >= torch.pi / 4 + + +@register_env("PourWaterAgent-v3", max_episode_steps=600) +class PourWaterAgentEnv3(BaseAgentEnv, PourWaterEnv3): + def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs): + super().__init__(cfg, **kwargs) + super()._init_agents(**kwargs) + + def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None): + obs, info = super().reset(seed=seed, options=options) + super().get_states() + return obs, info diff --git a/embodichain/lab/gym/envs/tasks/tableware/rearrangement_v3.py b/embodichain/lab/gym/envs/tasks/tableware/rearrangement_v3.py new file mode 100644 index 00000000..c666fc36 --- /dev/null +++ b/embodichain/lab/gym/envs/tasks/tableware/rearrangement_v3.py @@ -0,0 +1,97 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# All rights reserved. +# ---------------------------------------------------------------------------- + +import os +import torch +import numpy as np +from copy import deepcopy +from typing import Dict, Union, Optional, Sequence, Tuple, List + +from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg +from embodichain.lab.gym.utils.registration import register_env +from embodichain.utils import configclass, logger + +from embodichain.lab.gym.envs.tasks.tableware.base_agent_env import BaseAgentEnv + +__all__ = ["RearrangementEnv3"] + + +@register_env("Rearrangement-v3", max_episode_steps=600) +class RearrangementEnv3(EmbodiedEnv): + def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs): + super().__init__(cfg, **kwargs) + + action_config = kwargs.get("action_config", None) + if action_config is not None: + self.action_config = action_config + + def is_task_success(self) -> bool: + fork = self.sim.get_rigid_object("fork") + spoon = self.sim.get_rigid_object("spoon") + plate = self.sim.get_rigid_object("plate") + plate_pose = plate.get_local_pose(to_matrix=True) + # TODO: now only for 1 env + ( + spoon_place_target_x, + spoon_place_target_y, + spoon_place_target_z, + ) = self.affordance_datas["spoon_place_pose"][:3, 3] + ( + fork_place_target_x, + fork_place_target_y, + fork_place_target_z, + ) = self.affordance_datas["fork_place_pose"][:3, 3] + + spoon_pose = spoon.get_local_pose(to_matrix=True) + spoon_x, spoon_y, spoon_z = spoon_pose[0, :3, 3] + + fork_pose = fork.get_local_pose(to_matrix=True) + fork_x, fork_y, fork_z = fork_pose[0, :3, 3] + + tolerance = self.metadata.get("success_params", {}).get("tolerance", 0.02) + + # spoon and fork should with the x y range of tolerance related to plate. 
+ return ~( + abs(spoon_x - spoon_place_target_x) > tolerance + or abs(spoon_y - spoon_place_target_y) > tolerance + or abs(fork_x - fork_place_target_x) > tolerance + or abs(fork_y - fork_place_target_y) > tolerance + ) + + +@register_env("RearrangementAgent-v3", max_episode_steps=600) +class RearrangementAgentEnv3(BaseAgentEnv, RearrangementEnv3): + def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs): + super().__init__(cfg, **kwargs) + super()._init_agents(**kwargs) + + def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None): + obs, info = super().reset(seed=seed, options=options) + super().get_states() + return obs, info + + def is_task_success(self): + fork = self.sim.get_rigid_object("fork") + spoon = self.sim.get_rigid_object("spoon") + plate = self.sim.get_rigid_object("plate") + + plate_pose = plate.get_local_pose(to_matrix=True) + spoon_place_target_y = plate_pose[0, 1, 3] - 0.16 + fork_place_target_y = plate_pose[0, 1, 3] + 0.16 + + spoon_pose = spoon.get_local_pose(to_matrix=True) + spoon_y = spoon_pose[0, 1, 3] + + fork_pose = fork.get_local_pose(to_matrix=True) + fork_y = fork_pose[0, 1, 3] + + tolerance = self.metadata.get("success_params", {}).get("tolerance", 0.02) + + # spoon and fork should with the y range of tolerance related to plate. + return ( + abs(spoon_y - spoon_place_target_y) <= tolerance + and abs(fork_y - fork_place_target_y) <= tolerance + ) diff --git a/embodichain/lab/gym/utils/gym_utils.py b/embodichain/lab/gym/utils/gym_utils.py index 40ced87f..4a199b70 100644 --- a/embodichain/lab/gym/utils/gym_utils.py +++ b/embodichain/lab/gym/utils/gym_utils.py @@ -437,6 +437,9 @@ class ComponentCfg: env_cfg.sim_steps_per_control = config["env"].get("sim_steps_per_control", 4) env_cfg.extensions = deepcopy(config.get("env", {}).get("extensions", {})) + # load dataset config + env_cfg.dataset = config["env"].get("dataset", None) + # TODO: support more env events, eg, grasp pose generation, mesh preprocessing, etc. 
env_cfg.dataset = ComponentCfg() @@ -449,9 +452,14 @@ class ComponentCfg: for dataset_name, dataset_params in config["env"]["dataset"].items(): dataset_params_modified = deepcopy(dataset_params) + # Extract function name if format is "module:ClassName" + func_name = dataset_params["func"] + if ":" in func_name: + func_name = func_name.split(":")[-1] + # Find the function from multiple modules using the utility function dataset_func = find_function_from_modules( - dataset_params["func"], + func_name, dataset_modules, raise_if_not_found=True, ) @@ -471,6 +479,7 @@ class ComponentCfg: "embodichain.lab.gym.envs.managers.randomization", "embodichain.lab.gym.envs.managers.record", "embodichain.lab.gym.envs.managers.events", + "embodichain.lab.gym.envs.managers.real2sim", ] # parser env events config diff --git a/embodichain/lab/gym/utils/misc.py b/embodichain/lab/gym/utils/misc.py index b75b70af..9f839491 100644 --- a/embodichain/lab/gym/utils/misc.py +++ b/embodichain/lab/gym/utils/misc.py @@ -28,7 +28,7 @@ from collections import OrderedDict from importlib import import_module from scipy.spatial.transform import Rotation as R -from typing import Any, Dict, List, Tuple, Union, Sequence, Callable, Mapping +from typing import Any, Dict, List, Tuple, Union, Sequence, Callable, Mapping, Optional import numpy as np @@ -757,6 +757,13 @@ def resolve_env_attr(obj: Any, env: Any) -> Any: _EXPR = re.compile(r"\$\{([^}]+)\}") # For searching ${...} marker +def is_binocularcam(sensor): + from dexsim.sensor import BinocularCam + from embodichain.lab.sim.sensors import StereoCamera + + return isinstance(sensor, BinocularCam) or isinstance(sensor, StereoCamera) + + def resolve_formatted_string(obj, local_vars=None, global_vars=None): """Given a dict carrys "${...}"-like strings , `eval` the "${...}$" values while keep the dict structure. @@ -1382,3 +1389,43 @@ def is_stereocam(sensor) -> bool: from embodichain.lab.sim.sensors import StereoCamera return isinstance(sensor, StereoCamera) + + +def _data_key_to_control_part(robot, control_parts, data_key: str) -> Optional[str]: + # TODO: Temporary workaround, should be removed after refactoring data dict extractor. + # @lru_cache(max_size=None) # NOTE: no way to pass a hashable parameter + def is_eef_hand_func(robot, control_parts) -> bool: + # TODO: This is a temporary workaround, should be used a more general method to check + # whether the end-effector is a hand. 
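+        # Heuristic used below: an end-effector control part that keeps two or more
+        # independent (non-mimic) joints is treated as a dexterous hand; a single
+        # remaining joint is treated as a simple gripper.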
+ for part in control_parts: + if "eef" in part: + joint_ids = robot.get_joint_ids(part, remove_mimic=True) + return len(joint_ids) >= 2 + return False + + from embodichain.data.enum import ( + ControlParts, + EndEffector, + ActionMode, + JointType, + EefType, + ) + + is_eef_hand = is_eef_hand_func(robot, control_parts) + + for control_part in ControlParts: + if EndEffector.DEXTROUSHAND.value in data_key and is_eef_hand: + return data_key.replace(EndEffector.DEXTROUSHAND.value, "") + elif EndEffector.DEXTROUSHAND.value in data_key and not is_eef_hand: + continue + elif EndEffector.GRIPPER.value in data_key and not is_eef_hand: + return data_key.replace(EndEffector.GRIPPER.value, "") + elif EndEffector.GRIPPER.value in data_key and is_eef_hand: + continue + elif ActionMode.RELATIVE.value + JointType.QPOS.value in data_key: + continue + elif EefType.POSE.value in data_key: + continue + elif control_part.value in data_key: + return control_part.value + return None diff --git a/embodichain/lab/scripts/run_agent.py b/embodichain/lab/scripts/run_agent.py new file mode 100644 index 00000000..e6546a59 --- /dev/null +++ b/embodichain/lab/scripts/run_agent.py @@ -0,0 +1,146 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +import gymnasium +import numpy as np +import argparse +import torch + +from embodichain.utils.utility import load_json +from embodichain.lab.sim import SimulationManagerCfg +from embodichain.lab.gym.envs import EmbodiedEnvCfg +from embodichain.lab.gym.utils.gym_utils import ( + config_to_cfg, +) +from embodichain.utils.logger import log_warning, log_info, log_error +from .run_env import main + + +if __name__ == "__main__": + np.set_printoptions(5, suppress=True) + torch.set_printoptions(precision=5, sci_mode=False) + + parser = argparse.ArgumentParser() + parser.add_argument( + "--num_envs", + help="The number of environments to run in parallel.", + default=1, + type=int, + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + help="device to run the environment on, e.g., 'cpu' or 'cuda'", + ) + parser.add_argument( + "--headless", + help="Whether to perform the simulation in headless mode.", + default=False, + action="store_true", + ) + parser.add_argument( + "--enable_rt", + help="Whether to use RTX rendering backend for the simulation.", + default=False, + action="store_true", + ) + parser.add_argument( + "--render_backend", + help="The rendering backend to use for the simulation.", + default="egl", + type=str, + ) + parser.add_argument( + "--gpu_id", + help="The GPU ID to use for the simulation.", + default=0, + type=int, + ) + parser.add_argument( + "--debug_mode", + help="Enable debug mode.", + default=False, + action="store_true", + ) + parser.add_argument( + "--filter_visual_rand", + help="Whether to filter out visual randomization.", + default=False, + action="store_true", + ) + + parser.add_argument( + "--gym_config", + type=str, + help="Path to the gym configuration file.", + required=True, + ) + parser.add_argument( + "--task_name", + type=str, + help="Name of the task.", + required=True, + ) + parser.add_argument( + "--agent_config", + type=str, + help="Path to the agent configuration file.", + required=True, + ) + parser.add_argument( + "--regenerate", + action="store_true", + help="Whether to regenerate code if already existed.", + default=False, + ) + + args = parser.parse_args() + + # Validate arguments + if args.num_envs != 1: + log_error(f"Currently only support num_envs=1, but got {args.num_envs}.") + exit(1) + + # Load configurations + gym_config = load_json(args.gym_config) + agent_config = load_json(args.agent_config) + + # Build environment configuration + cfg: EmbodiedEnvCfg = config_to_cfg(gym_config) + cfg.filter_visual_rand = args.filter_visual_rand + cfg.num_envs = args.num_envs + cfg.sim_cfg = SimulationManagerCfg( + headless=args.headless, + sim_device=args.device, + enable_rt=args.enable_rt, + gpu_id=args.gpu_id, + ) + + # Create environment + env = gymnasium.make( + id=gym_config["id"], + cfg=cfg, + agent_config=agent_config, + agent_config_path=args.agent_config, + task_name=args.task_name, + ) + + # Run main function + main(args, env, gym_config) + + if args.headless: + env.reset(options={"final": True}) diff --git a/embodichain/lab/scripts/run_env.py b/embodichain/lab/scripts/run_env.py index 2268be0f..c9759409 100644 --- a/embodichain/lab/scripts/run_env.py +++ b/embodichain/lab/scripts/run_env.py @@ -32,9 +32,11 @@ from embodichain.utils.logger import log_warning, log_info, log_error -def generate_and_execute_action_list(env, idx, debug_mode): +def generate_and_execute_action_list(env, idx, debug_mode, **kwargs): - action_list = 
env.get_wrapper_attr("create_demo_action_list")(action_sentence=idx) + action_list = env.get_wrapper_attr("create_demo_action_list")( + action_sentence=idx, **kwargs + ) if action_list is None or len(action_list) == 0: log_warning("Action is invalid. Skip to next generation.") @@ -86,7 +88,9 @@ def generate_function( while True: ret = [] for trajectory_idx in range(num_traj): - valid = generate_and_execute_action_list(env, trajectory_idx, debug_mode) + valid = generate_and_execute_action_list( + env, trajectory_idx, debug_mode, **kwargs + ) if not valid: _, _ = env.reset() @@ -124,14 +128,15 @@ def main(args, env, gym_config): log_info("Start offline data generation.", color="green") # TODO: Support multiple trajectories per episode generation. num_traj = 1 - for i in range(gym_config["max_episodes"]): + for i in range(gym_config.get("max_episodes", 1)): generate_function( env, num_traj, i, - save_path=args.save_path, - save_video=args.save_video, + save_path=getattr(args, "save_path", ""), + save_video=getattr(args, "save_video", False), debug_mode=args.debug_mode, + regenerate=getattr(args, "regenerate", False), ) diff --git a/embodichain/lab/sim/objects/soft_object.py b/embodichain/lab/sim/objects/soft_object.py index 116925e7..52f1f0c1 100644 --- a/embodichain/lab/sim/objects/soft_object.py +++ b/embodichain/lab/sim/objects/soft_object.py @@ -64,18 +64,18 @@ def __init__( self.ps = ps self.num_instances = len(entities) - softbodies = [ + self.soft_bodies = [ self.entities[i].get_physical_body() for i in range(self.num_instances) ] - self.n_collision_vertices = softbodies[0].get_num_vertices() - self.n_sim_vertices = softbodies[0].get_num_sim_vertices() + self.n_collision_vertices = self.soft_bodies[0].get_num_vertices() + self.n_sim_vertices = self.soft_bodies[0].get_num_sim_vertices() self._rest_position_buffer = torch.empty( (self.num_instances, self.n_collision_vertices, 4), device=self.device, dtype=torch.float32, ) - for i, softbody in enumerate(softbodies): + for i, softbody in enumerate(self.soft_bodies): self._rest_position_buffer[i] = softbody.get_position_inv_mass_buffer() self._rest_sim_position_buffer = torch.empty( @@ -84,7 +84,7 @@ def __init__( dtype=torch.float32, ) - for i, softbody in enumerate(softbodies): + for i, softbody in enumerate(self.soft_bodies): self._rest_sim_position_buffer[i] = ( softbody.get_sim_position_inv_mass_buffer() ) @@ -148,9 +148,7 @@ def sim_vertex_position_buffer(self): def sim_vertex_velocity_buffer(self): """Get the current vertex velocity buffer of the soft bodies.""" for i, softbody in enumerate(self.soft_bodies): - self._sim_vertex_velocity_buffer[i] = ( - softbody.get_sim_position_inv_mass_buffer() - ) + self._sim_vertex_velocity_buffer[i] = softbody.get_sim_velocity_buffer() return self._sim_vertex_velocity_buffer.clone() diff --git a/embodichain/toolkits/code_generation.py b/embodichain/toolkits/code_generation.py new file mode 100644 index 00000000..360e9e90 --- /dev/null +++ b/embodichain/toolkits/code_generation.py @@ -0,0 +1,88 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from typing import Dict, Tuple +from langchain_core.output_parsers import BaseOutputParser + +import numpy as np + + +def merge_dicts(dicts: Dict): + return {k: v for d in dicts for k, v in d.items()} + + +def get_executable_code_str(input_string, language="python"): + start_marker = f"```{language}" + end_marker = f"```" + if input_string.find(start_marker) >= 0: + + start_index = input_string.find(start_marker) + len(start_marker) + end_index = input_string.rfind(end_marker) + + code_string = input_string[start_index:end_index].strip() + else: + code_string = input_string + + return code_string + + +class OutputFormatting: + @staticmethod + def flatten_dict(output: Dict[str, Dict]) -> Dict[str, np.ndarray]: + ret = {} + for _, val in output.items(): + ret.update(val) + return ret + + +class ExecutableOutputParser(BaseOutputParser): + # https://python.langchain.com/v0.1/docs/modules/model_io/output_parsers/custom/ + + _fixed_vars = {"np": np} + variable_vars = {} + + def update_vars(self, variable_vars: Dict): + self.variable_vars = variable_vars + + def parse(self, text: str) -> Tuple[str, Dict, Dict]: + code_str = get_executable_code_str(text) + # if self._cfg["include_context"] and context != "": + # to_exec = f"{context}\n{code_str}" + # to_log = f"{context}\n{use_query}\n{code_str}" + # else: + # to_exec = code_str + # to_log = f"{use_query}\n{to_exec}" + + # to_log_pretty = highlight(to_log, PythonLexer(), TerminalFormatter()) + # print( + # f"\033[34m====================================================\nLMP {self._name} exec:\033[0m\n\n{to_log_pretty}\n\n\033[34m====================================================\n\033[0m" + # ) + + # generate new functions + # new_fs = self._lmp_fgen.create_new_fs_from_code(code_str) + # self._variable_vars.update(new_fs) + + gvars = merge_dicts([self._fixed_vars, self.variable_vars]) + lvars = None + + if gvars is None: + gvars = {} + if lvars is None: + lvars = {} + empty_fn = lambda *args, **kwargs: None + custom_gvars = merge_dicts([gvars, {"exec": empty_fn, "eval": empty_fn}]) + + return code_str, custom_gvars, lvars diff --git a/embodichain/toolkits/interfaces.py b/embodichain/toolkits/interfaces.py new file mode 100644 index 00000000..5d6d967f --- /dev/null +++ b/embodichain/toolkits/interfaces.py @@ -0,0 +1,1252 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ---------------------------------------------------------------------------- + +import numpy as np +from embodichain.utils.logger import log_info, log_warning, log_error +from copy import deepcopy +from embodichain.lab.gym.utils.misc import ( + mul_linear_expand, + get_rotation_replaced_pose, +) +import torch +from tqdm import tqdm +from embodichain.lab.sim.planners.motion_generator import MotionGenerator +from scipy.spatial.transform import Rotation as R +from embodichain.utils.utility import encode_image +import ast + +""" +--------------------------------------------Some useful functions---------------------------------------------------- +--------------------------------------------Some useful functions---------------------------------------------------- +--------------------------------------------Some useful functions---------------------------------------------------- +""" + + +def draw_axis(env, pose): + from embodichain.lab.sim.cfg import MarkerCfg + + marker_cfg = MarkerCfg( + name="test", + marker_type="axis", + axis_xpos=pose, + axis_size=0.01, + axis_len=0.2, + arena_index=-1, # All arenas + ) + env.sim.draw_marker(cfg=marker_cfg) + env.sim.update() + + +def get_arm_states(env, robot_name): + + left_arm_current_qpos, right_arm_current_qpos = env.get_current_qpos_agent() + left_arm_current_pose, right_arm_current_pose = env.get_current_xpos_agent() + left_arm_current_gripper_state, right_arm_current_gripper_state = ( + env.get_current_gripper_state_agent() + ) + + side = "right" if "right" in robot_name else "left" + is_left = True if side == "left" else False + select_arm = "left_arm" if is_left else "right_arm" + + arms = { + "left": ( + left_arm_current_qpos, + left_arm_current_pose, + left_arm_current_gripper_state, + ), + "right": ( + right_arm_current_qpos, + right_arm_current_pose, + right_arm_current_gripper_state, + ), + } + ( + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = arms[side] + + return ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) + + +def find_nearest_valid_pose(env, select_arm, pose, xpos_resolution=0.1): + # use the validator to choose the nearest valid pose + # delete the cache every time + if isinstance(pose, torch.Tensor): + pose = pose.detach().cpu().numpy() + ret, _ = env.robot.compute_xpos_reachability( + select_arm, + pose, + xpos_resolution=xpos_resolution, + qpos_resolution=np.radians(60), + cache_mode="disk", + use_cached=False, + visualize=False, + ) + ret = np.stack(ret, axis=0) + # find the nearest valid pose + xyz = pose[:3, 3] + ts = np.stack([M[:3, 3] for M in ret], axis=0) # shape (N,3) + dists = np.linalg.norm(ts - xyz[None, :], axis=1) + best_idx = np.argmin(dists) + nearest_valid_pose = ret[best_idx] + return torch.from_numpy(nearest_valid_pose) + + +def get_qpos(env, is_left, select_arm, pose, qpos_seed, force_valid=False, name=""): + if force_valid: + try: + ret, qpos = env.get_arm_ik(pose, is_left=is_left, qpos_seed=qpos_seed) + if not ret: + log_error(f"Generate {name} qpos failed.\n") + except Exception as e: + log_warning( + f"Original {name} pose invalid, using nearest valid pose. 
({e})\n" + ) + pose = find_nearest_valid_pose(env, select_arm, pose) + + ret, qpos = env.get_arm_ik(pose, is_left=is_left, qpos_seed=qpos_seed) + else: + ret, qpos = env.get_arm_ik(pose, is_left=is_left, qpos_seed=qpos_seed) + if not ret: + log_error(f"Generate {name} qpos failed.\n") + + return qpos + + +def get_offset_pose( + pose_to_change: torch.Tensor, + offset_value: float, + direction: str = "z", + mode: str = "intrinsic", +) -> torch.Tensor: + + device = pose_to_change.device + dtype = pose_to_change.dtype + + if isinstance(direction, str): + if direction == "x": + direction_vec = torch.tensor([1.0, 0.0, 0.0], device=device, dtype=dtype) + elif direction == "y": + direction_vec = torch.tensor([0.0, 1.0, 0.0], device=device, dtype=dtype) + elif direction == "z": + direction_vec = torch.tensor([0.0, 0.0, 1.0], device=device, dtype=dtype) + else: + log_error(f"Invalid direction '{direction}'. Must be 'x', 'y', or 'z'.") + return pose_to_change + else: + direction_vec = torch.as_tensor(direction, device=device, dtype=dtype) + + direction_vec = direction_vec / torch.linalg.norm(direction_vec) + offset_matrix = torch.eye(4, device=device, dtype=dtype) + offset_matrix[:3, 3] = offset_value * direction_vec + + if mode == "extrinsic": + offset_pose = offset_matrix @ pose_to_change + elif mode == "intrinsic": + offset_pose = pose_to_change @ offset_matrix + else: + log_error(f"Invalid mode '{mode}'. Must be 'extrinsic' or 'intrinsic'.") + return pose_to_change + + return offset_pose + + +def plan_trajectory( + env, + select_arm, + qpos_list, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, +): + motion_generator = MotionGenerator( + robot=env.robot, + uid=select_arm, + **getattr(env, "planning_config", {}), + ) + traj_list, _ = motion_generator.create_discrete_trajectory( + qpos_list=qpos_list, + sample_num=sample_num, + qpos_seed=qpos_list[0], + is_use_current_qpos=False, + **getattr(env, "planning_config", {}), + ) + + select_qpos_traj.extend(traj_list) + ee_state_list_select.extend([select_arm_current_gripper_state] * len(traj_list)) + + +def plan_gripper_trajectory( + env, + is_left, + sample_num, + execute_open, + select_arm_current_qpos, + select_qpos_traj, + ee_state_list_select, +): + open_state = env.open_state + close_state = env.close_state + + if execute_open: + ee_state_expand_select = np.array([close_state, open_state]) + env.set_current_gripper_state_agent(open_state, is_left=is_left) + else: + ee_state_expand_select = np.array([open_state, close_state]) + env.set_current_gripper_state_agent(close_state, is_left=is_left) + + ee_state_expand_select = mul_linear_expand(ee_state_expand_select, [sample_num]) + + select_qpos_traj.extend([select_arm_current_qpos] * sample_num) + ee_state_list_select.extend(ee_state_expand_select) + + +def finalize_actions(select_qpos_traj, ee_state_list_select): + # mimic eef state + actions = np.concatenate( + [ + np.array(select_qpos_traj), + np.array(ee_state_list_select), + np.array(ee_state_list_select), + ], + axis=-1, + ) + return actions + + +def extract_drive_calls(code_str: str) -> list[str]: + tree = ast.parse(code_str) + lines = code_str.splitlines() + + drive_blocks = [] + + for node in tree.body: + # Match: drive(...) 
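+        # Only bare, top-level expression statements whose callee is the name
+        # "drive" are collected; drive(...) calls nested inside assignments or
+        # other expressions are ignored.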
+ if ( + isinstance(node, ast.Expr) + and isinstance(node.value, ast.Call) + and isinstance(node.value.func, ast.Name) + and node.value.func.id == "drive" + ): + # AST line numbers are 1-based + start = node.lineno - 1 + end = node.end_lineno + block = "\n".join(lines[start:end]) + drive_blocks.append(block) + + return drive_blocks + + +""" +--------------------------------------------Atom action functions---------------------------------------------------- +--------------------------------------------Atom action functions---------------------------------------------------- +--------------------------------------------Atom action functions---------------------------------------------------- +""" + + +# TODO: write a move_to_pose atom action, the use this action to form other atom actions +def grasp( + robot_name: str, + obj_name: str, + pre_grasp_dis: float = 0.05, + env=None, + force_valid=False, + **kwargs, +): + # Get target object + obj_uids = env.sim.get_rigid_object_uid_list() + if obj_name in obj_uids: + target_obj = env.sim.get_rigid_object(obj_name) + else: + log_error(f"No matched object {obj_uids}.") + target_obj_pose = target_obj.get_local_pose(to_matrix=True).squeeze(0) + + # Open the gripper if currently closed + actions = None + select_arm_current_gripper_state = ( + env.left_arm_current_gripper_state + if "left" in robot_name + else env.right_arm_current_gripper_state + ) + if select_arm_current_gripper_state <= env.open_state - 0.01: + actions = open_gripper(robot_name, env, **kwargs) + + # Retract the end-effector to avoid collision + ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + select_arm_base_pose = ( + env.left_arm_base_pose if is_left else env.right_arm_base_pose + ) + base_to_eef_xy_dis = torch.norm( + select_arm_base_pose[:2, 3] - select_arm_current_pose[:2, 3] + ) + base_to_obj_xy_dis = torch.norm( + select_arm_base_pose[:2, 3] - target_obj_pose[:2, 3] + ) + dis_eps = kwargs.get("dis_eps", 0.05) + select_arm_init_pose = ( + env.left_arm_init_xpos if is_left else env.right_arm_init_xpos + ) + if base_to_eef_xy_dis > base_to_obj_xy_dis and not torch.allclose( + select_arm_current_pose, select_arm_init_pose, rtol=1e-5, atol=1e-8 + ): + delta = base_to_eef_xy_dis - (base_to_obj_xy_dis - dis_eps) + back_actions = move_by_relative_offset( + robot_name=robot_name, + dx=0.0, + dy=0.0, + dz=-delta, + env=env, + force_valid=force_valid, + mode="intrinsic", + sample_num=15, + **kwargs, + ) + actions = ( + np.concatenate([actions, back_actions], axis=0) + if actions is not None + else back_actions + ) + + # ---------------------------------------- Prepare ---------------------------------------- + select_qpos_traj = [] + ee_state_list_select = [] + + ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + # ---------------------------------------- Pose ---------------------------------------- + # Move the end-effector to a good place for starting grasping to avoid bad poses + select_arm_retract_pose = deepcopy( + env.left_arm_init_xpos if is_left else env.right_arm_init_xpos + ) + select_arm_retract_pose = get_offset_pose( + select_arm_retract_pose, 0.15, "z", "intrinsic" + ) + select_arm_retract_qpos = get_qpos( + env, + is_left, + select_arm, + select_arm_retract_pose, + env.left_arm_init_qpos if is_left else env.right_arm_init_qpos, + force_valid=force_valid, + 
name="retract_to_good_pose", + ) + qpos_list_back_to_retract = [select_arm_current_qpos, select_arm_retract_qpos] + sample_num = 30 + + plan_trajectory( + env, + select_arm, + qpos_list_back_to_retract, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + select_arm_current_qpos = select_arm_retract_qpos + select_arm_current_pose = select_arm_retract_pose + + # Rotate the arm base to face the object for better grasping + delta_xy = target_obj_pose[:2, 3] - select_arm_base_pose[:2, 3] + dx, dy = delta_xy[0], delta_xy[1] + aim_horizontal_angle = np.arctan2(dy, dx) + select_arm_aim_qpos = deepcopy(select_arm_current_qpos) + select_arm_aim_qpos[0] = aim_horizontal_angle + + # Get best grasp pose from affordance data + grasp_pose_object = env.init_obj_info.get(obj_name)["grasp_pose_obj"] + if ( + grasp_pose_object[0, 2] > 0.5 + ): # whether towards x direction TODO: make it robust + # Align the object pose's z-axis with the arm's aiming direction + target_obj_pose = torch.tensor( + get_rotation_replaced_pose( + np.array(target_obj_pose), + float(select_arm_aim_qpos[0]), + "z", + "intrinsic", + ) + ) + best_pickpose = target_obj_pose @ grasp_pose_object + grasp_pose = deepcopy(best_pickpose) + grasp_pose_pre1 = deepcopy(grasp_pose) + grasp_pose_pre1 = get_offset_pose(grasp_pose_pre1, -pre_grasp_dis, "z", "intrinsic") + + # Solve IK for pre-grasp and grasp poses + grasp_qpos_pre1 = get_qpos( + env, + is_left, + select_arm, + grasp_pose_pre1, + select_arm_aim_qpos, + force_valid=force_valid, + name="grasp pre1", + ) + grasp_qpos = get_qpos( + env, + is_left, + select_arm, + grasp_pose, + grasp_qpos_pre1, + force_valid=force_valid, + name="grasp", + ) + + # Update env state to final grasp pose + env.set_current_qpos_agent(grasp_qpos, is_left=is_left) + env.set_current_xpos_agent(grasp_pose, is_left=is_left) + + # ------------------------------------ Traj 0: init → aim ------------------------------------ + qpos_list_init_to_aim = [select_arm_current_qpos, select_arm_aim_qpos] + # base_sample_num = 10 + # base_angle = 0.08 + # sample_num = max(int(delta_angle / base_angle * base_sample_num), 2) + + sample_num = 10 + + plan_trajectory( + env, + select_arm, + qpos_list_init_to_aim, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + # ------------------------------------ Traj 1: aim → pre-grasp ------------------------------------ + qpos_list_aim_to_pre1 = [select_arm_aim_qpos, grasp_qpos_pre1] + sample_num = kwargs.get("sample_num", 30) + + plan_trajectory( + env, + select_arm, + qpos_list_aim_to_pre1, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + # ------------------------------------ Traj 2: pre-grasp → grasp ------------------------------------ + qpos_list_pre1_to_grasp = [grasp_qpos_pre1, grasp_qpos] + sample_num = kwargs.get("sample_num", 20) + + plan_trajectory( + env, + select_arm, + qpos_list_pre1_to_grasp, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + # ---------------------------------------- Final ---------------------------------------- + traj_actions = finalize_actions(select_qpos_traj, ee_state_list_select) + actions = ( + traj_actions + if actions is None + else np.concatenate([actions, traj_actions], axis=0) + ) + + # ------------------------------------ Close gripper ------------------------------------ + close_gripper_actions = close_gripper(robot_name, env, **kwargs) + 
actions = np.concatenate([actions, close_gripper_actions], axis=0) + + log_info( + f"Total generated trajectory number for grasp: {len(actions)}.", color="green" + ) + + return actions + + +# def place_on_table( +# robot_name: str, +# obj_name: str, +# x: float = None, +# y: float = None, +# pre_place_dis: float = 0.08, +# env=None, +# force_valid=False, +# **kwargs +# ): +# +# # ---------------------------------------- Prepare ---------------------------------------- +# select_qpos_traj = [] +# ee_state_list_select = [] +# +# is_left, select_arm, select_arm_current_qpos, select_arm_current_pose, \ +# select_arm_current_gripper_state = get_arm_states(env, robot_name) +# +# grasp_pose_object = env.init_obj_info.get(obj_name).get('grasp_pose_obj') +# init_obj_pose = env.init_obj_info.get(obj_name).get('pose') +# +# select_arm_base_pose = env.left_arm_base_pose if is_left else env.right_arm_base_pose +# delta_xy = init_obj_pose[:2, 3] - select_arm_base_pose[:2, 3] +# aim_horizontal_angle = np.arctan2(delta_xy[1], delta_xy[0]) +# +# # Align the object pose's z-axis with the arm's aiming direction +# init_obj_pose = torch.tensor(get_rotation_replaced_pose(np.array(init_obj_pose), float(aim_horizontal_angle), "z","intrinsic")) +# +# place_pose = init_obj_pose @ grasp_pose_object +# place_pose[0, 3] = x +# place_pose[1, 3] = y +# place_pose[2, 3] += kwargs.get('eps', 0.02) +# +# pre_place_pose = deepcopy(place_pose) +# pre_place_pose[2, 3] += pre_place_dis +# +# # Solve IK for pre-place and place poses +# place_qpos_pre1 = get_qpos(env, is_left, select_arm, pre_place_pose, select_arm_current_qpos, force_valid=force_valid, name='place pre1') +# place_qpos = get_qpos(env, is_left, select_arm, place_pose, place_qpos_pre1, force_valid=force_valid, name='place') +# +# # Update env states +# env.set_current_qpos_agent(place_qpos, is_left=is_left) +# env.set_current_xpos_agent(place_pose, is_left=is_left) +# +# # ------------------------------------ Traj 0: current → pre-place ------------------------------------ +# qpos_list_current_to_preplace = [select_arm_current_qpos, place_qpos_pre1] +# sample_num = 30 +# +# plan_trajectory( +# env, +# select_arm, +# qpos_list_current_to_preplace, +# sample_num, +# select_arm_current_gripper_state, +# select_qpos_traj, +# ee_state_list_select +# ) +# +# # ------------------------------------ Traj 1: pre-place → place ------------------------------------ +# qpos_list_preplace_to_place = [place_qpos_pre1, place_qpos] +# sample_num = 20 +# +# plan_trajectory( +# env, +# select_arm, +# qpos_list_preplace_to_place, +# sample_num, +# select_arm_current_gripper_state, +# select_qpos_traj, +# ee_state_list_select +# ) +# +# # ---------------------------------------- Final ---------------------------------------- +# traj_actions = finalize_actions(select_qpos_traj, ee_state_list_select) +# +# open_actions = open_gripper(robot_name, env, **kwargs) +# +# actions = np.concatenate([traj_actions, open_actions], axis=0) +# +# log_info(f"Total generated trajectory number for place on table: {len(actions)}.", color="green") +# +# return actions + + +def place_on_table( + robot_name: str, + obj_name: str, + x: float = None, + y: float = None, + pre_place_dis: float = 0.08, + env=None, + force_valid=False, + **kwargs, +): + + init_obj_height = env.init_obj_info.get(obj_name).get("height") + height = init_obj_height + kwargs.get("eps", 0.03) + + traj_actions = move_to_absolute_position( + robot_name, x=x, y=y, z=height, env=env, force_valid=force_valid, **kwargs + ) + open_actions = 
open_gripper(robot_name, env, **kwargs) + + actions = np.concatenate([traj_actions, open_actions], axis=0) + + log_info( + f"Total generated trajectory number for place on table: {len(actions)}.", + color="green", + ) + + return actions + + +def move_relative_to_object( + robot_name: str, + obj_name: str, + x_offset: float = 0, + y_offset: float = 0, + z_offset: float = 0, + env=None, + force_valid=False, + **kwargs, +): + + # ---------------------------------------- Prepare ---------------------------------------- + select_qpos_traj = [] + ee_state_list_select = [] + + ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + # ---------------------------------------- Pose ---------------------------------------- + # Resolve target object + obj_uids = env.sim.get_rigid_object_uid_list() + if obj_name in obj_uids: + target_obj = env.sim.get_rigid_object(obj_name) + else: + log_error("No matched object.") + + # Get object base pose (4x4 matrix) + target_obj_pose = target_obj.get_local_pose(to_matrix=True).squeeze(0) + + # Construct target pose (preserve orientation) + move_target_pose = deepcopy(select_arm_current_pose) + move_target_pose[:3, 3] = target_obj_pose[:3, 3] + move_target_pose[0, 3] += x_offset + move_target_pose[1, 3] += y_offset + move_target_pose[2, 3] += z_offset + + # Solve IK for target pose + move_target_qpos = get_qpos( + env, + is_left, + select_arm, + move_target_pose, + select_arm_current_qpos, + force_valid=force_valid, + name="move relative to object", + ) + + # Update env states + env.set_current_qpos_agent(move_target_qpos, is_left=is_left) + env.set_current_xpos_agent(move_target_pose, is_left=is_left) + + # ------------------------------------ Traj 1: init → target ------------------------------------ + qpos_list_init_to_target = [select_arm_current_qpos, move_target_qpos] + sample_num = kwargs.get("sample_num", 30) + + plan_trajectory( + env, + select_arm, + qpos_list_init_to_target, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + # ---------------------------------------- Final ---------------------------------------- + actions = finalize_actions(select_qpos_traj, ee_state_list_select) + + log_info( + f"Total generated trajectory number for move relative to object: {len(actions)}.", + color="green", + ) + + return actions + + +def move_to_absolute_position( + robot_name: str, + x: float = None, + y: float = None, + z: float = None, + env=None, + force_valid=False, + **kwargs, +): + + # ---------------------------------------- Prepare ---------------------------------------- + select_qpos_traj = [] + ee_state_list_select = [] + + ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + # ---------------------------------------- Pose ---------------------------------------- + # Start from current pose, then selectively update xyz + move_pose = deepcopy(select_arm_current_pose) + + current_xyz = move_pose[:3, 3].clone() + + target_xyz = current_xyz.clone() + if x is not None: + target_xyz[0] = x + if y is not None: + target_xyz[1] = y + if z is not None: + target_xyz[2] = z + + move_pose[:3, 3] = target_xyz + + # Try IK on target pose + move_qpos = get_qpos( + env, + is_left, + select_arm, + move_pose, + select_arm_current_qpos, + force_valid=force_valid, + name="move to absolute position", + ) + + # Update env 
states + env.set_current_qpos_agent(move_qpos, is_left=is_left) + env.set_current_xpos_agent(move_pose, is_left=is_left) + + # ------------------------------------ Traj: init → target ------------------------------------ + qpos_list_init_to_move = [select_arm_current_qpos, move_qpos] + sample_num = kwargs.get("sample_num", 30) + + plan_trajectory( + env, + select_arm, + qpos_list_init_to_move, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + # ---------------------------------------- Final ---------------------------------------- + actions = finalize_actions(select_qpos_traj, ee_state_list_select) + + log_info( + f"Total generated trajectory number for move to absolute position: {len(actions)}.", + color="green", + ) + + return actions + + +def move_by_relative_offset( + robot_name: str, + dx: float = 0.0, + dy: float = 0.0, + dz: float = 0.0, + mode: str = "extrinsic", + env=None, + force_valid=False, + **kwargs, +): + + # ---------------------------------------- Prepare ---------------------------------------- + select_qpos_traj = [] + ee_state_list_select = [] + + ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + # ---------------------------------------- Pose ---------------------------------------- + move_pose = deepcopy(select_arm_current_pose) + + # Apply relative offsets (dx, dy, dz always floats) + move_pose = get_offset_pose(move_pose, dx, "x", mode) + move_pose = get_offset_pose(move_pose, dy, "y", mode) + move_pose = get_offset_pose(move_pose, dz, "z", mode) + + # Solve IK + move_qpos = get_qpos( + env, + is_left, + select_arm, + move_pose, + select_arm_current_qpos, + force_valid=force_valid, + name="move by relative offset", + ) + + # Update environment states + env.set_current_qpos_agent(move_qpos, is_left=is_left) + env.set_current_xpos_agent(move_pose, is_left=is_left) + + # ------------------------------------ Traj: init → target ------------------------------------ + qpos_list_init_to_move = [select_arm_current_qpos, move_qpos] + sample_num = kwargs.get("sample_num", 20) + + plan_trajectory( + env, + select_arm, + qpos_list_init_to_move, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + # ---------------------------------------- Final ---------------------------------------- + actions = finalize_actions(select_qpos_traj, ee_state_list_select) + + log_info( + f"Total generated trajectory number for move by relative offset: {len(actions)}.", + color="green", + ) + + return actions + + +def back_to_initial_pose(robot_name: str, env=None, **kwargs): + + # ---------------------------------------- Prepare ---------------------------------------- + select_qpos_traj = [] + ee_state_list_select = [] + + # Get arm states + ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + # Retrieve the initial joint configuration of this arm + target_qpos = env.left_arm_init_qpos if is_left else env.right_arm_init_qpos + target_qpos = torch.as_tensor(target_qpos, dtype=select_arm_current_qpos.dtype) + + # ---------------------------------------- Pose ---------------------------------------- + # Pre-back pose: move along tool z by a small offset (use intrinsic frame) + pre_back_pose = deepcopy(select_arm_current_pose) + pre_back_pose = get_offset_pose(pre_back_pose, -0.08, "z", 
"intrinsic") + + # IK for pre-back + pre_back_qpos = get_qpos( + env, + is_left, + select_arm, + pre_back_pose, + select_arm_current_qpos, + force_valid=kwargs.get("force_valid", False), + name="pre back pose", + ) + + # Update env states (move to target pose) + target_pose = env.get_arm_fk(qpos=target_qpos, is_left=is_left) + env.set_current_qpos_agent(target_qpos, is_left=is_left) + env.set_current_xpos_agent(target_pose, is_left=is_left) + + # ------------------------------------ Traj: init → pre back_pose ------------------------------------ + qpos_list_init_to_preback = [select_arm_current_qpos, pre_back_qpos] + sample_num = 20 + + plan_trajectory( + env, + select_arm, + qpos_list_init_to_preback, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + # ------------------------------------ Traj: init → initial_pose ------------------------------------ + qpos_list_preback_to_target = [pre_back_qpos, target_qpos] + sample_num = kwargs.get("sample_num", 30) + + plan_trajectory( + env, + select_arm, + qpos_list_preback_to_target, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + # ---------------------------------------- Final ---------------------------------------- + actions = finalize_actions(select_qpos_traj, ee_state_list_select) + + log_info( + f"Total generated trajectory number for back to initial pose: {len(actions)}.", + color="green", + ) + + return actions + + +def rotate_eef(robot_name: str, degree: float = 0, env=None, **kwargs): + + # ---------------------------------------- Prepare ---------------------------------------- + select_qpos_traj = [] + ee_state_list_select = [] + + ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + # ---------------------------------------- Pose ---------------------------------------- + # Compute new joint positions + rotated_qpos = deepcopy(select_arm_current_qpos) + rotated_qpos[5] += np.deg2rad(degree) + + # Optional: limit checking (commented out by default) + # joint5_limit = env.get_joint_limits(select_arm)[5] + # if rotated_qpos[5] < joint5_limit[0] or rotated_qpos[5] > joint5_limit[1]: + # log_warning("Rotated qpos exceeds joint limits.\n") + + # Compute FK for new pose + rotated_pose = env.get_arm_fk( + qpos=rotated_qpos, + is_left=is_left, + ) + + # Update environment state + env.set_current_qpos_agent(rotated_qpos, is_left=is_left) + env.set_current_xpos_agent(rotated_pose, is_left=is_left) + + # ------------------------------------ Traj 1: init → rotated ------------------------------------ + qpos_list_init_to_rotated = [select_arm_current_qpos, rotated_qpos] + sample_num = kwargs.get("sample_num", 20) + + plan_trajectory( + env, + select_arm, + qpos_list_init_to_rotated, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + # ---------------------------------------- Final ---------------------------------------- + actions = finalize_actions(select_qpos_traj, ee_state_list_select) + + log_info( + f"Total generated trajectory number for rotate eef: {len(actions)}.", + color="green", + ) + + return actions + + +def orient_eef( + robot_name: str, + direction: str = "front", # 'front' or 'down' + env=None, + force_valid=False, + **kwargs, +): + + # ---------------------------------------- Prepare ---------------------------------------- + select_qpos_traj = [] + ee_state_list_select = 
[] + + # Get arm state + ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + # ---------------------------------------- Pose ---------------------------------------- + # Generate replacement rotation matrix + replaced_rotation_matrix = np.eye(4) + if direction == "front": + rotation_matrix = R.from_euler("xyz", [180, -90, 0], degrees=True).as_matrix() + replaced_rotation_matrix[:3, :3] = ( + rotation_matrix @ replaced_rotation_matrix[:3, :3] + ) + elif direction == "down": + rotation_matrix = R.from_euler("x", 180, degrees=True).as_matrix() + replaced_rotation_matrix[:3, :3] = ( + rotation_matrix @ replaced_rotation_matrix[:3, :3] + ) + else: + log_error("Rotation direction must be 'front' or 'down'.") + + rotation_replaced_pose = deepcopy(select_arm_current_pose) + rot_torch = torch.as_tensor( + replaced_rotation_matrix[:3, :3], + dtype=rotation_replaced_pose.dtype, + device=rotation_replaced_pose.device, + ) + rotation_replaced_pose[:3, :3] = rot_torch + + # Solve IK for the new pose + replace_target_qpos = get_qpos( + env, + is_left, + select_arm, + rotation_replaced_pose, + select_arm_current_qpos, + force_valid=force_valid, + name="replaced-rotation", + ) + + # ---------------------------------------- Update env ---------------------------------------- + env.set_current_qpos_agent(replace_target_qpos, is_left=is_left) + env.set_current_xpos_agent(rotation_replaced_pose, is_left=is_left) + + # ------------------------------------ Traj: init → target ------------------------------------ + qpos_list_init_to_rotated = [select_arm_current_qpos, replace_target_qpos] + sample_num = kwargs.get("sample_num", 20) + + plan_trajectory( + env, + select_arm, + qpos_list_init_to_rotated, + sample_num, + select_arm_current_gripper_state, + select_qpos_traj, + ee_state_list_select, + ) + + # ---------------------------------------- Final ---------------------------------------- + actions = finalize_actions(select_qpos_traj, ee_state_list_select) + + log_info( + f"Total generated trajectory number for orient eef: {len(actions)}.", + color="green", + ) + + return actions + + +def close_gripper(robot_name: str, env=None, **kwargs): + + # ---------------------------------------- Prepare ---------------------------------------- + select_qpos_traj = [] + ee_state_list_select = [] + + ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = get_arm_states(env, robot_name) + + # ---------------------------------------- Traj ---------------------------------------- + sample_num = kwargs.get("sample_num", 15) + execute_open = False # False → closing motion + + plan_gripper_trajectory( + env, + is_left, + sample_num, + execute_open, + select_arm_current_qpos, + select_qpos_traj, + ee_state_list_select, + ) + + # ---------------------------------------- Final ---------------------------------------- + actions = finalize_actions(select_qpos_traj, ee_state_list_select) + + log_info( + f"Total generated trajectory number for close gripper: {len(actions)}.", + color="green", + ) + + return actions + + +def open_gripper(robot_name: str, env=None, **kwargs): + + # ---------------------------------------- Prepare ---------------------------------------- + select_qpos_traj = [] + ee_state_list_select = [] + + ( + is_left, + select_arm, + select_arm_current_qpos, + select_arm_current_pose, + select_arm_current_gripper_state, + ) = get_arm_states(env, 
robot_name) + + # ---------------------------------------- Traj ---------------------------------------- + sample_num = kwargs.get("sample_num", 15) + execute_open = True # True → opening motion + + plan_gripper_trajectory( + env, + is_left, + sample_num, + execute_open, + select_arm_current_qpos, + select_qpos_traj, + ee_state_list_select, + ) + + # ---------------------------------------- Final ---------------------------------------- + actions = finalize_actions(select_qpos_traj, ee_state_list_select) + + log_info( + f"Total generated trajectory number for open gripper: {len(actions)}.", + color="green", + ) + + return actions + + +def drive( + left_arm_action=None, + right_arm_action=None, + env=None, + **kwargs, +): + + if left_arm_action is not None and right_arm_action is not None: + len_left = len(left_arm_action) + len_right = len(right_arm_action) + + if len_left < len_right: + diff = len_right - len_left + padding = np.repeat(left_arm_action[-1:], diff, axis=0) + left_arm_action = np.concatenate([left_arm_action, padding], axis=0) + elif len_right < len_left: + diff = len_left - len_right + padding = np.repeat(right_arm_action[-1:], diff, axis=0) + right_arm_action = np.concatenate([right_arm_action, padding], axis=0) + + left_arm_index = env.left_arm_joints + env.left_eef_joints + right_arm_index = env.right_arm_joints + env.right_eef_joints + actions = np.zeros((len(right_arm_action), len(env.init_qpos))) + actions[:, left_arm_index] = left_arm_action + actions[:, right_arm_index] = right_arm_action + + elif left_arm_action is None and right_arm_action is not None: + left_arm_index = env.left_arm_joints + env.left_eef_joints + right_arm_index = env.right_arm_joints + env.right_eef_joints + left_arm_action = finalize_actions( + env.left_arm_current_qpos, env.left_arm_current_gripper_state + ) + left_arm_action = np.repeat( + left_arm_action[None, :], len(right_arm_action), axis=0 + ) + + actions = np.zeros( + (len(right_arm_action), len(env.robot.get_qpos().squeeze(0))), + dtype=np.float32, + ) + actions[:, left_arm_index] = left_arm_action + actions[:, right_arm_index] = right_arm_action + + elif right_arm_action is None and left_arm_action is not None: + left_arm_index = env.left_arm_joints + env.left_eef_joints + right_arm_index = env.right_arm_joints + env.right_eef_joints + right_arm_action = finalize_actions( + env.right_arm_current_qpos, env.right_arm_current_gripper_state + ) + right_arm_action = np.repeat( + right_arm_action[None, :], len(left_arm_action), axis=0 + ) + + actions = np.zeros( + (len(left_arm_action), len(env.robot.get_qpos().squeeze(0))), + dtype=np.float32, + ) + actions[:, left_arm_index] = left_arm_action + actions[:, right_arm_index] = right_arm_action + + else: + log_error("At least one arm action should be provided.") + + actions = torch.from_numpy(actions).to(dtype=torch.float32).unsqueeze(1) + actions = list(actions.unbind(dim=0)) + for i in tqdm(range(len(actions))): + action = actions[i] + env.step(action) + return actions + + +def save_observations( + step_id: int = 0, + step_name: str = None, + env=None, + **kwargs, +): + # When using feedback script + log_dir = kwargs.get("log_dir") + if log_dir: + save_dir = log_dir / "camera_images" + + # Prepare subfolder: {id}_generate_num/episode{current_check_num} + gen_id = kwargs.get("id", "unknown_id") + episode_id = kwargs.get("current_check_num", 0) + + sub_dir = save_dir / f"{gen_id}_generate_num" / f"episode{episode_id}" + sub_dir.mkdir(parents=True, exist_ok=True) + + # Encode image to Base64 + 
base64_image = encode_image(env.get_obs_for_agent()["rgb"]) + + # Decode Base64 back to raw image bytes + import base64 + + img_bytes = base64.b64decode(base64_image) + + # Ensure step_name is not None + step_name = step_name if step_name is not None else "unnamed_step" + + # Save the decoded image + output_path = sub_dir / f"step{step_id}_{step_name}.png" + with open(output_path, "wb") as f: + f.write(img_bytes) + + # Print save info + log_info(f"[save_observations] Saved image to: {output_path}") + + # When only running the script (no feedback script) + else: + pass diff --git a/embodichain/toolkits/toolkits.py b/embodichain/toolkits/toolkits.py new file mode 100644 index 00000000..5d388fca --- /dev/null +++ b/embodichain/toolkits/toolkits.py @@ -0,0 +1,33 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2021-2025 DexForce Technology Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ---------------------------------------------------------------------------- + +from abc import ABCMeta, abstractmethod +import os +from embodichain.utils.utility import load_json + + +class ToolkitsBase(metaclass=ABCMeta): + @classmethod + def from_config(cls, path: str): + assert ( + os.path.basename(path).split(".")[-1] == "json" + ), "only json file is supported." + config = load_json(path) + return config["ToolKits"][cls.__name__] + + @abstractmethod + def call(self, **kwargs): + pass diff --git a/pyproject.toml b/pyproject.toml index 50fef254..6f5d8bf7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,8 @@ dependencies = [ "dexsim_engine==0.3.9", "setuptools>=78.1.1", "gymnasium>=0.29.1", + "langchain", + "langchain-openai", "toppra==0.6.3", "pin", "pin-pink", @@ -49,6 +51,8 @@ dependencies = [ "black==24.3.0", "fvcore", "h5py", + "zmq==0.0.0", + "pycocotools", ] [project.optional-dependencies]
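
Illustrative usage (a minimal sketch, not introduced by this change, assuming only the helpers added in embodichain/toolkits/code_generation.py and embodichain/toolkits/interfaces.py; the model reply text and the names inside it are made up for the example and are never executed here):

from embodichain.toolkits.code_generation import get_executable_code_str
from embodichain.toolkits.interfaces import extract_drive_calls

# A made-up LLM reply that wraps generated code in a ```python fence.
llm_reply = (
    "Here is the plan:\n"
    "```python\n"
    'actions = grasp("right_arm", "bottle", env=env)\n'
    "drive(right_arm_action=actions, env=env)\n"
    'drive(left_arm_action=open_gripper("left_arm", env=env), env=env)\n'
    "```"
)

code_str = get_executable_code_str(llm_reply)  # strips the markdown fence
blocks = extract_drive_calls(code_str)         # keeps only top-level drive(...) statements
print(len(blocks))  # 2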