diff --git a/README.md b/README.md index c74e03c..ab9ccdb 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Please use Python 3.6 1. Install [PyRep](https://github.com/stepjam/PyRep) 2. Install [RLBench](https://github.com/stepjam/RLBench) 3. `pip install -r requirements.txt` +4. The requirements.txt include TensorForce and TensorFlow for Reinforcement Learning ## Example RLBench Usage Run `python rlbench_example.py` to launch the example script. @@ -25,4 +26,4 @@ This script contains example code on how to control the robot, get observations, ## Useful Files The following files may be useful to reference from the In the `rlbench` folder in the `RLBench` repo: * `rlbench/action_modes.py` - Different action modes to control the robot -* `rlbench/backend/observation.py` - All fields available in the observation object \ No newline at end of file +* `rlbench/backend/observation.py` - All fields available in the observation object diff --git a/TensorForceFiles/DQN_class.py b/TensorForceFiles/DQN_class.py new file mode 100644 index 0000000..7f0eaf8 --- /dev/null +++ b/TensorForceFiles/DQN_class.py @@ -0,0 +1,29 @@ +from tensorforce import Agent +import sys +sys.path.append('../') +sys.path.append(sys.path[0] + '/TensorForceFiles') +from TensorForce_class import * +import numpy as np + + +class TensorForceDQN(TensorForceClass): + + def __init__(self,num_states=6, num_actions=4, load=None): + super().__init__(num_states=num_states, num_actions=num_actions,load=load) + self.num_states = num_states + self.num_actions = num_actions + + + + def createRLagent(self, load): + states_dict = {'type': 'float', 'shape': self.num_states} + actions_dict = {'type': 'float', 'shape': self.num_actions, 'min_value': self.input_low, 'max_value': self.input_high} + + return Agent.create( + agent='dqn', + states = states_dict, # alternatively: states, actions, (max_episode_timesteps) + actions = actions_dict, + memory=10000, + exploration=0.3, + max_episode_timesteps= self.len_episode, + ) diff --git a/TensorForceFiles/TensorForce_class.py b/TensorForceFiles/TensorForce_class.py new file mode 100644 index 0000000..baf70f3 --- /dev/null +++ b/TensorForceFiles/TensorForce_class.py @@ -0,0 +1,118 @@ +from tensorforce import Agent +import numpy as np + + +class TensorForceClass: + + def __init__(self, num_states=6, num_actions=4, load=None): + + self.num_states = num_states + self.num_actions = num_actions + self.input_high = 1.0 + self.input_low = 0.0 + + + self.len_episode = 10 + self.explore = 0.5 + + self.x_r = [-0.025, 0.52] ## X Range: -0.025 - 0.52 + self.y_r = [-0.45, 0.45] ## Y Range: -0.45 - 0.45 + self.z_r = [0.751, 1.75] ## Z Range: 0.751 - 1.75 (Maybe a little higher) + + self.dist_before_action = 0 + self.dist_after_action = 0 + + self.has_object = False + + self.agent = self.createRLagent(load=load) + self.target_state = [] + + + def createRLagent(self, load=None): + states_dict = {'type': 'float', 'shape': self.num_states} + actions_dict = {'type': 'float', 'shape': self.num_actions, 'min_value': self.input_low, 'max_value': self.input_high} + + agent = Agent.create( + agent='tensorforce', + states = states_dict, # alternatively: states, actions, (max_episode_timesteps) + actions = actions_dict, + memory=10000, + update=dict(unit='timesteps', batch_size=64), + max_episode_timesteps= self.len_episode, + optimizer=dict(type='adam', learning_rate=3e-4), + policy=dict(network='auto'), + objective='policy_gradient', + reward_estimation=dict(horizon=20) + ) + + if not load ==None: + agent.restore(directory=load) + + return agent + + def act(self, obs, obj_poses): + gripper_pose = obs.gripper_pose + + + key = 'sugar' + ########################################################### + ###### PREPARE INPUT STATES TO RL FUNCTION ################ + if key in obj_poses: + target_state = list(obj_poses[key]) + target_state[2] += 0.1 + else: + self.has_object = True + target_state = [0.2, 0.0, 1.1] + # in_states = list(gripper_pose) + # in_states.extend(target_state) + + in_states = list(gripper_pose[:3]) + in_states.extend(list(target_state[:3])) + # in_states.extend(list(obj_poses['cupboard'])) + ###### PREPARE INPUT STATES TO RL FUNCTION ################ + ########################################################### + + actions = self.agent.act(states= in_states) + if self.explore > np.random.uniform(): + actions = np.random.uniform(low=0.25, high=0.75, size=self.num_actions) + + a_in = self.scaleActions(actions) + + actions2 = list(a_in[:3]) + [0,1,0,0] + list([actions[3]>0.5]) + + self.dist_before_action = np.linalg.norm(target_state[:3] - gripper_pose[:3]) + return actions2 + + + def scaleActions(self, actions): + + actions[0] = actions[0]*(self.x_r[1] - self.x_r[0]) + self.x_r[0] + actions[1] = actions[1]*(self.y_r[1] - self.y_r[0]) + self.y_r[0] + actions[2] = actions[2]*(self.z_r[1] - self.z_r[0]) + self.z_r[0] + + return actions + + def calculateReward(self): + terminal = False + reward = -self.dist_before_action/4 + + + if self.dist_after_action < 0.2: + reward += 20 + 1/self.dist_after_action + + temp = (self.dist_before_action - self.dist_after_action) / self.dist_before_action * 3 + if temp > 0: + reward += temp + else: + reward += min(temp,-0.1) + + + + if self.has_object: + reward += 100.0 + terminal = True + + + + return reward, terminal + diff --git a/TensorForceFiles/__pycache__/DQN_class.cpython-36.pyc b/TensorForceFiles/__pycache__/DQN_class.cpython-36.pyc new file mode 100644 index 0000000..8e4176f Binary files /dev/null and b/TensorForceFiles/__pycache__/DQN_class.cpython-36.pyc differ diff --git a/TensorForceFiles/__pycache__/TensorForce_class.cpython-36.pyc b/TensorForceFiles/__pycache__/TensorForce_class.cpython-36.pyc new file mode 100644 index 0000000..7f79ed0 Binary files /dev/null and b/TensorForceFiles/__pycache__/TensorForce_class.cpython-36.pyc differ diff --git a/TensorForceFiles/__pycache__/dqn_grasp_class.cpython-36.pyc b/TensorForceFiles/__pycache__/dqn_grasp_class.cpython-36.pyc new file mode 100644 index 0000000..eedffce Binary files /dev/null and b/TensorForceFiles/__pycache__/dqn_grasp_class.cpython-36.pyc differ diff --git a/TensorForceFiles/dqn_grasp_class.py b/TensorForceFiles/dqn_grasp_class.py new file mode 100644 index 0000000..489a638 --- /dev/null +++ b/TensorForceFiles/dqn_grasp_class.py @@ -0,0 +1,127 @@ +from tensorforce import Agent +import sys +sys.path.append('../') +sys.path.append(sys.path[0] + '/TensorForceFiles') +from DQN_class import * +import numpy as np + + +class DQN_grasp(TensorForceDQN): + + def __init__(self, num_actions=5, num_states=21, load=None): + self.num_states = num_states # Gripper pose, object pose + self.num_actions = num_actions # X, Y, Z, Yaw, Grasp + super().__init__(num_states= self.num_states, num_actions=self.num_actions, load=load) + + self.x_r = [-.001, .001] + self.y_r = [-.001, .001] + self.z_r = [0.752, 0.7] ## Z Range: 0.751 - 1.75 (Maybe a little higher) + self.yaw_r = [0, np.pi] + self.gripper_open = True + self.target_start_pose = [0,0,0] + self.ee_pos = [0,0,0] + self.explore = 0.3 + self.target_num = 0 + self.target_name='' + + def act(self, obs, obj_poses, key='sugar'): + gripper_pose = obs.gripper_pose + large_container_state = obj_poses['large_container'] + self.ee_pos = gripper_pose + ########################################################### + ###### PREPARE INPUT STATES TO RL FUNCTION ################ + if key in obj_poses: + target_state = list(obj_poses[key]) + self.has_object = False + else: + self.has_object = True + target_state = gripper_pose + target_state[3] +=0.1 + + in_states = list(gripper_pose) + in_states.extend(list(target_state)) + in_states.extend(list(large_container_state)) + ###### PREPARE INPUT STATES TO RL FUNCTION ################ + ########################################################### + + actions = self.agent.act(states=in_states) + + if self.explore > np.random.uniform(): + actions = np.random.uniform(low=0.0, high=1, size=self.num_actions) + + a_in = self.scaleActions(actions) + self.gripper_open = a_in[-1]>0.3 + + if self.num_actions == 5: + a_in[:2] += target_state[:2] + # a_in[:3] = gripper_pose[:3] + self.ee_pos = a_in[:3] + actions2 = list(self.ee_pos) + self.calculateQuaternion(a_in[3]) + list([self.gripper_open]) + + elif self.num_actions == 3: + self.ee_pos = [target_state[0], target_state[1], a_in[0]] + actions2 = list(self.ee_pos) + self.calculateQuaternion(a_in[1]) + list([self.gripper_open]) + + + self.dist_before_action = max(0.05,np.linalg.norm(target_state[:3] - gripper_pose[:3])) + return actions2 + + + def scaleActions(self, actions): + + if self.num_actions == 5: + actions[0] = actions[0]*(self.x_r[1] - self.x_r[0]) + self.x_r[0] + actions[1] = actions[1]*(self.y_r[1] - self.y_r[0]) + self.y_r[0] + actions[2] = actions[2]*(self.z_r[1] - self.z_r[0]) + self.z_r[0] + actions[3] = actions[3]*(self.yaw_r[1] - self.yaw_r[0]) + self.yaw_r[0] + else: + actions[0] = actions[0]*(self.z_r[1] - self.z_r[0]) + self.z_r[0] + actions[1] = actions[1]*(self.yaw_r[1] - self.yaw_r[0]) + self.yaw_r[0] + + if self.has_object: actions[-1] = 0 + + return actions + + + def calculateReward(self, i): + reward = 0 + terminal = False + reward -= i + + delta_dist = self.dist_before_action - self.dist_after_action + temp = (self.dist_before_action - self.dist_after_action) / self.dist_before_action * 3 + + if delta_dist > 0: + reward = temp + else: + reward += min(temp,-0.1) + + # print(self.has_object) + if self.has_object: + reward += 100 + print("Reward after grasping: ", reward) + terminal = True + # if self.ee_pos[-1] > self.z_r[1] - 0.05: + # reward += 250 + # terminal = True + print(self.dist_after_action) + + if not self.gripper_open and self.dist_before_action>0.1: + reward -= 20 + + if self.gripper_open and self.dist_before_action>0.1: + reward += 10 + + if not self.gripper_open and not self.has_object: + reward -= 3 + + return reward, terminal + + + def calculateQuaternion(self, angle): + firstElement = np.sin(angle/2) + secondElement = -np.cos(angle/2) + return [firstElement, secondElement, 0, 0] + + + diff --git a/TensorForceFiles/dqn_place_class.py b/TensorForceFiles/dqn_place_class.py new file mode 100644 index 0000000..dd4e6e9 --- /dev/null +++ b/TensorForceFiles/dqn_place_class.py @@ -0,0 +1,155 @@ +from tensorforce import Agent +from TensorForceRL_parent import * +from dqn_grasp_class import * +import numpy as np +from quaternion import from_rotation_matrix, quaternion, as_euler_angles, from_euler_angles, as_quat_array +from scipy.spatial.transform import Rotation as R + + + +class DQN_place(DQN_grasp): + + def __init__(self, num_states=16, num_actions=5, load=None): + self.num_states = 16 # Object pose, cabinet pose, Gripper Open, Target_num + self.num_actions = 5 # X, Y, Z, yaw, grasp + super().__init__(num_states=num_states, num_actions=num_actions, load=load) + + self.x_r = [-0.1, 0.05] # Cupboard coords: Point down + self.y_r = [-0.1, 0.1] # Cupboad coords: Point to left of cupboard + self.z_r = [-0.275, 0.1] # Cupboard coords: Point to cupboard + self.z_r = [-0.1, 0.1] # Cupboard coords: Point to cupboard + self.yaw_r = [0, np.pi] + self.explore = 0.5 + self.stage_point = [] + self.obj_poses = [] + + def createRLagent(self, load): + states_dict = {'type': 'float', 'shape': self.num_states} + actions_dict = {'type': 'float', 'shape': self.num_actions, 'min_value': self.input_low, 'max_value': self.input_high} + + return Agent.create( + agent='dqn', + states = states_dict, # alternatively: states, actions, (max_episode_timesteps) + actions = actions_dict, + memory=10000, + exploration=0.75, + max_episode_timesteps= self.len_episode, + ) + + def getInStates(self, key, obj_poses): + target_state = list(obj_poses[key]) + self.target_state = target_state + self.has_object = False + + in_states = list(self.ee_pos) + in_states.extend(list(self.target_state)) + + in_states.extend(list([self.gripper_open*1.0, self.target_num])) + return in_states + + + def act(self, obs, obj_poses, key='crackers'): + self.stage_point = obj_poses['waypoint3'] + gripper_pose = obs.gripper_pose + self.ee_pos = gripper_pose + self.obj_poses = obj_poses + + in_states = self.getInStates('waypoint4', obj_poses) + + actions = self.agent.act(states=in_states) + + # if self.explore > np.random.uniform(): + # actions = np.random.uniform(low=0.0, high=1, size=self.num_actions) + + a_in = self.scaleActions(actions) + self.gripper_open = a_in[-1]>0.5 + + self.ee_pos = a_in[:3] + # actions2 = list(self.ee_pos) + self.calculateQuaternion(a_in[3]) + list([self.gripper_open]) + actions2 = list(self.ee_pos) + list(self.stage_point[3:7]) + list([self.gripper_open]) + + self.dist_before_action = max(0.05,np.linalg.norm(self.target_state[:3] - gripper_pose[:3])) + + return actions2 + + + def scaleActions(self, actions): + + # Scale all the Actions + x_pre = actions[0]*(self.x_r[1] - self.x_r[0]) + self.x_r[0] + y_pre = actions[1]*(self.y_r[1] - self.y_r[0]) + self.y_r[0] + z_pre = actions[2]*(self.z_r[1] - self.z_r[0]) + self.z_r[0] + actions[3] = actions[3]*(self.yaw_r[1] - self.yaw_r[0]) + self.yaw_r[0] + + + trans = np.array([x_pre, y_pre, z_pre]) + + # Convert the Cabinet Frame Coordinates into the World Fram Coordinates + trans = self.convertTargetCoordsToWorld(trans) + + actions[:3] = trans + if self.has_object: actions[-1] = 0 + + return actions + + + def calculateReward(self): + if self.gripper_open: + t_pos = self.obj_poses[self.target_name[:-12]] + t_pos = self.convertTargetCoordsToCabinet(t_pos) + in_cab = self.x_r[0] <= t_pos[0] <= self.x_r[1] + in_cab = in_cab and self.y_r[0] <= t_pos[1] <= self.z_r[1] + in_cab = in_cab and self.z_r[0] <= t_pos[2] <= self.z_r[1] + if not in_cab: + reward = -1 + else: + reward = 5 + terminal = True + + else: + terminal = False + reward = 0 + + return reward, terminal + + def calculateQuaternion(self, angle): + firstElement = np.sin(angle/2) + secondElement = -np.cos(angle/2) + return [0, secondElement, 0, firstElement] + + + #### CODE TO TRANSLATE BETWEEN CUPBOARD AND WORLD FRAMES IMPLEMENT WITH RL SCALE AND ACTIONS + def convertTargetCoordsToWorld(self, target_coord, target_name='waypoint4'): + t_pos = self.obj_poses[target_name] + + # Create Rotation Matrix + rot_val = R.from_quat(self.obj_poses[target_name][3:7]) + to_world_mat = rot_val.as_matrix() + + # Make Rotation Matrix an Affine 4x4 matrix + to_world_mat = np.hstack((to_world_mat,np.array(t_pos[:3]).reshape([-1,1]))) + to_world_mat = np.vstack((to_world_mat, [0,0,0,1])) + + # Convert the Coordinates + go_pt = np.hstack((target_coord, 1)) + world_pt = to_world_mat @ go_pt.T + + return world_pt[:3] + + def convertTargetCoordsToCabinet(self, target_coord, target_name='waypoint4'): + t_pos = self.obj_poses[target_name] + + # Create Rotation Matrix + rot_val = R.from_quat(self.obj_poses[target_name][3:7]) + to_world_mat = rot_val.as_matrix() + + # Make Rotation Matrix an Affine 4x4 matrix + to_world_mat = np.hstack((to_world_mat,np.array(t_pos[:3]).reshape([-1,1]))) + to_world_mat = np.vstack((to_world_mat, [0,0,0,1])) + to_cupboard_mat = np.linalg.inv(to_world_mat) + + # Convert the Coordinates + world_pt = np.hstack((target_coord[:3],1)) + cab_pt = to_cupboard_mat @ world_pt.T + + return cab_pt \ No newline at end of file diff --git a/__pycache__/helper.cpython-36.pyc b/__pycache__/helper.cpython-36.pyc new file mode 100644 index 0000000..bd130d6 Binary files /dev/null and b/__pycache__/helper.cpython-36.pyc differ diff --git a/dqn_grasp/agent.data-00000-of-00001 b/dqn_grasp/agent.data-00000-of-00001 new file mode 100644 index 0000000..390c9ba Binary files /dev/null and b/dqn_grasp/agent.data-00000-of-00001 differ diff --git a/dqn_grasp/agent.index b/dqn_grasp/agent.index new file mode 100644 index 0000000..626a171 Binary files /dev/null and b/dqn_grasp/agent.index differ diff --git a/dqn_grasp/agent.json b/dqn_grasp/agent.json new file mode 100644 index 0000000..b9ec65f --- /dev/null +++ b/dqn_grasp/agent.json @@ -0,0 +1 @@ +{"agent": "dqn", "states": {"type": "float", "shape": 21}, "actions": {"type": "float", "shape": 5, "min_value": 0.0, "max_value": 1.0}, "max_episode_timesteps": 10, "network": "auto", "memory": 10000, "batch_size": 32, "update_frequency": null, "start_updating": null, "learning_rate": 0.0003, "huber_loss": 0.0, "horizon": 0, "discount": 0.99, "estimate_terminal": false, "target_sync_frequency": 1, "target_update_weight": 1.0, "preprocessing": null, "exploration": 0.3, "variable_noise": 0.0, "l2_regularization": 0.0, "entropy_regularization": 0.0, "name": "agent", "device": null, "parallel_interactions": 1, "seed": null, "execution": null, "saver": null, "summarizer": null, "recorder": null, "config": null} \ No newline at end of file diff --git a/dqn_grasp/agent.meta b/dqn_grasp/agent.meta new file mode 100644 index 0000000..18efe6e Binary files /dev/null and b/dqn_grasp/agent.meta differ diff --git a/dqn_grasp/checkpoint b/dqn_grasp/checkpoint new file mode 100644 index 0000000..f5eb96b --- /dev/null +++ b/dqn_grasp/checkpoint @@ -0,0 +1,2 @@ +model_checkpoint_path: "agent" +all_model_checkpoint_paths: "agent" diff --git a/helper.py b/helper.py new file mode 100644 index 0000000..8a6a272 --- /dev/null +++ b/helper.py @@ -0,0 +1,467 @@ +''' +This file contains helper functions for the 16-662 Robot Autonomy +Final Project. This was completed by Bryson Jones, Quentin Cheng, +and Shaun Ryer. +Carnegie Mellon University +MRSD Class of 2021 +''' + +import numpy as np +import scipy as sp +from quaternion import from_rotation_matrix, quaternion, as_euler_angles, from_euler_angles, as_float_array +# import imutils +# from imutils import perspective +# from imutils import contours +from collections import namedtuple +from shapely.geometry import Point, Polygon +import time +import warnings + + +# def generate_bounding_box(rgb_img, lower_bound, upper_bound): +# ''' +# :param rgb_img: an rgb image with an object that should have a bounding +# box overlaid onto it +# :param lower_bound: lower thresholding bound (1 x 3) +# :param upper_bound: upper thresholding bound (1 x 3) +# :return: bgr_modified_img: image in bgr format, which has bounding box +# overlaid +# ''' + +# # convert rgb to hsv +# bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR) + +# # create bounded mask +# mask_red = cv2.inRange(bgr, lower_bound, upper_bound) + +# # bitwise mask +# redBoxOnlyFrame = cv2.bitwise_and(bgr.astype(np.uint8), bgr.astype(np.uint8), mask=mask_red) + +# # convert image to gray +# gray = cv2.cvtColor(redBoxOnlyFrame, cv2.COLOR_BGR2GRAY) +# gray = cv2.GaussianBlur(gray, (7, 7), 0) +# # perform edge detection, then perform a dilation + erosion to +# # close gaps in between object edges +# edges = cv2.Canny(gray, 50, 100) +# edges = cv2.dilate(edges, None, iterations=1) +# edges = cv2.erode(edges, None, iterations=1) + +# # find contours in the edge map +# cnts = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, +# cv2.CHAIN_APPROX_SIMPLE) +# cnts = imutils.grab_contours(cnts) +# # sort the contours from left-to-right and initialize the +# # 'pixels per metric' calibration variable +# (cnts, _) = contours.sort_contours(cnts) + +# # loop over the contours individually +# bgr_modified_img = bgr.copy() +# for c in cnts: +# # if the contour is not sufficiently large, ignore it +# if cv2.contourArea(c) < 100: +# continue +# # compute the rotated bounding box of the contour +# box = cv2.minAreaRect(c) +# box = cv2.boxPoints(box) if imutils.is_cv2() else cv2.boxPoints(box) +# box = np.array(box, dtype="int") +# # order the points in the contour such that they appear +# # in top-left, top-right, bottom-right, and bottom-left +# # order, then draw the outline of the rotated bounding +# # box +# box = perspective.order_points(box) +# cv2.drawContours(bgr_modified_img, [box.astype("int")], -1, (0, 255, 0), 2) + +# cv2.imwrite('test.jpg', bgr_modified_img) + +# return bgr_modified_img + +def pick_up_box_variables(large_container_pos,obs,z,small_container_pos): + above_large_container = large_container_pos + above_large_container[2] += 0.3 + + notflipped = quaternion(obs.gripper_pose[3],obs.gripper_pose[4],obs.gripper_pose[5],obs.gripper_pose[6]) + notflipped_array = as_float_array(notflipped) + flipped_euler = as_euler_angles(notflipped) + + + amount_2_flip = -2.4 + if np.cos(z)<0: + amount_2_flip = -amount_2_flip + flipped_euler[2] += amount_2_flip + flipped = from_euler_angles(flipped_euler) + flipped_array = as_float_array(flipped) + + above_large_container[0] += 0.070*np.sin(z) + above_large_container[1] += 0.070*np.cos(z) + + small_container_pos_original = small_container_pos + 0*small_container_pos + small_container_pos_original[0] = small_container_pos[0]+0.070*np.sin(z) + small_container_pos_original[1] = small_container_pos[1]+0.070*np.cos(z) + + return above_large_container,small_container_pos_original,notflipped_array,flipped_array + +def pick_up_box(state,obs,gripper,small_container0_obj,z,small_container_pos,small_container_pos_original,gripper_pose,above_large_container,flipped_array,notflipped_array): + # move to red box + # state = 5 + if state == -1: + print('state -1') + action = np.concatenate((small_container_pos_original+[np.sin(z)*0.02,np.cos(z)*0.02,0.3], gripper_pose[3:7], np.array([1]))) + state = 0 + return state,action + + elif state == 0: + print('state 0') + action = np.concatenate((small_container_pos_original+[np.sin(z)*0.02,np.cos(z)*0.02,0.01], gripper_pose[3:7], np.array([1]))) + state = 1 + return state,action + # wait + elif state == 1: + print('state 1') + action = np.concatenate((obs.gripper_pose, np.array([1]))) + # action[-1] = 0 + + state = 2 + return state,action + # moving sideways + elif state == 2: + pose = obs.gripper_pose + pose[0] += np.sin(z)*0.02 + pose[1] += np.cos(z)*0.02 + action = np.concatenate((pose, np.array([0]))) + gripper.grasp(small_container0_obj) + + state = 3 + return state,action + # moving up + elif state == 3: + pose = obs.gripper_pose + pose[2] += 0.1 + gripper.grasp(small_container0_obj) + action = np.concatenate((pose, np.array([0]))) + state = 4 + return state,action + # above big container + elif state == 4: + action = np.concatenate((above_large_container, obs.gripper_pose[3:7], np.array([0]))) + state = 5 + return state,action + #flip box + elif state ==5: + # print(flipped_array) + action = np.concatenate((above_large_container, flipped_array, np.array([0]))) + state = 6 + return state,action + elif state ==6: + # print(flipped_array) + action = np.concatenate((above_large_container, notflipped_array, np.array([0]))) + state = 7 + return state,action + elif state ==7: + # print(flipped_array) + action = np.concatenate((small_container_pos_original+[0,0,0.1], notflipped_array, np.array([0]))) + state = 8 + return state,action + elif state ==8: + # print(flipped_array) + action = np.concatenate((small_container_pos_original, notflipped_array, np.array([0]))) + state = 9 + return state,action + elif state ==9: + # print(flipped_array) + action = np.concatenate((small_container_pos_original, notflipped_array, np.array([1]))) + state = 10 + return state,action + elif state ==10: + # print(flipped_array) + action = np.concatenate((small_container_pos_original+[0,0,0.3], notflipped_array, np.array([1]))) + state = 11 + return state,action + else: + action = np.concatenate((obs.gripper_pose, np.array([1]))) + return state,action + + +def rl_get_objects(RLagent, task, shape, obs, obj_poses, box_pos, success, i): + + target_name = 'Shape' + shape + + # Update arm states + RLagent.has_object = len(task._robot.gripper._grasped_objects) > 0 + + actions = RLagent.act(obs,obj_poses, key=target_name) + + try: + obs, reward, terminal = task.step(actions) + print("RL moved me") + + ### Check current distance + target_state = list(obj_poses[target_name]) + RLagent.dist_after_action = max(0.05,np.linalg.norm(target_state[:3] - obs.gripper_pose[:3])) + # RLagent.dist_after_action = np.linalg.norm(target_state[:3] - obs.gripper_pose[:3]) + + ### Calculate reward + reward, terminal = RLagent.calculateReward(i) + + except: + print("Couldn't move") + # reward = -RLagent.dist_before_action*5 + ### Calculate reward + reward, terminal = RLagent.calculateReward(i) + # terminal = False + + ## Observe results + RLagent.agent.observe(terminal=terminal, reward=reward) + + if RLagent.has_object: + success = True + print("Success!") + + return obs, success, reward + +def get_objects(RLagent, task, obj_pose_sensor, state, shape, obs, object_pos, box_pos): + + #move above object + if state == 0: + + action = np.concatenate((object_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([1]))) + state = 1 + + return action, state, shape + + # RL to take over moving to and grasp object + elif state == 1: + success = False + total_reward = 0 + i = 0 + while (not success): + obj_poses = obj_pose_sensor.get_poses() + obs, success, reward = rl_get_objects(RLagent, task, shape, obs, obj_poses, box_pos, success, i) + i += 1 + total_reward += reward + print('Iteration: %i, Reward: %.1f' %(i, reward)) + if i>=50: + "Failed to grasp, skipping the object." + success = True + + action = np.concatenate((obs.gripper_pose, np.array([0]))) + print('Success on Iteration: %i, Total Reward: %.1f' %(i, total_reward)) + RLagent.agent.save(directory='dqn_grasp') + state = 3 + + return action, state, shape + + # #move to object + # elif state == 1: + + # action = np.concatenate((object_pos[0:3]+[0,0,0.001], obs.gripper_pose[3:7], np.array([1]))) + + # state = 2 + + # return action, state, shape + + # #move up + # elif state == 2: + + # action = np.concatenate((object_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([0]))) + # state = 3 + + # return action, state, shape + + #move above box + elif state == 3: + + action = np.concatenate((box_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([0]))) + state = 4 + + return action, state, shape + + #move close to box + if state == 4: + + action = np.concatenate((box_pos[0:3]+[0,0,0.07], obs.gripper_pose[3:7], np.array([0]))) + state = 5 + + return action, state, shape + + #drop and move away + + else: + shape = str(int(shape) + 2) + state=0 + action = np.concatenate((box_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([1]))) + return action, state, shape + + + + # elif state == 1: + # if obs.gripper_pose[2] > .95: + # state = 2 + # return [0, 0, stepsize, 0, 0, 0, 1, 0], state, shape + + # elif state == 2: + # if abs(box_pos[0] - obs.gripper_pose[0]) > .005 or abs(box_pos[1] - obs.gripper_pose[1]) > .005: + # action = np.concatenate((np.array([box_pos[0], box_pos[1], 0]), obs.gripper_pose[3:7], np.array([1]))) + + # else: + # action = np.concatenate((box_pos, obs.gripper_pose[3:7], np.array([1]))) + + # if np.isclose(box_pos, obs.gripper_pose, .05): + # state = 3 + # return [0, 0, 0, 0, 0, 0, 1, 1], state, shape + + # return action, state, shape + + # elif state == 3: + # if obs.gripper_pose[2] > 1: + # state = 0 + # shape = str(int(shape) + 2) + # return [0, 0, stepsize, 0, 0, 0, 1, 1], state, shape + + +def checkShapePosition(obj_poses, obs): + # have while loop to check for all of shape positions being accessible + shapePosAccessible = False + while not shapePosAccessible: + try: + shape0_pos = obj_poses['Shape0'][:3] + print('Shape0 position accessed') + shape2_pos = obj_poses['Shape2'][:3] + print('Shape2 position accessed') + shape4_pos = obj_poses['Shape4'][:3] + print('All shape positions are available') + shapePosAccessible = True + + except KeyError: + warnings.warn('Can''t access all shapes yet.') + time.sleep(1) + + # once we can access the object shape positions, we need to + # we need to check if there are within the bounds of the large container + largeContainerPosition = obj_poses['large_container'][:3] + largeContainerOrientation = obj_poses['large_container'][3:] + + largeContainerOrientation = quaternion(largeContainerOrientation[0], + largeContainerOrientation[1], + largeContainerOrientation[2], + largeContainerOrientation[3]) + largeContainerOrientation = as_euler_angles(largeContainerOrientation) + + transformValues = namedtuple('transformValues', ['x', 'y', 'yaw']) + transformValuesPosition = transformValues(x=0, y=0, yaw=largeContainerOrientation[1]) + + H_box = getTransform(transformValuesPosition) + + # get dimensions of box !!!!!!!!!!!!! THESE ARE RANDOM VALUES RIGHT NOW!!!!!!!! + largeContainerLength = .34 # x dimension + largeContainerWidth = .34 # y dimension + + boxCornerPoints = np.array([[largeContainerLength / 2, -largeContainerLength / 2, -largeContainerLength / 2, largeContainerLength / 2], + [largeContainerWidth / 2, largeContainerWidth / 2, -largeContainerWidth / 2, -largeContainerWidth / 2], + [1, 1, 1, 1]]) + + rotatedBoxCornerPoints = H_box @ boxCornerPoints + + finalBoxCornerPoints = rotatedBoxCornerPoints + np.array([[largeContainerPosition[0]], + [largeContainerPosition[1]], + [1]]) + + finalBoxCornerCoords = [(finalBoxCornerPoints[0, 0], finalBoxCornerPoints[1, 0]), + (finalBoxCornerPoints[0, 1], finalBoxCornerPoints[1, 1]), + (finalBoxCornerPoints[0, 2], finalBoxCornerPoints[1, 2]), + (finalBoxCornerPoints[0, 3], finalBoxCornerPoints[1, 3])] + + boxPolygon = Polygon(finalBoxCornerCoords) + + # check if points are in bounds + shapes = [shape0_pos, shape2_pos, shape4_pos] + shapeNum = ['0', '2', '4'] + shapesToBeReset = [] + for i in range(3): + pointToCheck = Point(shapes[i][0], shapes[i][1]) + isWithinBox = pointToCheck.within(boxPolygon) + print('Shape' + shapeNum[i], 'within bounds: ', isWithinBox) + if not isWithinBox:#################################################need to change back: if not isWithinBox: + shapesToBeReset.append(shapeNum[i]) + + mode = 3 + + return mode, shapesToBeReset + + +def getTransform(ArgStruct): + ''' + getTransformation + + Returns the 2D homogeneous transformation for the given pose. + + Args: ArgStruct + ArgStruct.x: x axis position of frame 2's origin in frame 1 [m] + ArgStruct.y: y axis position of frame 2's origin in frame 1 [m] + ArgStruct.yaw: angle between frames [rad] + + Returns: H + H: 2D homogeneous transformation matrix [m] + + Raises: + none + + ''' + H = np.asarray([[np.cos(ArgStruct.yaw), -np.sin(ArgStruct.yaw), ArgStruct.x], + [np.sin(ArgStruct.yaw), np.cos(ArgStruct.yaw), ArgStruct.y], + [0, 0, 1]]) + + return H + +def put_in_big_container(state, shape, obs, object_pos, box_pos): + + print("state:",state) + #move above object + if state == 0: + + action = np.concatenate((object_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([1]))) + state = 1 + + return action, state, 1 + #move to object + elif state == 1: + + action = np.concatenate((object_pos[0:3]+[0,0,0.001], obs.gripper_pose[3:7], np.array([1]))) + + state = 2 + + return action, state, 1 + #move up + elif state == 2: + + action = np.concatenate((object_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([0]))) + state = 3 + + return action, state, 1 + + #move above box + elif state == 3: + + action = np.concatenate((box_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([0]))) + state = 4 + + return action, state, 1 + + #move close to box + if state == 4: + + action = np.concatenate((box_pos[0:3]+[0,0,0.1], obs.gripper_pose[3:7], np.array([0]))) + state = 5 + + return action, state, 1 + + #drop and move away + + else: + shape = str(int(shape) + 2) + state=0 + action = np.concatenate((box_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([1]))) + return action, state, 0 + + + + diff --git a/requirements.txt b/requirements.txt index a10ba80..f08f8cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ numpy scipy -numpy-quaternion \ No newline at end of file +numpy-quaternion +tensorforce +tensorforce[tf] diff --git a/rlbench_empty_container.py b/rlbench_empty_container.py index 2085fa1..8823ef3 100644 --- a/rlbench_empty_container.py +++ b/rlbench_empty_container.py @@ -1,18 +1,28 @@ import numpy as np import scipy as sp -from quaternion import from_rotation_matrix, quaternion +from quaternion import from_rotation_matrix, quaternion, as_euler_angles, from_euler_angles, as_float_array +import time +from collections import namedtuple from rlbench.environment import Environment from rlbench.action_modes import ArmActionMode, ActionMode from rlbench.observation_config import ObservationConfig from rlbench.tasks import * +from helper import pick_up_box, pick_up_box_variables, get_objects +from helper import * + +import sys +sys.path.append('../') +sys.path.append(sys.path[0] + '/TensorForceFiles') +from tensorforce import Agent +from dqn_grasp_class import DQN_grasp + def skew(x): return np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]]) - def sample_normal_pose(pos_scale, rot_scale): ''' Samples a 6D pose from a zero-mean isotropic normal distribution @@ -25,16 +35,6 @@ def sample_normal_pose(pos_scale, rot_scale): return pos, quat_wxyz - -class RandomAgent: - - def act(self, obs): - delta_pos = [(np.random.rand() * 2 - 1) * 0.005, 0, 0] - delta_quat = [0, 0, 0, 1] # xyzw - gripper_pos = [np.random.rand() > 0.5] - return delta_pos + delta_quat + gripper_pos - - class MoveAgent: def act(self, obs, target_pos): @@ -49,7 +49,6 @@ def act(self, obs, target_pos): return np.concatenate((robotStep, delta_quat, gripper_pos)) - class NoisyObjectPoseSensor: def __init__(self, env): @@ -77,34 +76,154 @@ def get_poses(self): obj_poses[name] = pose return obj_poses + + def get_objs(self): + objs = self._env._scene._active_task.get_base().get_objects_in_tree(exclude_base=True, first_generation_only=False) + return objs if __name__ == "__main__": - action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE) # See rlbench/action_modes.py for other action modes + + mode = 0 + action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN) # See rlbench/action_modes.py for other action modes env = Environment(action_mode, '', ObservationConfig(), False) task = env.get_task(EmptyContainer) # available tasks: EmptyContainer, PlayJenga, PutGroceriesInCupboard, SetTheTable - agent = moveAgent() + agent = MoveAgent() obj_pose_sensor = NoisyObjectPoseSensor(env) - + update = 0 + count = 0 + + # TensorForce Initialization + rl_grasp_agent = DQN_grasp(load='dqn_grasp') + # rl_grasp_agent = DQN_grasp() + + gripper = task._robot.gripper + objs = obj_pose_sensor.get_objs() + for obj in objs: + if (obj.get_name()=='small_container0'): + small_container0_obj = obj + descriptions, obs = task.reset() print(descriptions) + global state + state = 0 + global shape + shape = '0' + + # initialize shape poses + obj_poses = obj_pose_sensor.get_poses() + shape_pos = obj_poses['Shape' + shape][:3] + most_recent_shape_pos = shape_pos + + # Getting noisy object poses + small_container_pos = obj_poses['small_container0'][:3] + small_container_pos_original = obj_poses['small_container0'][:3] + large_container_pos = obj_poses['large_container'][:3] + small_container_quat2 = obj_poses['small_container0'][3:7] + small_container_pos[2] -= 0.01 + small_container_quat = quaternion(obj_poses['small_container0'][3], obj_poses['small_container0'][4], obj_poses['small_container0'][5],obj_poses['small_container0'][6]) + small_container_euler = as_euler_angles(small_container_quat) + + z = small_container_euler[0] + + above_large_container,small_container_pos_original, notflipped_array,flipped_array = pick_up_box_variables(large_container_pos,obs,z,small_container_pos) + while True: - # Getting noisy object poses - obj_poses = obj_pose_sensor.get_poses() - small_container_pos = obj_poses['small_container0'][:3] - - # Getting various fields from obs - current_joints = obs.joint_positions - gripper_pose = obs.gripper_pose - rgb = obs.wrist_rgb - depth = obs.wrist_depth - mask = obs.wrist_mask - - # Perform action and step simulation - action = agent.act(obs, small_container_pos) - obs, reward, terminate = task.step(action) - # if terminate: - # break + print('mode is: ', mode) + if mode==0: + + # Getting noisy object poses + obj_poses = obj_pose_sensor.get_poses() + small_container_pos = obj_poses['small_container0'][:3] + + + # Perform action and step simulation + if int(shape) < 5: # shape goes from 0, 2, 4 + try: + shape_pos = obj_poses['Shape' + shape][:3] + most_recent_shape_pos = shape_pos + except KeyError: + shape_pos = most_recent_shape_pos + + print('Shape is: ', int(shape)) + + action, state, shape = get_objects(rl_grasp_agent, task, obj_pose_sensor, state, shape, obs, shape_pos, small_container_pos) + print('State is: ', state) + # print("done") + if int(shape) > 4: + print("Finished all pickup") + mode = 4 + + elif mode == 0.5: + obj_poses = obj_pose_sensor.get_poses() + small_container_pos = obj_poses['small_container0'][:3] + gripper_pose = obs.gripper_pose + small_container_pos[0] += 0.070*np.sin(z) + small_container_pos[1] += 0.070*np.cos(z) + update=-1 + mode = 1 + + elif mode == 1: + update, action = pick_up_box(update,obs,gripper, + small_container0_obj,z, + small_container_pos, + small_container_pos_original, + gripper_pose,above_large_container, + flipped_array,notflipped_array) + + print('state update (mode 1, pick-up box): ', update) + + shape = '0' + if update == 11: + mode = 2 + + elif mode == 2: + obj_poses = obj_pose_sensor.get_poses() + mode, shapesToBeReset = checkShapePosition(obj_poses, obs) + action = np.concatenate((obs.gripper_pose, np.array([1]))) + print(mode) + state = 0 + i=0 + do_once_per_loop=0 + # mode = 3 + + elif mode ==3: + + if shapesToBeReset==[]: + + mode=4 + else: + obj_poses = obj_pose_sensor.get_poses() + large_container_pos = obj_poses['large_container'][:3] + + + + + if do_once_per_loop==0: + print(shapesToBeReset) + shape = shapesToBeReset[i] + shape_pos = obj_poses['Shape' + shape][:3] + + print("shape",shape) + action, state, do_once_per_loop = put_in_big_container(state, shape, obs, shape_pos, large_container_pos) + if (do_once_per_loop==0): + if ((i+1)>=len(shapesToBeReset)) : + mode=4 + else: + i+=1 + shape = shapesToBeReset[i] + + elif mode==4: + print("end") + state = 0 + update = 0 + count = 0 + shape = '0' + mode=0 + descriptions, obs = task.reset() + action = np.concatenate((obs.gripper_pose, np.array([1]))) + + obs, reward, terminate = task.step(action) env.shutdown() diff --git a/test.jpg b/test.jpg new file mode 100644 index 0000000..1fd9f8c Binary files /dev/null and b/test.jpg differ