Skip to content
Open

RL #2

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
5d8e174
Move to container edge. Commented out lines are buggy
htqcheng Apr 5, 2020
01a3752
Different States
htqcheng Apr 9, 2020
e2ed4ee
Lifting the red container
htqcheng Apr 12, 2020
f7d2b02
Quaternions?
htqcheng Apr 12, 2020
3247b4c
Quaternion 2
htqcheng Apr 12, 2020
6ff6b25
Grab Red Container but not lifting
htqcheng Apr 14, 2020
653fa30
move container above big container
Apr 14, 2020
d98fc9f
Changed Lateral Movement amount
htqcheng Apr 14, 2020
d1c37c3
Simulated Grasp
htqcheng Apr 14, 2020
03ae5cf
picks up box and flips
Apr 14, 2020
18d525b
added offset
Apr 16, 2020
53d395a
puts box down and release
Apr 16, 2020
8fc77fe
added initial function wrapper
Apr 22, 2020
9aede8b
merged some of bryson's code
Apr 25, 2020
9afbd52
changed to absolute code
brysonjones Apr 25, 2020
59db086
cycles from picking up objects to dumping them
Apr 25, 2020
7915923
tuning and many little bugs
Apr 25, 2020
e5d7c2f
remove unnecessary file and have the arm pick up all 3 objects
brysonjones Apr 26, 2020
be1dbfc
added some of the code to check if the objects are inside the large c…
brysonjones Apr 26, 2020
40f751c
code should work to check if objects are within bound, but need to st…
brysonjones Apr 26, 2020
cb1f07d
a few bugs cleaned up
brysonjones Apr 27, 2020
e1fc93d
have close dimensions for the large container inserted. within bounds…
brysonjones Apr 27, 2020
7f0c662
added code for passing out shapes to be reset
brysonjones Apr 27, 2020
0886153
added loop to pick up extra objects
Apr 27, 2020
717aa84
end state
Apr 27, 2020
525737d
tuned large container size a little bit. appears to consistently work…
brysonjones Apr 27, 2020
de0059a
final tweak to length and width of large container
brysonjones Apr 27, 2020
a6ab45f
fixed end state and changed parameters
Apr 27, 2020
b9b675e
Cleaned up CV2 imports
htqcheng Apr 27, 2020
40f76ad
Works now
htqcheng Apr 27, 2020
916e0e4
working state
Apr 27, 2020
cad60f1
Delete helper.cpython-36.pyc
htqcheng Apr 27, 2020
93d2884
Merge branch 'manipulation' of https://github.com/htqcheng/Robot_Auto…
Apr 27, 2020
cf897ce
extra if statement
Apr 27, 2020
ba69d12
Delete helper.cpython-36.pyc
htqcheng Apr 27, 2020
82f1c2a
fixed detection
Apr 27, 2020
466afc9
Merge branch 'manipulation' of https://github.com/htqcheng/Robot_Auto…
Apr 27, 2020
cdb102f
more likely to pick up box
Apr 27, 2020
08737ca
TensorForce Files Layout
htqcheng Apr 25, 2020
24ba052
Changes
htqcheng Apr 25, 2020
e5af877
cleaned up
htqcheng Apr 25, 2020
83d4673
Continue
htqcheng Apr 26, 2020
1faa67b
Keep going
htqcheng Apr 27, 2020
25efcaa
Wait
htqcheng Apr 27, 2020
728c5de
Keep going
htqcheng Apr 27, 2020
9788473
RL Working
htqcheng Apr 27, 2020
cad1758
Finished RL loop, can keep running to train
htqcheng Apr 27, 2020
81796cc
More Training
htqcheng Apr 27, 2020
745f7b4
Update requirements.txt
htqcheng Apr 27, 2020
80fa30c
Update README.md
htqcheng Apr 27, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Please use Python 3.6
1. Install [PyRep](https://github.com/stepjam/PyRep)
2. Install [RLBench](https://github.com/stepjam/RLBench)
3. `pip install -r requirements.txt`
4. The `requirements.txt` includes TensorForce and TensorFlow for reinforcement learning

## Example RLBench Usage
Run `python rlbench_example.py` to launch the example script.
Expand All @@ -25,4 +26,4 @@ This script contains example code on how to control the robot, get observations,
## Useful Files
The following files in the `rlbench` folder of the `RLBench` repo may be useful to reference:
* `rlbench/action_modes.py` - Different action modes to control the robot
* `rlbench/backend/observation.py` - All fields available in the observation object
* `rlbench/backend/observation.py` - All fields available in the observation object
29 changes: 29 additions & 0 deletions TensorForceFiles/DQN_class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from tensorforce import Agent
import sys
sys.path.append('../')
sys.path.append(sys.path[0] + '/TensorForceFiles')
from TensorForce_class import *
import numpy as np


class TensorForceDQN(TensorForceClass):
    """DQN-flavoured variant of TensorForceClass.

    Only agent construction differs from the parent: a Tensorforce ``dqn``
    agent is built instead of the generic ``tensorforce`` agent. State and
    action dimensions, exploration, and reward shaping come from the base
    class.
    """

    def __init__(self, num_states=6, num_actions=4, load=None):
        # The parent __init__ already stores num_states/num_actions and
        # calls createRLagent(), so no extra bookkeeping is needed here
        # (the original re-assigned both attributes redundantly).
        super().__init__(num_states=num_states, num_actions=num_actions, load=load)

    def createRLagent(self, load=None):
        """Create (and optionally restore) the DQN agent.

        Args:
            load: Optional checkpoint directory to restore the agent from.
                  The original override accepted this argument but silently
                  ignored it, so a saved model was never reloaded; it is now
                  honoured, matching the parent-class behaviour.

        Returns:
            A Tensorforce ``Agent`` instance.
        """
        states_dict = {'type': 'float', 'shape': self.num_states}
        # NOTE(review): Tensorforce's DQN expects a *discrete* action space;
        # a 'float' action spec is likely rejected at agent construction.
        # Kept as-is to preserve the existing interface — confirm against
        # the Tensorforce version pinned in requirements.txt.
        actions_dict = {'type': 'float', 'shape': self.num_actions,
                        'min_value': self.input_low, 'max_value': self.input_high}

        agent = Agent.create(
            agent='dqn',
            states=states_dict,
            actions=actions_dict,
            memory=10000,
            exploration=0.3,
            max_episode_timesteps=self.len_episode,
        )

        # Restore a previously saved checkpoint, mirroring the parent class.
        if load is not None:
            agent.restore(directory=load)

        return agent
118 changes: 118 additions & 0 deletions TensorForceFiles/TensorForce_class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
from tensorforce import Agent
import numpy as np


class TensorForceClass:
    """Base wrapper around a Tensorforce agent that maps RLBench
    observations to end-effector actions.

    The agent emits actions in [0, 1] which scaleActions() rescales to
    workspace coordinates. calculateReward() shapes a reward from the
    change in gripper-to-target distance recorded around each step.
    """

    def __init__(self, num_states=6, num_actions=4, load=None):
        # Dimensions of the flattened state/action vectors fed to the agent.
        self.num_states = num_states
        self.num_actions = num_actions
        # Raw agent outputs are bounded to [input_low, input_high].
        self.input_high = 1.0
        self.input_low = 0.0

        self.len_episode = 10
        # Probability of replacing the agent's action with a random one.
        self.explore = 0.5

        # Reachable workspace bounds (presumably metres in the robot base
        # frame — TODO confirm against the RLBench scene).
        self.x_r = [-0.025, 0.52]   # X range: -0.025 .. 0.52
        self.y_r = [-0.45, 0.45]    # Y range: -0.45 .. 0.45
        self.z_r = [0.751, 1.75]    # Z range: 0.751 .. 1.75 (maybe a little higher)

        # Gripper-to-target distances measured before/after each environment
        # step; consumed by calculateReward().
        self.dist_before_action = 0
        self.dist_after_action = 0

        self.has_object = False

        self.agent = self.createRLagent(load=load)
        self.target_state = []

    def createRLagent(self, load=None):
        """Build the Tensorforce policy-gradient agent.

        Args:
            load: Optional checkpoint directory to restore the agent from.

        Returns:
            A Tensorforce ``Agent`` instance.
        """
        states_dict = {'type': 'float', 'shape': self.num_states}
        actions_dict = {'type': 'float', 'shape': self.num_actions,
                        'min_value': self.input_low, 'max_value': self.input_high}

        agent = Agent.create(
            agent='tensorforce',
            states=states_dict,
            actions=actions_dict,
            memory=10000,
            update=dict(unit='timesteps', batch_size=64),
            max_episode_timesteps=self.len_episode,
            optimizer=dict(type='adam', learning_rate=3e-4),
            policy=dict(network='auto'),
            objective='policy_gradient',
            reward_estimation=dict(horizon=20)
        )

        # Idiomatic identity check (was `if not load ==None`).
        if load is not None:
            agent.restore(directory=load)

        return agent

    def act(self, obs, obj_poses):
        """Compute an RLBench action for the current observation.

        Args:
            obs:       RLBench observation; only ``gripper_pose`` is read.
            obj_poses: dict mapping object names to poses.

        Returns:
            List [x, y, z, qx, qy, qz, qw, gripper_open] — a fixed
            orientation quaternion with a learned position and grip flag.
        """
        gripper_pose = obs.gripper_pose

        key = 'sugar'
        # ---- prepare the input state vector for the RL agent ----
        if key in obj_poses:
            target_state = list(obj_poses[key])
            target_state[2] += 0.1  # aim slightly above the object
        else:
            # Object no longer detected: assume it has been grasped and
            # steer towards a fixed drop location.
            self.has_object = True
            target_state = [0.2, 0.0, 1.1]

        in_states = list(gripper_pose[:3])
        in_states.extend(list(target_state[:3]))
        # ---------------------------------------------------------

        actions = self.agent.act(states=in_states)
        # Epsilon-greedy style exploration on the continuous actions.
        if self.explore > np.random.uniform():
            actions = np.random.uniform(low=0.25, high=0.75, size=self.num_actions)

        a_in = self.scaleActions(actions)

        # Fixed orientation quaternion [0, 1, 0, 0]; the last entry is the
        # gripper open/close flag derived from the fourth action channel.
        actions2 = list(a_in[:3]) + [0, 1, 0, 0] + list([actions[3] > 0.5])

        # Record the pre-step distance for reward shaping. asarray() keeps
        # the subtraction valid whether target_state is a list or an array.
        self.dist_before_action = np.linalg.norm(
            np.asarray(target_state[:3]) - np.asarray(gripper_pose[:3]))
        return actions2

    def scaleActions(self, actions):
        """Rescale agent outputs in [0, 1] to workspace x/y/z coordinates.

        Mutates *actions* in place and returns it.
        """
        actions[0] = actions[0] * (self.x_r[1] - self.x_r[0]) + self.x_r[0]
        actions[1] = actions[1] * (self.y_r[1] - self.y_r[0]) + self.y_r[0]
        actions[2] = actions[2] * (self.z_r[1] - self.z_r[0]) + self.z_r[0]

        return actions

    def calculateReward(self):
        """Shaped reward based on the distances recorded by act().

        Returns:
            (reward, terminal) — terminal is True once the object is held.

        Distances are guarded with a small epsilon so that a zero distance
        no longer raises ZeroDivisionError (the original divided by the raw
        distances unguarded).
        """
        eps = 1e-6
        terminal = False
        # Mild penalty proportional to how far the gripper started out.
        reward = -self.dist_before_action / 4

        # Large bonus for being close, growing as the gripper closes in.
        if self.dist_after_action < 0.2:
            reward += 20 + 1 / max(self.dist_after_action, eps)

        # Reward proportional progress towards the target; bounded penalty
        # for any move that increased the distance.
        temp = (self.dist_before_action - self.dist_after_action) \
            / max(self.dist_before_action, eps) * 3
        if temp > 0:
            reward += temp
        else:
            reward += min(temp, -0.1)

        if self.has_object:
            reward += 100.0
            terminal = True

        return reward, terminal

Binary file not shown.
Binary file not shown.
Binary file not shown.
127 changes: 127 additions & 0 deletions TensorForceFiles/dqn_grasp_class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from tensorforce import Agent
import sys
sys.path.append('../')
sys.path.append(sys.path[0] + '/TensorForceFiles')
from DQN_class import *
import numpy as np


class DQN_grasp(TensorForceDQN):
    """DQN-based grasping policy.

    Learns small positional offsets (and yaw + grip) around a detected
    target object; rewarded for shrinking the gripper-to-target distance
    and for ending up holding the object.
    """

    def __init__(self, num_actions=5, num_states=21, load=None):
        self.num_states = num_states    # gripper pose + object pose + container pose
        self.num_actions = num_actions  # X, Y, Z, yaw, grasp
        super().__init__(num_states=self.num_states, num_actions=self.num_actions, load=load)

        # Action-scaling ranges. X/Y are tiny *offsets* added to the target
        # position in act(), not absolute coordinates.
        self.x_r = [-.001, .001]
        self.y_r = [-.001, .001]
        # NOTE(review): this z range is inverted (high < low), so the scaled
        # z *decreases* as the raw action grows; the inline comment in the
        # original claimed 0.751-1.75 — confirm which is intended.
        self.z_r = [0.752, 0.7]
        self.yaw_r = [0, np.pi]
        self.gripper_open = True
        self.target_start_pose = [0, 0, 0]
        self.ee_pos = [0, 0, 0]
        self.explore = 0.3
        self.target_num = 0
        self.target_name = ''

    def act(self, obs, obj_poses, key='sugar'):
        """Compute an RLBench action aimed at the object named *key*.

        Args:
            obs:       RLBench observation; ``gripper_pose`` is read.
            obj_poses: dict of object name -> pose; must contain
                       'large_container'.
            key:       name of the target object.

        Returns:
            List [x, y, z, qx, qy, qz, qw, gripper_open].

        Raises:
            ValueError: if ``num_actions`` is neither 5 nor 3 (the original
            fell through to a NameError on ``actions2``).
        """
        gripper_pose = obs.gripper_pose
        large_container_state = obj_poses['large_container']
        self.ee_pos = gripper_pose
        # ---- prepare the input state vector for the RL agent ----
        if key in obj_poses:
            target_state = list(obj_poses[key])
            self.has_object = False
        else:
            # Object no longer detected: assume it is in the gripper.
            self.has_object = True
            # BUGFIX: copy the pose — the original aliased the gripper_pose
            # array and the += below mutated obs.gripper_pose in place.
            target_state = list(gripper_pose)
            # NOTE(review): index 3 is a quaternion component of the pose,
            # not a position — confirm this offset is intended (the parent
            # class offsets index 2, the z coordinate).
            target_state[3] += 0.1

        in_states = list(gripper_pose)
        in_states.extend(list(target_state))
        in_states.extend(list(large_container_state))
        # ---------------------------------------------------------

        actions = self.agent.act(states=in_states)

        # Epsilon-greedy style exploration on the raw actions.
        if self.explore > np.random.uniform():
            actions = np.random.uniform(low=0.0, high=1, size=self.num_actions)

        a_in = self.scaleActions(actions)
        self.gripper_open = a_in[-1] > 0.3

        if self.num_actions == 5:
            # x/y actions are small offsets around the target position.
            a_in[:2] += target_state[:2]
            self.ee_pos = a_in[:3]
            actions2 = list(self.ee_pos) \
                + self.calculateQuaternion(a_in[3]) + list([self.gripper_open])
        elif self.num_actions == 3:
            # Only z and yaw are learned; x/y snap to the target.
            self.ee_pos = [target_state[0], target_state[1], a_in[0]]
            actions2 = list(self.ee_pos) \
                + self.calculateQuaternion(a_in[1]) + list([self.gripper_open])
        else:
            raise ValueError(
                "DQN_grasp supports num_actions of 5 or 3, got "
                + str(self.num_actions))

        # Clamp so the progress-based reward never divides by zero.
        self.dist_before_action = max(
            0.05,
            np.linalg.norm(np.asarray(target_state[:3]) - np.asarray(gripper_pose[:3])))
        return actions2

    def scaleActions(self, actions):
        """Rescale raw agent outputs in [0, 1] into workspace units in place.

        With 5 actions the layout is [x offset, y offset, z, yaw, grasp];
        otherwise the first two entries are [z, yaw].
        """
        if self.num_actions == 5:
            actions[0] = actions[0] * (self.x_r[1] - self.x_r[0]) + self.x_r[0]
            actions[1] = actions[1] * (self.y_r[1] - self.y_r[0]) + self.y_r[0]
            actions[2] = actions[2] * (self.z_r[1] - self.z_r[0]) + self.z_r[0]
            actions[3] = actions[3] * (self.yaw_r[1] - self.yaw_r[0]) + self.yaw_r[0]
        else:
            actions[0] = actions[0] * (self.z_r[1] - self.z_r[0]) + self.z_r[0]
            actions[1] = actions[1] * (self.yaw_r[1] - self.yaw_r[0]) + self.yaw_r[0]

        # Once the object is held, force the gripper to stay closed.
        if self.has_object:
            actions[-1] = 0

        return actions

    def calculateReward(self, i):
        """Shaped reward for step index *i* of the episode.

        Args:
            i: step index; subtracted as a per-step time penalty.

        Returns:
            (reward, terminal) — terminal is True once the object is held.
        """
        reward = 0
        terminal = False
        reward -= i  # time penalty: later steps are worth less

        delta_dist = self.dist_before_action - self.dist_after_action
        temp = delta_dist / self.dist_before_action * 3

        if delta_dist > 0:
            # BUGFIX: was `reward = temp`, which silently discarded the time
            # penalty above; accumulate like the other branch (and like the
            # parent class) does.
            reward += temp
        else:
            reward += min(temp, -0.1)

        if self.has_object:
            reward += 100
            print("Reward after grasping: ", reward)
            terminal = True
        print(self.dist_after_action)

        # Penalise closing the gripper while still far from the target.
        if not self.gripper_open and self.dist_before_action > 0.1:
            reward -= 20

        # Reward keeping the gripper open during the approach.
        if self.gripper_open and self.dist_before_action > 0.1:
            reward += 10

        # Small penalty for being closed on nothing.
        if not self.gripper_open and not self.has_object:
            reward -= 3

        return reward, terminal

    def calculateQuaternion(self, angle):
        """Return a quaternion [x, y, z, w] for a top-down grasp rotated by
        *angle* (yaw, radians)."""
        firstElement = np.sin(angle / 2)
        secondElement = -np.cos(angle / 2)
        return [firstElement, secondElement, 0, 0]



Loading