diff --git a/README.md b/README.md
index c74e03c..ab9ccdb 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ Please use Python 3.6
 1. Install [PyRep](https://github.com/stepjam/PyRep)
 2. Install [RLBench](https://github.com/stepjam/RLBench)
 3. `pip install -r requirements.txt`
+4. The requirements.txt include TensorForce and TensorFlow for Reinforcement Learning
 
 ## Example RLBench Usage
 Run `python rlbench_example.py` to launch the example script.
@@ -25,4 +26,4 @@ This script contains example code on how to control the robot, get observations,
 ## Useful Files
 The following files may be useful to reference from the In the `rlbench` folder in the `RLBench` repo:
 * `rlbench/action_modes.py` - Different action modes to control the robot
-* `rlbench/backend/observation.py` - All fields available in the observation object
\ No newline at end of file
+* `rlbench/backend/observation.py` - All fields available in the observation object
diff --git a/TensorForceFiles/DQN_class.py b/TensorForceFiles/DQN_class.py
new file mode 100644
index 0000000..7f0eaf8
--- /dev/null
+++ b/TensorForceFiles/DQN_class.py
@@ -0,0 +1,29 @@
+from tensorforce import Agent
+import sys
+sys.path.append('../')
+sys.path.append(sys.path[0] + '/TensorForceFiles')
+from TensorForce_class import *
+import numpy as np
+
+
+class TensorForceDQN(TensorForceClass):
+
+    def __init__(self,num_states=6, num_actions=4, load=None):
+        super().__init__(num_states=num_states, num_actions=num_actions,load=load)
+        self.num_states = num_states
+        self.num_actions = num_actions
+        
+
+
+    def createRLagent(self, load):
+        states_dict = {'type': 'float', 'shape': self.num_states}
+        actions_dict = {'type': 'float', 'shape': self.num_actions, 'min_value': self.input_low, 'max_value': self.input_high}
+
+        return Agent.create(
+            agent='dqn',
+            states = states_dict,  # alternatively: states, actions, (max_episode_timesteps)
+            actions = actions_dict,
+            memory=10000,
+            exploration=0.3,
+            max_episode_timesteps= self.len_episode,
+        )
diff --git a/TensorForceFiles/TensorForce_class.py b/TensorForceFiles/TensorForce_class.py
new file mode 100644
index 0000000..baf70f3
--- /dev/null
+++ b/TensorForceFiles/TensorForce_class.py
@@ -0,0 +1,118 @@
+from tensorforce import Agent
+import numpy as np
+
+
+class TensorForceClass:
+
+    def __init__(self, num_states=6, num_actions=4, load=None):
+
+        self.num_states = num_states
+        self.num_actions = num_actions
+        self.input_high = 1.0
+        self.input_low  = 0.0
+        
+        
+        self.len_episode = 10
+        self.explore = 0.5
+
+        self.x_r = [-0.025, 0.52]   ## X Range: -0.025 - 0.52
+        self.y_r = [-0.45, 0.45]    ## Y Range: -0.45 - 0.45 
+        self.z_r = [0.751, 1.75]    ## Z Range: 0.751 - 1.75 (Maybe a little higher)
+
+        self.dist_before_action = 0
+        self.dist_after_action = 0
+
+        self.has_object = False
+
+        self.agent = self.createRLagent(load=load)
+        self.target_state = []
+
+
+    def createRLagent(self, load=None):
+        states_dict = {'type': 'float', 'shape': self.num_states}
+        actions_dict = {'type': 'float', 'shape': self.num_actions, 'min_value': self.input_low, 'max_value': self.input_high}
+
+        agent =   Agent.create(
+            agent='tensorforce',
+            states = states_dict,  # alternatively: states, actions, (max_episode_timesteps)
+            actions = actions_dict,
+            memory=10000,
+            update=dict(unit='timesteps', batch_size=64),
+            max_episode_timesteps= self.len_episode,
+            optimizer=dict(type='adam', learning_rate=3e-4),
+            policy=dict(network='auto'),
+            objective='policy_gradient',
+            reward_estimation=dict(horizon=20)
+        )
+
+        if not load ==None:
+            agent.restore(directory=load)
+
+        return agent
+
+    def act(self, obs, obj_poses):
+        gripper_pose = obs.gripper_pose
+
+
+        key = 'sugar'
+        ###########################################################
+        ###### PREPARE INPUT STATES TO RL FUNCTION ################
+        if key in obj_poses:
+            target_state = list(obj_poses[key])
+            target_state[2] += 0.1
+        else:
+            self.has_object = True
+            target_state = [0.2, 0.0, 1.1]
+        # in_states = list(gripper_pose)
+        # in_states.extend(target_state)
+
+        in_states = list(gripper_pose[:3])
+        in_states.extend(list(target_state[:3]))
+        # in_states.extend(list(obj_poses['cupboard']))
+        ###### PREPARE INPUT STATES TO RL FUNCTION ################
+        ###########################################################
+
+        actions = self.agent.act(states= in_states)
+        if self.explore > np.random.uniform():
+            actions = np.random.uniform(low=0.25, high=0.75, size=self.num_actions)
+
+        a_in = self.scaleActions(actions)
+
+        actions2 = list(a_in[:3]) + [0,1,0,0] + list([actions[3]>0.5])
+
+        self.dist_before_action = np.linalg.norm(target_state[:3] - gripper_pose[:3])
+        return actions2
+
+
+    def scaleActions(self, actions):
+   
+        actions[0] = actions[0]*(self.x_r[1] - self.x_r[0]) + self.x_r[0]
+        actions[1] = actions[1]*(self.y_r[1] - self.y_r[0]) + self.y_r[0]
+        actions[2] = actions[2]*(self.z_r[1] - self.z_r[0]) + self.z_r[0]
+    
+        return actions
+
+    def calculateReward(self):
+        terminal = False
+        reward = -self.dist_before_action/4
+
+
+        if self.dist_after_action < 0.2:
+            reward +=  20 + 1/self.dist_after_action
+
+        temp = (self.dist_before_action - self.dist_after_action) / self.dist_before_action * 3
+        if temp > 0:
+            reward += temp
+        else:
+            reward += min(temp,-0.1)
+
+        
+
+        if self.has_object: 
+            reward += 100.0
+            terminal = True
+
+        
+
+        return reward, terminal
+
diff --git a/TensorForceFiles/__pycache__/DQN_class.cpython-36.pyc b/TensorForceFiles/__pycache__/DQN_class.cpython-36.pyc
new file mode 100644
index 0000000..8e4176f
Binary files /dev/null and b/TensorForceFiles/__pycache__/DQN_class.cpython-36.pyc differ
diff --git a/TensorForceFiles/__pycache__/TensorForce_class.cpython-36.pyc b/TensorForceFiles/__pycache__/TensorForce_class.cpython-36.pyc
new file mode 100644
index 0000000..7f79ed0
Binary files /dev/null and b/TensorForceFiles/__pycache__/TensorForce_class.cpython-36.pyc differ
diff --git a/TensorForceFiles/__pycache__/dqn_grasp_class.cpython-36.pyc b/TensorForceFiles/__pycache__/dqn_grasp_class.cpython-36.pyc
new file mode 100644
index 0000000..eedffce
Binary files /dev/null and b/TensorForceFiles/__pycache__/dqn_grasp_class.cpython-36.pyc differ
diff --git a/TensorForceFiles/dqn_grasp_class.py b/TensorForceFiles/dqn_grasp_class.py
new file mode 100644
index 0000000..489a638
--- /dev/null
+++ b/TensorForceFiles/dqn_grasp_class.py
@@ -0,0 +1,127 @@
+from tensorforce import Agent
+import sys
+sys.path.append('../')
+sys.path.append(sys.path[0] + '/TensorForceFiles')
+from DQN_class import *
+import numpy as np
+
+
+class DQN_grasp(TensorForceDQN):
+
+    def __init__(self, num_actions=5, num_states=21, load=None):
+        self.num_states = num_states # Gripper pose, object pose
+        self.num_actions = num_actions # X, Y, Z, Yaw, Grasp
+        super().__init__(num_states= self.num_states, num_actions=self.num_actions, load=load)
+        
+        self.x_r = [-.001, .001]
+        self.y_r = [-.001, .001]
+        self.z_r = [0.752, 0.7] ## Z Range: 0.751 - 1.75 (Maybe a little higher)
+        self.yaw_r = [0, np.pi]
+        self.gripper_open = True
+        self.target_start_pose = [0,0,0]
+        self.ee_pos = [0,0,0]
+        self.explore = 0.3
+        self.target_num = 0
+        self.target_name=''
+
+    def act(self, obs, obj_poses, key='sugar'):
+        gripper_pose = obs.gripper_pose
+        large_container_state = obj_poses['large_container']
+        self.ee_pos = gripper_pose
+        ###########################################################
+        ###### PREPARE INPUT STATES TO RL FUNCTION ################
+        if key in obj_poses:
+            target_state = list(obj_poses[key])
+            self.has_object = False
+        else:
+            self.has_object = True
+            target_state = gripper_pose
+            target_state[3] +=0.1
+
+        in_states = list(gripper_pose)
+        in_states.extend(list(target_state))
+        in_states.extend(list(large_container_state))
+        ###### PREPARE INPUT STATES TO RL FUNCTION ################
+        ###########################################################
+
+        actions = self.agent.act(states=in_states)
+
+        if self.explore > np.random.uniform():
+            actions = np.random.uniform(low=0.0, high=1, size=self.num_actions)
+
+        a_in = self.scaleActions(actions)
+        self.gripper_open = a_in[-1]>0.3
+
+        if self.num_actions == 5:
+            a_in[:2] += target_state[:2]
+            # a_in[:3] = gripper_pose[:3]
+            self.ee_pos = a_in[:3]
+            actions2 = list(self.ee_pos) + self.calculateQuaternion(a_in[3]) + list([self.gripper_open])
+        
+        elif self.num_actions == 3:
+            self.ee_pos = [target_state[0], target_state[1], a_in[0]]
+            actions2 = list(self.ee_pos) + self.calculateQuaternion(a_in[1]) + list([self.gripper_open])
+
+
+        self.dist_before_action = max(0.05,np.linalg.norm(target_state[:3] - gripper_pose[:3]))
+        return actions2
+
+
+    def scaleActions(self, actions):
+   
+        if self.num_actions == 5:
+            actions[0] = actions[0]*(self.x_r[1] - self.x_r[0]) + self.x_r[0]
+            actions[1] = actions[1]*(self.y_r[1] - self.y_r[0]) + self.y_r[0]
+            actions[2] = actions[2]*(self.z_r[1] - self.z_r[0]) + self.z_r[0]
+            actions[3] = actions[3]*(self.yaw_r[1] - self.yaw_r[0]) + self.yaw_r[0]
+        else:
+            actions[0] = actions[0]*(self.z_r[1] - self.z_r[0]) + self.z_r[0]
+            actions[1] = actions[1]*(self.yaw_r[1] - self.yaw_r[0]) + self.yaw_r[0]
+
+        if self.has_object: actions[-1] = 0
+
+        return actions
+    
+
+    def calculateReward(self, i):
+        reward = 0
+        terminal = False
+        reward -= i
+
+        delta_dist = self.dist_before_action - self.dist_after_action
+        temp = (self.dist_before_action - self.dist_after_action) / self.dist_before_action * 3
+
+        if delta_dist > 0:
+            reward = temp
+        else:
+            reward += min(temp,-0.1)
+        
+        # print(self.has_object)
+        if self.has_object: 
+            reward += 100
+            print("Reward after grasping: ", reward)
+            terminal = True
+            # if self.ee_pos[-1] > self.z_r[1] - 0.05:
+            #     reward += 250
+            #     terminal = True
+        print(self.dist_after_action)
+
+        if not self.gripper_open and self.dist_before_action>0.1:
+            reward -= 20
+        
+        if self.gripper_open and self.dist_before_action>0.1:
+            reward += 10
+
+        if not self.gripper_open and not self.has_object:
+            reward -= 3
+
+        return reward, terminal
+
+
+    def calculateQuaternion(self, angle):
+        firstElement  = np.sin(angle/2)
+        secondElement = -np.cos(angle/2)
+        return [firstElement, secondElement, 0, 0]
+
+
+
diff --git a/TensorForceFiles/dqn_place_class.py b/TensorForceFiles/dqn_place_class.py
new file mode 100644
index 0000000..dd4e6e9
--- /dev/null
+++ b/TensorForceFiles/dqn_place_class.py
@@ -0,0 +1,155 @@
+from tensorforce import Agent
+from TensorForceRL_parent import *
+from dqn_grasp_class import *
+import numpy as np
+from quaternion import from_rotation_matrix, quaternion, as_euler_angles, from_euler_angles, as_quat_array
+from scipy.spatial.transform import Rotation as R
+
+
+
+class DQN_place(DQN_grasp):
+
+    def __init__(self, num_states=16, num_actions=5, load=None):
+        self.num_states = 16 # Object pose, cabinet pose, Gripper Open, Target_num
+        self.num_actions = 5 # X, Y, Z, yaw, grasp
+        super().__init__(num_states=num_states, num_actions=num_actions, load=load)
+        
+        self.x_r = [-0.1, 0.05] # Cupboard coords: Point down
+        self.y_r = [-0.1, 0.1]  # Cupboad coords: Point to left of cupboard
+        self.z_r = [-0.275, 0.1] # Cupboard coords: Point to cupboard
+        self.z_r = [-0.1, 0.1] # Cupboard coords: Point to cupboard
+        self.yaw_r = [0, np.pi] 
+        self.explore = 0.5
+        self.stage_point = []
+        self.obj_poses = []
+
+    def createRLagent(self, load):
+        states_dict = {'type': 'float', 'shape': self.num_states}
+        actions_dict = {'type': 'float', 'shape': self.num_actions, 'min_value': self.input_low, 'max_value': self.input_high}
+
+        return Agent.create(
+            agent='dqn',
+            states = states_dict,  # alternatively: states, actions, (max_episode_timesteps)
+            actions = actions_dict,
+            memory=10000,
+            exploration=0.75,
+            max_episode_timesteps= self.len_episode,
+        )
+
+    def getInStates(self, key, obj_poses):
+        target_state = list(obj_poses[key])
+        self.target_state = target_state
+        self.has_object = False
+
+        in_states = list(self.ee_pos)
+        in_states.extend(list(self.target_state))
+
+        in_states.extend(list([self.gripper_open*1.0, self.target_num]))
+        return in_states
+        
+
+    def act(self, obs, obj_poses, key='crackers'):
+        self.stage_point = obj_poses['waypoint3']
+        gripper_pose = obs.gripper_pose
+        self.ee_pos = gripper_pose
+        self.obj_poses = obj_poses
+
+        in_states = self.getInStates('waypoint4', obj_poses)
+
+        actions = self.agent.act(states=in_states)
+
+        # if self.explore > np.random.uniform():
+            # actions = np.random.uniform(low=0.0, high=1, size=self.num_actions)
+
+        a_in = self.scaleActions(actions)
+        self.gripper_open = a_in[-1]>0.5
+
+        self.ee_pos = a_in[:3]
+        # actions2 = list(self.ee_pos) + self.calculateQuaternion(a_in[3]) + list([self.gripper_open])
+        actions2 = list(self.ee_pos) + list(self.stage_point[3:7]) + list([self.gripper_open])
+
+        self.dist_before_action = max(0.05,np.linalg.norm(self.target_state[:3] - gripper_pose[:3]))
+
+        return actions2
+
+
+    def scaleActions(self, actions):
+   
+        # Scale all the Actions
+        x_pre      = actions[0]*(self.x_r[1] -   self.x_r[0])   + self.x_r[0]
+        y_pre      = actions[1]*(self.y_r[1] -   self.y_r[0])   + self.y_r[0]
+        z_pre      = actions[2]*(self.z_r[1] -   self.z_r[0])   + self.z_r[0]
+        actions[3] = actions[3]*(self.yaw_r[1] - self.yaw_r[0]) + self.yaw_r[0]
+
+
+        trans = np.array([x_pre, y_pre, z_pre])
+
+        # Convert the Cabinet Frame Coordinates into the World Fram Coordinates
+        trans = self.convertTargetCoordsToWorld(trans)
+
+        actions[:3] = trans
+        if self.has_object: actions[-1] = 0
+
+        return actions
+    
+
+    def calculateReward(self):
+        if self.gripper_open:
+            t_pos = self.obj_poses[self.target_name[:-12]]
+            t_pos = self.convertTargetCoordsToCabinet(t_pos)
+            in_cab = self.x_r[0] <= t_pos[0] <= self.x_r[1]
+            in_cab = in_cab and self.y_r[0] <= t_pos[1] <= self.z_r[1]
+            in_cab = in_cab and self.z_r[0] <= t_pos[2] <= self.z_r[1]
+            if not in_cab:
+                reward = -1
+            else:
+                reward = 5
+            terminal = True
+
+        else:
+            terminal = False
+            reward = 0
+
+        return reward, terminal
+
+    def calculateQuaternion(self, angle):
+        firstElement  = np.sin(angle/2)
+        secondElement = -np.cos(angle/2)
+        return [0, secondElement, 0, firstElement]
+
+ 
+    #### CODE TO TRANSLATE BETWEEN CUPBOARD AND WORLD FRAMES IMPLEMENT WITH RL SCALE AND ACTIONS
+    def convertTargetCoordsToWorld(self, target_coord, target_name='waypoint4'):
+        t_pos = self.obj_poses[target_name]
+
+        # Create Rotation Matrix
+        rot_val = R.from_quat(self.obj_poses[target_name][3:7])
+        to_world_mat = rot_val.as_matrix()
+
+        # Make Rotation Matrix an Affine 4x4 matrix
+        to_world_mat = np.hstack((to_world_mat,np.array(t_pos[:3]).reshape([-1,1])))
+        to_world_mat = np.vstack((to_world_mat, [0,0,0,1]))
+
+        # Convert the Coordinates
+        go_pt = np.hstack((target_coord, 1))
+        world_pt = to_world_mat @ go_pt.T
+
+        return world_pt[:3]
+
+    def convertTargetCoordsToCabinet(self, target_coord, target_name='waypoint4'):
+        t_pos = self.obj_poses[target_name]
+
+        # Create Rotation Matrix
+        rot_val = R.from_quat(self.obj_poses[target_name][3:7])
+        to_world_mat = rot_val.as_matrix()
+
+        # Make Rotation Matrix an Affine 4x4 matrix
+        to_world_mat = np.hstack((to_world_mat,np.array(t_pos[:3]).reshape([-1,1])))
+        to_world_mat = np.vstack((to_world_mat, [0,0,0,1]))
+        to_cupboard_mat = np.linalg.inv(to_world_mat)
+
+        # Convert the Coordinates
+        world_pt = np.hstack((target_coord[:3],1))
+        cab_pt = to_cupboard_mat @ world_pt.T
+
+        return cab_pt
\ No newline at end of file
diff --git a/__pycache__/helper.cpython-36.pyc b/__pycache__/helper.cpython-36.pyc
new file mode 100644
index 0000000..bd130d6
Binary files /dev/null and b/__pycache__/helper.cpython-36.pyc differ
diff --git a/dqn_grasp/agent.data-00000-of-00001 b/dqn_grasp/agent.data-00000-of-00001
new file mode 100644
index 0000000..390c9ba
Binary files /dev/null and b/dqn_grasp/agent.data-00000-of-00001 differ
diff --git a/dqn_grasp/agent.index b/dqn_grasp/agent.index
new file mode 100644
index 0000000..626a171
Binary files /dev/null and b/dqn_grasp/agent.index differ
diff --git a/dqn_grasp/agent.json b/dqn_grasp/agent.json
new file mode 100644
index 0000000..b9ec65f
--- /dev/null
+++ b/dqn_grasp/agent.json
@@ -0,0 +1 @@
+{"agent": "dqn", "states": {"type": "float", "shape": 21}, "actions": {"type": "float", "shape": 5, "min_value": 0.0, "max_value": 1.0}, "max_episode_timesteps": 10, "network": "auto", "memory": 10000, "batch_size": 32, "update_frequency": null, "start_updating": null, "learning_rate": 0.0003, "huber_loss": 0.0, "horizon": 0, "discount": 0.99, "estimate_terminal": false, "target_sync_frequency": 1, "target_update_weight": 1.0, "preprocessing": null, "exploration": 0.3, "variable_noise": 0.0, "l2_regularization": 0.0, "entropy_regularization": 0.0, "name": "agent", "device": null, "parallel_interactions": 1, "seed": null, "execution": null, "saver": null, "summarizer": null, "recorder": null, "config": null}
\ No newline at end of file
diff --git a/dqn_grasp/agent.meta b/dqn_grasp/agent.meta
new file mode 100644
index 0000000..18efe6e
Binary files /dev/null and b/dqn_grasp/agent.meta differ
diff --git a/dqn_grasp/checkpoint b/dqn_grasp/checkpoint
new file mode 100644
index 0000000..f5eb96b
--- /dev/null
+++ b/dqn_grasp/checkpoint
@@ -0,0 +1,2 @@
+model_checkpoint_path: "agent"
+all_model_checkpoint_paths: "agent"
diff --git a/helper.py b/helper.py
new file mode 100644
index 0000000..8a6a272
--- /dev/null
+++ b/helper.py
@@ -0,0 +1,467 @@
+'''
+This file contains helper functions for the 16-662 Robot Autonomy
+Final Project. This was completed by Bryson Jones, Quentin Cheng,
+and Shaun Ryer.
+Carnegie Mellon University
+MRSD Class of 2021
+'''
+
+import numpy as np
+import scipy as sp
+from quaternion import from_rotation_matrix, quaternion, as_euler_angles, from_euler_angles, as_float_array
+# import imutils
+# from imutils import perspective
+# from imutils import contours
+from collections import namedtuple
+from shapely.geometry import Point, Polygon
+import time
+import warnings
+
+
+# def generate_bounding_box(rgb_img, lower_bound, upper_bound):
+#     '''
+#     :param rgb_img: an rgb image with an object that should have a bounding
+#                     box overlaid onto it
+#     :param lower_bound: lower thresholding bound (1 x 3)
+#     :param upper_bound: upper thresholding bound (1 x 3)
+#     :return: bgr_modified_img: image in bgr format, which has bounding box
+#                                overlaid
+#     '''
+
+#     # convert rgb to hsv
+#     bgr = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR)
+
+#     # create bounded mask
+#     mask_red = cv2.inRange(bgr, lower_bound, upper_bound)
+
+#     # bitwise mask
+#     redBoxOnlyFrame = cv2.bitwise_and(bgr.astype(np.uint8), bgr.astype(np.uint8), mask=mask_red)
+
+#     # convert image to gray
+#     gray = cv2.cvtColor(redBoxOnlyFrame, cv2.COLOR_BGR2GRAY)
+#     gray = cv2.GaussianBlur(gray, (7, 7), 0)
+#     # perform edge detection, then perform a dilation + erosion to
+#     # close gaps in between object edges
+#     edges = cv2.Canny(gray, 50, 100)
+#     edges = cv2.dilate(edges, None, iterations=1)
+#     edges = cv2.erode(edges, None, iterations=1)
+
+#     # find contours in the edge map
+#     cnts = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL,
+#                             cv2.CHAIN_APPROX_SIMPLE)
+#     cnts = imutils.grab_contours(cnts)
+#     # sort the contours from left-to-right and initialize the
+#     # 'pixels per metric' calibration variable
+#     (cnts, _) = contours.sort_contours(cnts)
+
+#     # loop over the contours individually
+#     bgr_modified_img = bgr.copy()
+#     for c in cnts:
+#         # if the contour is not sufficiently large, ignore it
+#         if cv2.contourArea(c) < 100:
+#             continue
+#         # compute the rotated bounding box of the contour
+#         box = cv2.minAreaRect(c)
+#         box = cv2.boxPoints(box) if imutils.is_cv2() else cv2.boxPoints(box)
+#         box = np.array(box, dtype="int")
+#         # order the points in the contour such that they appear
+#         # in top-left, top-right, bottom-right, and bottom-left
+#         # order, then draw the outline of the rotated bounding
+#         # box
+#         box = perspective.order_points(box)
+#         cv2.drawContours(bgr_modified_img, [box.astype("int")], -1, (0, 255, 0), 2)
+
+#     cv2.imwrite('test.jpg', bgr_modified_img)
+
+#     return bgr_modified_img
+
+def pick_up_box_variables(large_container_pos,obs,z,small_container_pos):
+    above_large_container = large_container_pos
+    above_large_container[2] += 0.3
+
+    notflipped = quaternion(obs.gripper_pose[3],obs.gripper_pose[4],obs.gripper_pose[5],obs.gripper_pose[6])
+    notflipped_array = as_float_array(notflipped)
+    flipped_euler = as_euler_angles(notflipped)
+    
+
+    amount_2_flip = -2.4
+    if np.cos(z)<0:
+        amount_2_flip = -amount_2_flip
+    flipped_euler[2] += amount_2_flip
+    flipped = from_euler_angles(flipped_euler)
+    flipped_array = as_float_array(flipped)
+
+    above_large_container[0] += 0.070*np.sin(z)
+    above_large_container[1] += 0.070*np.cos(z)
+
+    small_container_pos_original = small_container_pos + 0*small_container_pos
+    small_container_pos_original[0] = small_container_pos[0]+0.070*np.sin(z)
+    small_container_pos_original[1] = small_container_pos[1]+0.070*np.cos(z)
+
+    return above_large_container,small_container_pos_original,notflipped_array,flipped_array
+
+def pick_up_box(state,obs,gripper,small_container0_obj,z,small_container_pos,small_container_pos_original,gripper_pose,above_large_container,flipped_array,notflipped_array):
+    # move to red box
+    # state = 5
+    if state == -1:
+        print('state -1')
+        action = np.concatenate((small_container_pos_original+[np.sin(z)*0.02,np.cos(z)*0.02,0.3], gripper_pose[3:7], np.array([1])))
+        state = 0
+        return state,action
+
+    elif state == 0:
+        print('state 0')
+        action = np.concatenate((small_container_pos_original+[np.sin(z)*0.02,np.cos(z)*0.02,0.01], gripper_pose[3:7], np.array([1])))
+        state = 1
+        return state,action
+    # wait
+    elif state == 1:
+        print('state 1')
+        action = np.concatenate((obs.gripper_pose, np.array([1])))
+        # action[-1] = 0
+        
+        state = 2
+        return state,action
+    # moving sideways
+    elif state == 2:
+        pose = obs.gripper_pose
+        pose[0] += np.sin(z)*0.02
+        pose[1] += np.cos(z)*0.02
+        action = np.concatenate((pose, np.array([0])))
+        gripper.grasp(small_container0_obj)
+        
+        state = 3
+        return state,action
+    # moving up
+    elif state == 3:
+        pose = obs.gripper_pose
+        pose[2] += 0.1
+        gripper.grasp(small_container0_obj)
+        action = np.concatenate((pose, np.array([0])))
+        state = 4
+        return state,action
+    # above big container
+    elif state == 4:
+        action = np.concatenate((above_large_container, obs.gripper_pose[3:7], np.array([0])))
+        state = 5
+        return state,action
+    #flip box
+    elif state ==5:
+        # print(flipped_array)
+        action = np.concatenate((above_large_container, flipped_array, np.array([0])))
+        state = 6
+        return state,action
+    elif state ==6:
+        # print(flipped_array)
+        action = np.concatenate((above_large_container, notflipped_array, np.array([0])))
+        state = 7
+        return state,action
+    elif state ==7:
+        # print(flipped_array)
+        action = np.concatenate((small_container_pos_original+[0,0,0.1], notflipped_array, np.array([0])))
+        state = 8
+        return state,action
+    elif state ==8:
+        # print(flipped_array)
+        action = np.concatenate((small_container_pos_original, notflipped_array, np.array([0])))
+        state = 9
+        return state,action
+    elif state ==9:
+        # print(flipped_array)
+        action = np.concatenate((small_container_pos_original, notflipped_array, np.array([1])))
+        state = 10
+        return state,action
+    elif state ==10:
+        # print(flipped_array)
+        action = np.concatenate((small_container_pos_original+[0,0,0.3], notflipped_array, np.array([1])))
+        state = 11
+        return state,action
+    else:
+        action = np.concatenate((obs.gripper_pose, np.array([1])))
+        return state,action
+
+    
+def rl_get_objects(RLagent, task, shape, obs, obj_poses, box_pos, success, i):
+
+    target_name = 'Shape' + shape
+
+    # Update arm states
+    RLagent.has_object = len(task._robot.gripper._grasped_objects) > 0
+
+    actions = RLagent.act(obs,obj_poses, key=target_name)
+
+    try:
+        obs, reward, terminal = task.step(actions)
+        print("RL moved me")
+
+        ### Check current distance
+        target_state = list(obj_poses[target_name])
+        RLagent.dist_after_action = max(0.05,np.linalg.norm(target_state[:3] - obs.gripper_pose[:3]))
+        # RLagent.dist_after_action = np.linalg.norm(target_state[:3] - obs.gripper_pose[:3])
+
+        ### Calculate reward
+        reward, terminal = RLagent.calculateReward(i)
+
+    except:
+        print("Couldn't move")
+        # reward = -RLagent.dist_before_action*5
+        ### Calculate reward
+        reward, terminal = RLagent.calculateReward(i)
+        # terminal = False
+
+    ## Observe results
+    RLagent.agent.observe(terminal=terminal, reward=reward)
+
+    if RLagent.has_object:
+        success = True
+        print("Success!")
+
+    return obs, success, reward
+
+def get_objects(RLagent, task, obj_pose_sensor, state, shape, obs, object_pos, box_pos):
+    
+    #move above object
+    if state == 0:
+        
+        action = np.concatenate((object_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([1])))
+        state = 1
+
+        return action, state, shape
+
+    # RL to take over moving to and grasp object
+    elif state == 1:
+        success = False
+        total_reward = 0
+        i = 0
+        while (not success):
+            obj_poses = obj_pose_sensor.get_poses()
+            obs, success, reward = rl_get_objects(RLagent, task, shape, obs, obj_poses, box_pos, success, i)
+            i += 1
+            total_reward += reward
+            print('Iteration: %i, Reward: %.1f' %(i, reward))
+            if i>=50:
+                "Failed to grasp, skipping the object."
+                success = True
+        
+        action = np.concatenate((obs.gripper_pose, np.array([0])))
+        print('Success on Iteration: %i, Total Reward: %.1f' %(i, total_reward))
+        RLagent.agent.save(directory='dqn_grasp')
+        state = 3
+
+        return action, state, shape
+
+    # #move to object
+    # elif state == 1:
+        
+    #     action = np.concatenate((object_pos[0:3]+[0,0,0.001], obs.gripper_pose[3:7], np.array([1])))
+        
+    #     state = 2
+
+    #     return action, state, shape
+    
+    # #move up
+    # elif state == 2:
+        
+    #     action = np.concatenate((object_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([0])))
+    #     state = 3
+
+    #     return action, state, shape
+
+    #move above box
+    elif state == 3:
+        
+        action = np.concatenate((box_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([0])))
+        state = 4
+
+        return action, state, shape
+
+    #move close to box
+    if state == 4:
+        
+        action = np.concatenate((box_pos[0:3]+[0,0,0.07], obs.gripper_pose[3:7], np.array([0])))
+        state = 5
+
+        return action, state, shape
+
+    #drop and move away
+
+    else:
+        shape = str(int(shape) + 2)
+        state=0
+        action = np.concatenate((box_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([1])))
+        return action, state, shape
+
+    
+
+    # elif state == 1:
+    #     if obs.gripper_pose[2] > .95:
+    #         state = 2
+    #     return [0, 0, stepsize, 0, 0, 0, 1, 0], state, shape
+
+    # elif state == 2:
+    #     if abs(box_pos[0] - obs.gripper_pose[0]) > .005 or abs(box_pos[1] - obs.gripper_pose[1]) > .005:
+    #         action = np.concatenate((np.array([box_pos[0], box_pos[1], 0]), obs.gripper_pose[3:7], np.array([1])))
+
+    #     else:
+    #         action = np.concatenate((box_pos, obs.gripper_pose[3:7], np.array([1])))
+
+    #     if np.isclose(box_pos, obs.gripper_pose, .05):
+    #         state = 3
+    #         return [0, 0, 0, 0, 0, 0, 1, 1], state, shape
+
+    #     return action, state, shape
+
+    # elif state == 3:
+    #     if obs.gripper_pose[2] > 1:
+    #         state = 0
+    #         shape = str(int(shape) + 2)
+    #     return [0, 0, stepsize, 0, 0, 0, 1, 1], state, shape
+
+
+def checkShapePosition(obj_poses, obs):
+    # have while loop to check for all of shape positions being accessible
+    shapePosAccessible = False
+    while not shapePosAccessible:
+        try:
+            shape0_pos = obj_poses['Shape0'][:3]
+            print('Shape0 position accessed')
+            shape2_pos = obj_poses['Shape2'][:3]
+            print('Shape2 position accessed')
+            shape4_pos = obj_poses['Shape4'][:3]
+            print('All shape positions are available')
+            shapePosAccessible = True
+
+        except KeyError:
+            warnings.warn('Can''t access all shapes yet.')
+            time.sleep(1)
+
+    # once we can access the object shape positions, we need to
+    # we need to check if there are within the bounds of the large container
+    largeContainerPosition = obj_poses['large_container'][:3]
+    largeContainerOrientation = obj_poses['large_container'][3:]
+
+    largeContainerOrientation = quaternion(largeContainerOrientation[0],
+                                           largeContainerOrientation[1],
+                                           largeContainerOrientation[2],
+                                           largeContainerOrientation[3])
+    largeContainerOrientation = as_euler_angles(largeContainerOrientation)
+
+    transformValues = namedtuple('transformValues', ['x', 'y', 'yaw'])
+    transformValuesPosition = transformValues(x=0, y=0, yaw=largeContainerOrientation[1])
+
+    H_box = getTransform(transformValuesPosition)
+
+    # get dimensions of box !!!!!!!!!!!!! THESE ARE RANDOM VALUES RIGHT NOW!!!!!!!!
+    largeContainerLength = .34  # x dimension
+    largeContainerWidth = .34  # y dimension
+
+    boxCornerPoints = np.array([[largeContainerLength / 2, -largeContainerLength / 2, -largeContainerLength / 2, largeContainerLength / 2],
+                                [largeContainerWidth / 2, largeContainerWidth / 2, -largeContainerWidth / 2, -largeContainerWidth / 2],
+                                [1, 1, 1, 1]])
+
+    rotatedBoxCornerPoints = H_box @ boxCornerPoints
+
+    finalBoxCornerPoints = rotatedBoxCornerPoints + np.array([[largeContainerPosition[0]],
+                                                              [largeContainerPosition[1]],
+                                                              [1]])
+
+    finalBoxCornerCoords = [(finalBoxCornerPoints[0, 0], finalBoxCornerPoints[1, 0]),
+                            (finalBoxCornerPoints[0, 1], finalBoxCornerPoints[1, 1]),
+                            (finalBoxCornerPoints[0, 2], finalBoxCornerPoints[1, 2]),
+                            (finalBoxCornerPoints[0, 3], finalBoxCornerPoints[1, 3])]
+
+    boxPolygon = Polygon(finalBoxCornerCoords)
+
+    # check if points are in bounds
+    shapes = [shape0_pos, shape2_pos, shape4_pos]
+    shapeNum = ['0', '2', '4']
+    shapesToBeReset = []
+    for i in range(3):
+        pointToCheck = Point(shapes[i][0], shapes[i][1])
+        isWithinBox = pointToCheck.within(boxPolygon)
+        print('Shape' + shapeNum[i], 'within bounds: ', isWithinBox)
+        if not isWithinBox:#################################################need to change back: if not isWithinBox:
+            shapesToBeReset.append(shapeNum[i])
+
+    mode = 3
+
+    return mode, shapesToBeReset
+
+
+def getTransform(ArgStruct):
+    '''
+    getTransformation
+
+       Returns the 2D homogeneous transformation for the given pose.
+
+       Args: ArgStruct
+           ArgStruct.x: x axis position of frame 2's origin in frame 1 [m]
+           ArgStruct.y: y axis position of frame 2's origin in frame 1 [m]
+           ArgStruct.yaw: angle between frames [rad]
+
+       Returns: H
+           H: 2D homogeneous transformation matrix [m]
+
+       Raises:
+           none
+
+    '''
+    H = np.asarray([[np.cos(ArgStruct.yaw), -np.sin(ArgStruct.yaw), ArgStruct.x],
+                    [np.sin(ArgStruct.yaw), np.cos(ArgStruct.yaw), ArgStruct.y],
+                    [0, 0, 1]])
+
+    return H
+
+def put_in_big_container(state, shape, obs, object_pos, box_pos):
+
+    print("state:",state)
+    #move above object
+    if state == 0:
+        
+        action = np.concatenate((object_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([1])))
+        state = 1
+
+        return action, state, 1
+    #move to object
+    elif state == 1:
+        
+        action = np.concatenate((object_pos[0:3]+[0,0,0.001], obs.gripper_pose[3:7], np.array([1])))
+        
+        state = 2
+
+        return action, state, 1
+    #move up
+    elif state == 2:
+        
+        action = np.concatenate((object_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([0])))
+        state = 3
+
+        return action, state, 1
+
+    #move above box
+    elif state == 3:
+        
+        action = np.concatenate((box_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([0])))
+        state = 4
+
+        return action, state, 1
+
+    #move close to box
+    if state == 4:
+        
+        action = np.concatenate((box_pos[0:3]+[0,0,0.1], obs.gripper_pose[3:7], np.array([0])))
+        state = 5
+
+        return action, state, 1
+
+    #drop and move away
+
+    else:
+        shape = str(int(shape) + 2)
+        state=0
+        action = np.concatenate((box_pos[0:3]+[0,0,0.15], obs.gripper_pose[3:7], np.array([1])))
+        return action, state, 0
+
+    
+
+    
diff --git a/requirements.txt b/requirements.txt
index a10ba80..f08f8cf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 numpy
 scipy
-numpy-quaternion
\ No newline at end of file
+numpy-quaternion
+tensorforce
+tensorforce[tf]
diff --git a/rlbench_empty_container.py b/rlbench_empty_container.py
index 2085fa1..8823ef3 100644
--- a/rlbench_empty_container.py
+++ b/rlbench_empty_container.py
@@ -1,18 +1,28 @@
 import numpy as np
 import scipy as sp
-from quaternion import from_rotation_matrix, quaternion
+from quaternion import from_rotation_matrix, quaternion, as_euler_angles, from_euler_angles, as_float_array
+import time
+from collections import namedtuple
 
 from rlbench.environment import Environment
 from rlbench.action_modes import ArmActionMode, ActionMode
 from rlbench.observation_config import ObservationConfig
 from rlbench.tasks import *
 
+from helper import pick_up_box, pick_up_box_variables, get_objects
+from helper import *
+
+import sys
+sys.path.append('../')
+sys.path.append(sys.path[0] + '/TensorForceFiles')
+from tensorforce import Agent
+from dqn_grasp_class import DQN_grasp
+
 def skew(x):
     return np.array([[0, -x[2], x[1]],
                     [x[2], 0, -x[0]],
                     [-x[1], x[0], 0]])
 
-
 def sample_normal_pose(pos_scale, rot_scale):
     '''
     Samples a 6D pose from a zero-mean isotropic normal distribution
@@ -25,16 +35,6 @@ def sample_normal_pose(pos_scale, rot_scale):
 
     return pos, quat_wxyz
 
-
-class RandomAgent:
-
-    def act(self, obs):
-        delta_pos = [(np.random.rand() * 2 - 1) * 0.005, 0, 0]
-        delta_quat = [0, 0, 0, 1] # xyzw
-        gripper_pos = [np.random.rand() > 0.5]
-        return delta_pos + delta_quat + gripper_pos
-
-
 class MoveAgent:
 
     def act(self, obs, target_pos):
@@ -49,7 +49,6 @@ def act(self, obs, target_pos):
 
         return np.concatenate((robotStep, delta_quat, gripper_pos))
 
-
 class NoisyObjectPoseSensor:
 
     def __init__(self, env):
@@ -77,34 +76,154 @@ def get_poses(self):
             obj_poses[name] = pose
 
         return obj_poses
+    
+    def get_objs(self):
+        objs = self._env._scene._active_task.get_base().get_objects_in_tree(exclude_base=True, first_generation_only=False)
+        return objs
 
 
 if __name__ == "__main__":
-    action_mode = ActionMode(ArmActionMode.DELTA_EE_POSE) # See rlbench/action_modes.py for other action modes
+
+    mode = 0
+    action_mode = ActionMode(ArmActionMode.ABS_EE_POSE_PLAN) # See rlbench/action_modes.py for other action modes
     env = Environment(action_mode, '', ObservationConfig(), False)
     task = env.get_task(EmptyContainer) # available tasks: EmptyContainer, PlayJenga, PutGroceriesInCupboard, SetTheTable
-    agent = moveAgent()
+    agent = MoveAgent()
     obj_pose_sensor = NoisyObjectPoseSensor(env)
-   
+    update = 0
+    count = 0
+    
+    # TensorForce Initialization
+    rl_grasp_agent = DQN_grasp(load='dqn_grasp')
+    # rl_grasp_agent = DQN_grasp()
+
+    gripper = task._robot.gripper
+    objs = obj_pose_sensor.get_objs()
+    for obj in objs:
+        if (obj.get_name()=='small_container0'):
+            small_container0_obj = obj
+
     descriptions, obs = task.reset()
     print(descriptions)
+    global state
+    state = 0
+    global shape
+    shape = '0'
+
+    # initialize shape poses
+    obj_poses = obj_pose_sensor.get_poses()
+    shape_pos = obj_poses['Shape' + shape][:3]
+    most_recent_shape_pos = shape_pos
+
+    # Getting noisy object poses
+    small_container_pos = obj_poses['small_container0'][:3]
+    small_container_pos_original = obj_poses['small_container0'][:3]
+    large_container_pos = obj_poses['large_container'][:3]
+    small_container_quat2 = obj_poses['small_container0'][3:7]
+    small_container_pos[2] -= 0.01
+    small_container_quat = quaternion(obj_poses['small_container0'][3], obj_poses['small_container0'][4], obj_poses['small_container0'][5],obj_poses['small_container0'][6])
+    small_container_euler = as_euler_angles(small_container_quat)
+
+    z = small_container_euler[0]
+
+    above_large_container,small_container_pos_original, notflipped_array,flipped_array = pick_up_box_variables(large_container_pos,obs,z,small_container_pos)
+
     while True:
-        # Getting noisy object poses
-        obj_poses = obj_pose_sensor.get_poses()
-        small_container_pos = obj_poses['small_container0'][:3]
-
-        # Getting various fields from obs
-        current_joints = obs.joint_positions
-        gripper_pose = obs.gripper_pose
-        rgb = obs.wrist_rgb
-        depth = obs.wrist_depth
-        mask = obs.wrist_mask
-
-        # Perform action and step simulation
-        action = agent.act(obs, small_container_pos)
-        obs, reward, terminate = task.step(action)
 
-        # if terminate:
-        #     break
+        print('mode is: ', mode)
+        if mode==0:
+
+            # Getting noisy object poses
+            obj_poses = obj_pose_sensor.get_poses()
+            small_container_pos = obj_poses['small_container0'][:3]
+
+
+            # Perform action and step simulation
+            if int(shape) < 5:  # shape goes from 0, 2, 4
+                try:
+                    shape_pos = obj_poses['Shape' + shape][:3]
+                    most_recent_shape_pos = shape_pos
+                except KeyError:
+                    shape_pos = most_recent_shape_pos
+
+            print('Shape is: ', int(shape))
+
+            action, state, shape = get_objects(rl_grasp_agent, task, obj_pose_sensor, state, shape, obs, shape_pos, small_container_pos)
+            print('State is: ', state)
+            # print("done")
+            if int(shape) > 4:
+                print("Finished all pickup")
+                mode = 4
+
+        elif mode == 0.5:
+            obj_poses = obj_pose_sensor.get_poses()
+            small_container_pos = obj_poses['small_container0'][:3]
+            gripper_pose = obs.gripper_pose
+            small_container_pos[0] += 0.070*np.sin(z)
+            small_container_pos[1] += 0.070*np.cos(z)
+            update=-1
+            mode = 1
+        
+        elif mode == 1:
+            update, action = pick_up_box(update,obs,gripper,
+                                         small_container0_obj,z,
+                                         small_container_pos,
+                                         small_container_pos_original,
+                                         gripper_pose,above_large_container,
+                                         flipped_array,notflipped_array)
+
+            print('state update (mode 1, pick-up box): ', update)
+
+            shape = '0'
+            if update == 11:
+                mode = 2
+
+        elif mode == 2:
+            obj_poses = obj_pose_sensor.get_poses()
+            mode, shapesToBeReset = checkShapePosition(obj_poses, obs)
+            action = np.concatenate((obs.gripper_pose, np.array([1])))
+            print(mode)
+            state = 0
+            i=0
+            do_once_per_loop=0
+            # mode = 3
+
+        elif mode ==3:
+
+            if shapesToBeReset==[]:
+
+                mode=4
+            else:
+                obj_poses = obj_pose_sensor.get_poses()
+                large_container_pos = obj_poses['large_container'][:3]
+                
+                
+                
+                
+                if do_once_per_loop==0:
+                    print(shapesToBeReset)
+                    shape = shapesToBeReset[i]
+                    shape_pos = obj_poses['Shape' + shape][:3]
+
+                print("shape",shape)
+                action, state, do_once_per_loop = put_in_big_container(state, shape, obs, shape_pos, large_container_pos)
+                if (do_once_per_loop==0):
+                    if ((i+1)>=len(shapesToBeReset)) :
+                        mode=4
+                    else:   
+                        i+=1
+                        shape = shapesToBeReset[i]
+                
+        elif mode==4:
+            print("end")
+            state = 0
+            update = 0
+            count = 0
+            shape = '0'
+            mode=0
+            descriptions, obs = task.reset()
+            action = np.concatenate((obs.gripper_pose, np.array([1])))
+
+        obs, reward, terminate = task.step(action)
 
     env.shutdown()
diff --git a/test.jpg b/test.jpg
new file mode 100644
index 0000000..1fd9f8c
Binary files /dev/null and b/test.jpg differ