diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..c18dd8d8 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/CustomUnityEnvironments/Environment-1_Data/ML-Agents/Timers/Walker-CKG_timers.json b/CustomUnityEnvironments/Environment-1_Data/ML-Agents/Timers/Walker-CKG_timers.json index aa5654fb..31261e2e 100755 --- a/CustomUnityEnvironments/Environment-1_Data/ML-Agents/Timers/Walker-CKG_timers.json +++ b/CustomUnityEnvironments/Environment-1_Data/ML-Agents/Timers/Walker-CKG_timers.json @@ -1 +1 @@ -{"count":1,"self":37.7358848,"total":52.189068999999996,"children":{"InitializeActuators":{"count":1,"self":0.001162,"total":0.001162,"children":null},"InitializeSensors":{"count":1,"self":0.000968,"total":0.000968,"children":null},"AgentSendState":{"count":2862,"self":0.007357,"total":0.033465999999999996,"children":{"CollectObservations":{"count":591,"self":0.007939,"total":0.007939,"children":null},"WriteActionMask":{"count":591,"self":0.00065,"total":0.00065,"children":null},"RequestDecision":{"count":591,"self":0.0024449999999999997,"total":0.01752,"children":{"AgentInfo.ToProto":{"count":591,"self":0.002041,"total":0.015075,"children":{"GenerateSensorData":{"count":591,"self":0.013033999999999999,"total":0.013033999999999999,"children":null}}}}}}},"DecideAction":{"count":2862,"self":14.4021648,"total":14.402165,"children":null},"AgentAct":{"count":2862,"self":0.013415,"total":0.013415,"children":null},"AgentInfo.ToProto":{"count":38,"self":0.000124,"total":0.00073699999999999992,"children":{"GenerateSensorData":{"count":38,"self":0.00061299999999999994,"total":0.00061299999999999994,"children":null}}}},"gauges":{"Walker.CumulativeReward":{"count":38,"max":8.398328,"min":-1.98653162,"runningAverage":-0.239016965,"value":-0.8940924,"weightedAverage":-0.555802941}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1652920713","unity_version":"2019.4.25f1","command_line_arguments":"\/home\/ckg\/Desktop\/AgentWalker\/CustomUnityEnvironments\/Environment-1.x86_64 -nographics -batchmode --mlagents-port 5005","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.1.0-exp.1","scene_name":"Walker-CKG","end_time_seconds":"1652920765"}} \ No newline at end of file 
+{"count":1,"self":4.315364,"total":12.338643,"children":{"InitializeActuators":{"count":1,"self":0.001424,"total":0.001424,"children":null},"InitializeSensors":{"count":1,"self":0.0013,"total":0.0013,"children":null},"AgentSendState":{"count":328,"self":0.001977,"total":0.017171,"children":{"CollectObservations":{"count":68,"self":0.003688,"total":0.003688,"children":null},"WriteActionMask":{"count":68,"self":0.000251,"total":0.000251,"children":null},"RequestDecision":{"count":68,"self":0.002044,"total":0.011255,"children":{"AgentInfo.ToProto":{"count":68,"self":0.001108,"total":0.009211,"children":{"GenerateSensorData":{"count":68,"self":0.0081029999999999991,"total":0.0081029999999999991,"children":null}}}}}}},"DecideAction":{"count":328,"self":7.9964968,"total":7.996497,"children":null},"AgentAct":{"count":328,"self":0.004883,"total":0.004883,"children":null},"AgentInfo.ToProto":{"count":3,"self":1.7E-05,"total":0.000102,"children":{"GenerateSensorData":{"count":3,"self":8.4999999999999993E-05,"total":8.4999999999999993E-05,"children":null}}}},"gauges":{"Walker.CumulativeReward":{"count":3,"max":4.73540974,"min":-0.598414063,"runningAverage":2.792868,"value":4.241608,"weightedAverage":1.61168337}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1769225643","unity_version":"2019.4.25f1","command_line_arguments":"\/home\/user\/AgentWalker\/CustomUnityEnvironments\/Environment-1.x86_64 -nographics -batchmode --mlagents-port 5005","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.1.0-exp.1","scene_name":"Walker-CKG","end_time_seconds":"1769225655"}} \ No newline at end of file diff --git a/capture_video.py b/capture_video.py new file mode 100644 index 00000000..d70d8c3f --- /dev/null +++ b/capture_video.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +""" +Capture video of the Walker agent taking random actions. +Runs with graphics enabled to capture visual frames. 
+""" +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +import numpy as np +import imageio +from mlagents_envs.environment import UnityEnvironment +from CustomUnityEnvironments import UnityToGymWrapper + + +def main(): + print("=" * 60) + print("Walker-CKG Video Capture") + print("=" * 60) + + # Load environment WITH graphics for visual rendering + print("\nLoading Unity environment (with graphics)...") + unity_env = UnityEnvironment( + "CustomUnityEnvironments/Environment-1.x86_64", + no_graphics=False # Enable graphics for video capture + ) + env = UnityToGymWrapper( + unity_env, + uint8_visual=True, # Get visual obs as uint8 images + flatten_branched=False, + allow_multiple_obs=True # Get all observations including visual + ) + + print(f"\nObservation Space: {env.observation_space}") + print(f"Action Space: {env.action_space}") + + frames = [] + total_steps = 0 + max_frames = 300 # Capture ~10 seconds at 30fps + + print(f"\nCapturing up to {max_frames} frames...") + + while total_steps < max_frames: + obs = env.reset() + done = False + episode_steps = 0 + + while not done and total_steps < max_frames: + # Get visual observation if available + visual_obs = env.render(mode="rgb_array") + if visual_obs is not None: + frames.append(visual_obs) + + # Random action + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + total_steps += 1 + episode_steps += 1 + + print(f" Episode completed: {episode_steps} steps") + + env.close() + + if frames: + print(f"\nSaving {len(frames)} frames to walker_demo.mp4...") + imageio.mimsave("walker_demo.mp4", frames, fps=30) + print("Video saved successfully!") + else: + print("\nNo visual frames captured - environment may not provide visual observations") + print("The Walker environment uses vector observations (243-dim state), not images") + + return len(frames) + + +if __name__ == "__main__": + main() diff --git a/create_walker_video.py b/create_walker_video.py new file mode 100644 index 00000000..f581a3d4 --- /dev/null +++ b/create_walker_video.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python3 +""" +Create a visualization video of the Walker agent. +Since the Unity renderer doesn't work on headless servers, +this script visualizes the agent's state data over time. 
+""" +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) +warnings.filterwarnings("ignore", category=UserWarning) + +import numpy as np +import matplotlib +matplotlib.use('Agg') # Headless backend +import matplotlib.pyplot as plt +import matplotlib.animation as animation +from matplotlib.patches import Circle, FancyBboxPatch +from matplotlib.lines import Line2D +import imageio + +from mlagents_envs.environment import UnityEnvironment +from CustomUnityEnvironments import UnityToGymWrapper + + +def collect_episode_data(env, max_steps=200): + """Collect observation and reward data for one episode.""" + observations = [] + rewards = [] + actions = [] + + obs = env.reset() + observations.append(obs.copy()) + done = False + steps = 0 + + while not done and steps < max_steps: + action = env.action_space.sample() + actions.append(action.copy()) + obs, reward, done, info = env.step(action) + observations.append(obs.copy()) + rewards.append(reward) + steps += 1 + + return np.array(observations), np.array(rewards), np.array(actions) + + +def create_frame(obs, action, reward, cumulative_reward, step, total_steps): + """Create a single visualization frame.""" + fig = plt.figure(figsize=(12, 8), facecolor='#1a1a2e') + + # Create grid layout + gs = fig.add_gridspec(2, 3, hspace=0.3, wspace=0.3, + left=0.08, right=0.95, top=0.92, bottom=0.08) + + # Title + fig.suptitle('Walker-CKG Agent Simulation', fontsize=16, color='white', fontweight='bold') + + # 1. Stick figure visualization (simplified bipedal walker) + ax1 = fig.add_subplot(gs[0, 0]) + ax1.set_facecolor('#16213e') + ax1.set_xlim(-2, 2) + ax1.set_ylim(-0.5, 3) + ax1.set_aspect('equal') + ax1.set_title('Agent State', color='white', fontsize=11) + ax1.axis('off') + + # Draw ground + ax1.axhline(y=0, color='#4a5568', linewidth=2) + ax1.fill_between([-2, 2], [-0.5, -0.5], [0, 0], color='#2d3748', alpha=0.5) + + # Simplified walker representation using observation data + # Observations likely contain body positions/orientations + # We'll create an abstract representation + body_height = 1.5 + 0.3 * np.sin(obs[0] * 2) # Use first obs as proxy for body height + body_sway = 0.2 * np.sin(obs[1] * 3) # Sway based on another observation + + # Body (torso) + torso_x = body_sway + torso_bottom = body_height - 0.4 + torso_top = body_height + 0.4 + ax1.plot([torso_x, torso_x], [torso_bottom, torso_top], + color='#e94560', linewidth=8, solid_capstyle='round') + + # Head + head = Circle((torso_x, torso_top + 0.15), 0.15, color='#e94560') + ax1.add_patch(head) + + # Arms (affected by some action dimensions) + arm_angle_l = 0.5 + 0.3 * action[0] if len(action) > 0 else 0.5 + arm_angle_r = 0.5 - 0.3 * action[1] if len(action) > 1 else 0.5 + + ax1.plot([torso_x, torso_x - 0.4 * np.cos(arm_angle_l)], + [torso_top - 0.1, torso_top - 0.1 - 0.4 * np.sin(arm_angle_l)], + color='#0f3460', linewidth=5, solid_capstyle='round') + ax1.plot([torso_x, torso_x + 0.4 * np.cos(arm_angle_r)], + [torso_top - 0.1, torso_top - 0.1 - 0.4 * np.sin(arm_angle_r)], + color='#0f3460', linewidth=5, solid_capstyle='round') + + # Legs (affected by other action dimensions) + leg_angle_l = 0.3 * action[10] if len(action) > 10 else 0 + leg_angle_r = 0.3 * action[11] if len(action) > 11 else 0 + + # Left leg + knee_l_x = torso_x - 0.15 + 0.3 * np.sin(leg_angle_l) + knee_l_y = torso_bottom - 0.4 + foot_l_x = knee_l_x + 0.2 * np.sin(leg_angle_l + 0.5) + foot_l_y = 0.05 + + ax1.plot([torso_x - 0.1, knee_l_x], [torso_bottom, knee_l_y], + color='#0f3460', linewidth=6, 
solid_capstyle='round') + ax1.plot([knee_l_x, foot_l_x], [knee_l_y, foot_l_y], + color='#0f3460', linewidth=5, solid_capstyle='round') + + # Right leg + knee_r_x = torso_x + 0.15 + 0.3 * np.sin(leg_angle_r) + knee_r_y = torso_bottom - 0.4 + foot_r_x = knee_r_x + 0.2 * np.sin(leg_angle_r - 0.5) + foot_r_y = 0.05 + + ax1.plot([torso_x + 0.1, knee_r_x], [torso_bottom, knee_r_y], + color='#0f3460', linewidth=6, solid_capstyle='round') + ax1.plot([knee_r_x, foot_r_x], [knee_r_y, foot_r_y], + color='#0f3460', linewidth=5, solid_capstyle='round') + + # 2. Observation heatmap (subset of 243 dims) + ax2 = fig.add_subplot(gs[0, 1:]) + ax2.set_facecolor('#16213e') + obs_subset = obs[:60].reshape(6, 10) # Show first 60 obs as 6x10 grid + im = ax2.imshow(obs_subset, cmap='coolwarm', aspect='auto', vmin=-3, vmax=3) + ax2.set_title('Observation State (60/243 dims)', color='white', fontsize=11) + ax2.set_xlabel('Feature Index', color='#a0aec0', fontsize=9) + ax2.set_ylabel('Group', color='#a0aec0', fontsize=9) + ax2.tick_params(colors='#a0aec0') + + # 3. Action visualization + ax3 = fig.add_subplot(gs[1, 0]) + ax3.set_facecolor('#16213e') + action_subset = action[:20] # Show first 20 actions + colors = ['#e94560' if a > 0 else '#0f3460' for a in action_subset] + bars = ax3.barh(range(len(action_subset)), action_subset, color=colors) + ax3.set_xlim(-1.2, 1.2) + ax3.set_title(f'Actions (20/39 joints)', color='white', fontsize=11) + ax3.set_xlabel('Torque', color='#a0aec0', fontsize=9) + ax3.set_ylabel('Joint', color='#a0aec0', fontsize=9) + ax3.axvline(x=0, color='#4a5568', linewidth=1) + ax3.tick_params(colors='#a0aec0') + + # 4. Metrics display + ax4 = fig.add_subplot(gs[1, 1]) + ax4.set_facecolor('#16213e') + ax4.axis('off') + + metrics_text = f""" + Step: {step} / {total_steps} + + Current Reward: {reward:+.3f} + + Cumulative Reward: {cumulative_reward:+.3f} + + Obs Dimensions: 243 + Action Dimensions: 39 + """ + ax4.text(0.1, 0.9, metrics_text, transform=ax4.transAxes, + fontsize=12, color='white', verticalalignment='top', + fontfamily='monospace') + ax4.set_title('Episode Metrics', color='white', fontsize=11) + + # 5. 
Reward over time plot + ax5 = fig.add_subplot(gs[1, 2]) + ax5.set_facecolor('#16213e') + ax5.set_title('Reward Signal', color='white', fontsize=11) + ax5.set_xlabel('Step', color='#a0aec0', fontsize=9) + ax5.set_ylabel('Reward', color='#a0aec0', fontsize=9) + ax5.tick_params(colors='#a0aec0') + + # Show current reward as a bar + ax5.bar([step], [reward], color='#e94560' if reward > 0 else '#0f3460', width=3) + ax5.set_xlim(0, total_steps) + ax5.set_ylim(-0.5, 0.5) + ax5.axhline(y=0, color='#4a5568', linewidth=1) + + # Convert figure to image + fig.canvas.draw() + # Get RGBA buffer and convert to RGB + buf = np.asarray(fig.canvas.buffer_rgba()) + image = buf[:, :, :3] # Drop alpha channel + plt.close(fig) + + return image + + +def main(): + print("=" * 60) + print("Walker-CKG Video Generator") + print("=" * 60) + + # Load environment + print("\nLoading Unity environment (headless)...") + unity_env = UnityEnvironment( + "CustomUnityEnvironments/Environment-1.x86_64", + no_graphics=True + ) + env = UnityToGymWrapper(unity_env, uint8_visual=False, flatten_branched=False, allow_multiple_obs=False) + + print(f"Action Space: {env.action_space}") + print(f"Observation Space: {env.observation_space}") + + # Collect data from multiple episodes + print("\nCollecting episode data...") + all_frames = [] + + for episode in range(3): + print(f"\n Episode {episode + 1}...") + observations, rewards, actions = collect_episode_data(env, max_steps=100) + + cumulative_reward = 0 + for i in range(len(rewards)): + cumulative_reward += rewards[i] + + # Create frame every 2 steps to reduce video length + if i % 2 == 0: + frame = create_frame( + observations[i], + actions[i], + rewards[i], + cumulative_reward, + i, + len(rewards) + ) + all_frames.append(frame) + + print(f" Steps: {len(rewards)}, Total Reward: {cumulative_reward:.2f}") + + env.close() + + # Save video + output_path = "walker_demo.mp4" + print(f"\nSaving {len(all_frames)} frames to {output_path}...") + imageio.mimsave(output_path, all_frames, fps=15) + print(f"Video saved to {output_path}") + + return output_path + + +if __name__ == "__main__": + main() diff --git a/env_test.py b/env_test.py index 072711a4..43d26218 100644 --- a/env_test.py +++ b/env_test.py @@ -5,7 +5,7 @@ import time from mlagents_envs.environment import UnityEnvironment -from gym_unity.envs import UnityToGymWrapper +from CustomUnityEnvironments import UnityToGymWrapper # Read in Custom Env ".x86_64" for Linux & ".app" for MacOS diff --git a/run_walker_demo.py b/run_walker_demo.py new file mode 100644 index 00000000..a8114475 --- /dev/null +++ b/run_walker_demo.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +""" +Demo script to run the Walker Unity environment with random actions. +This shows that the environment is working and the agent is responding. 
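+
+Environment setup (sketch mirroring main() below; note the wrapper is the
+local CustomUnityEnvironments.UnityToGymWrapper, not gym_unity):
+
+    unity_env = UnityEnvironment(
+        "CustomUnityEnvironments/Environment-1.x86_64", no_graphics=True
+    )
+    env = UnityToGymWrapper(unity_env, uint8_visual=False,
+                            flatten_branched=False, allow_multiple_obs=False)
+    action = env.action_space.sample()           # 39-dim continuous action vector
+    obs, reward, done, info = env.step(action)   # obs is the 243-dim state vector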
+""" +import warnings +warnings.filterwarnings("ignore", category=DeprecationWarning) + +import numpy as np +from mlagents_envs.environment import UnityEnvironment +from CustomUnityEnvironments import UnityToGymWrapper + + +def main(): + print("=" * 60) + print("Walker-CKG Unity Environment Demo") + print("=" * 60) + + # Load environment in headless mode + print("\nLoading Unity environment (headless mode)...") + unity_env = UnityEnvironment( + "CustomUnityEnvironments/Environment-1.x86_64", + no_graphics=True + ) + env = UnityToGymWrapper(unity_env, uint8_visual=False, flatten_branched=False, allow_multiple_obs=False) + + print("\nEnvironment Details:") + print(f" Action Space: {env.action_space}") + print(f" Observation Space: {env.observation_space}") + print(f" Agent: Bipedal Walker with 39 controllable joints") + print(f" Observation: 243-dimensional state vector (joint angles, velocities, etc.)") + + # Run a few episodes with random actions + print("\n" + "=" * 60) + print("Running 3 episodes with random actions...") + print("=" * 60) + + for episode in range(3): + obs = env.reset() + total_reward = 0 + steps = 0 + done = False + + while not done and steps < 100: # Max 100 steps per episode + # Random action in continuous action space + action = env.action_space.sample() + obs, reward, done, info = env.step(action) + total_reward += reward + steps += 1 + + print(f"\nEpisode {episode + 1}:") + print(f" Steps: {steps}") + print(f" Total Reward: {total_reward:.2f}") + print(f" Final observation shape: {obs.shape}") + + # Clean up + print("\nClosing environment...") + env.close() + print("Done!") + + +if __name__ == "__main__": + main() diff --git a/walker_demo.mp4 b/walker_demo.mp4 new file mode 100644 index 00000000..37c887fe Binary files /dev/null and b/walker_demo.mp4 differ