-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
128 lines (104 loc) · 3.54 KB
/
utils.py
File metadata and controls
128 lines (104 loc) · 3.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import matplotlib.pyplot as plt
from matplotlib import colors
from collections import deque
import numpy as np
import random
import time
# Replay Buffer
class ReplayBuffer:
    """Fixed-capacity circular store of transitions backed by NumPy arrays.

    Each transition is a ``(state, action, reward, next_state, done)`` tuple.
    Once ``max_size`` transitions have been written, new ones overwrite the
    oldest slots (the write position is ``mem_cntr % mem_size``).
    """

    def __init__(self, max_size, input_shape):
        """Pre-allocate zero-filled storage.

        Args:
            max_size: Number of transitions the buffer can hold.
            input_shape: Iterable of ints giving the shape of one state.
        """
        self.mem_size = max_size
        self.mem_cntr = 0  # total number of writes so far; never wraps

        observation_dims = (self.mem_size, *input_shape)
        self.state_memory = np.zeros(observation_dims, dtype=np.float32)
        self.new_state_memory = np.zeros(observation_dims, dtype=np.float32)

        self.action_memory = np.zeros(self.mem_size, dtype=np.int64)
        self.reward_memory = np.zeros(self.mem_size, dtype=np.float32)
        # ``done`` flags are kept as float32 (0.0 / 1.0), not booleans.
        self.terminal_memory = np.zeros(self.mem_size, dtype=np.float32)

    def store_transition(self, state, action, reward, next_state, done):
        """Write one transition into the next slot, wrapping when full."""
        slot = self.mem_cntr % self.mem_size
        writes = (
            (self.state_memory, state),
            (self.new_state_memory, next_state),
            (self.action_memory, action),
            (self.reward_memory, reward),
            (self.terminal_memory, done),
        )
        for storage, value in writes:
            storage[slot] = value
        self.mem_cntr += 1

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct stored transitions uniformly at random.

        Sampling is without replacement over the filled part of the buffer,
        so ``np.random.choice`` raises ``ValueError`` when ``batch_size``
        exceeds the number of stored transitions.

        Returns:
            Tuple ``(states, actions, rewards, new_states, dones)`` of arrays
            whose first dimension is ``batch_size``.
        """
        filled = min(self.mem_cntr, self.mem_size)
        picked = np.random.choice(filled, batch_size, replace=False)
        return (
            self.state_memory[picked],
            self.action_memory[picked],
            self.reward_memory[picked],
            self.new_state_memory[picked],
            self.terminal_memory[picked],
        )

    def __len__(self):
        """Return how many transitions are currently stored (at most mem_size)."""
        return min(self.mem_cntr, self.mem_size)
# Plotting
def plot_time_series(grid):
    """Plot one line per individual type from ``grid.type_timeseries``.

    Args:
        grid: Object exposing ``individual_types`` (iterable of keys) and
            ``type_timeseries`` (indexable by those keys).

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    for kind in grid.individual_types:
        series = grid.type_timeseries[kind]
        plt.plot(series)

    plt.title('Population timeline of different individual types')
    # Legend entries follow the same order in which the lines were drawn.
    plt.legend(
        grid.individual_types,
        loc='upper right',
        shadow=True,
    )
    plt.show()
def plot_grid(grid, gridlines, color_list):
    """Show ``grid.grid`` as an image with one discrete colour per type.

    Args:
        grid: Object exposing ``grid`` (the image data), ``grid_size`` and
            ``no_types``.
        gridlines: When truthy, draw black cell borders.
        color_list: Colours handed to ``ListedColormap``.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    cells = grid.grid
    side = grid.grid_size

    # Discrete colormap: band edges sit at -0.5, 0.5, 1.5, ... so that each
    # integer value 0 .. no_types-1 falls in the middle of its own band.
    palette = colors.ListedColormap(color_list)
    edges = [-0.5 + step for step in range(grid.no_types + 1)]
    scale = colors.BoundaryNorm(edges, palette.N)

    _, axes = plt.subplots()
    axes.imshow(cells, cmap=palette, norm=scale)

    if gridlines:
        # Major ticks on the half-integers put the grid lines on cell borders.
        axes.grid(which='major', axis='both', linestyle='-', color='k', linewidth=2)
        axes.set_xticks(np.arange(-.5, side, 1))
        axes.set_yticks(np.arange(-.5, side, 1))

    plt.show()
def animate(grid, gridlines, color_list, time, hold=3):
    """Display every stored grid snapshot in turn, then close the figures.

    One new figure is opened per entry of ``grid.store``. After the last
    frame the function waits ``hold`` seconds and then calls ``plt.close()``
    once per stored frame.

    Args:
        grid: Object exposing ``store`` (iterable of image data, iterated
            twice), ``grid_size`` and ``no_types``.
        gridlines: When truthy, draw black cell borders on every frame.
        color_list: Colours handed to ``ListedColormap``.
        time: Value passed to ``plt.pause`` after each frame. Note that this
            parameter shadows the ``time`` module inside this function.
        hold: Value passed to ``time_delay`` after the last frame, before
            the figures are closed. Defaults to 3, the previously
            hard-coded value.
    """
    # Everything below is identical for all frames, so build it once instead
    # of once per frame.
    n = grid.grid_size
    cmap = colors.ListedColormap(color_list)
    # Band edges at -0.5, 0.5, 1.5, ... give each integer type its own colour.
    bounds = [-0.5 + i for i in range(grid.no_types + 1)]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    ticks = np.arange(-.5, n, 1)

    for frame in grid.store:
        _, ax = plt.subplots()
        ax.imshow(frame, cmap=cmap, norm=norm)

        if gridlines:
            # Major ticks on the half-integers put the lines on cell borders.
            ax.grid(which='major', axis='both', linestyle='-', color='k', linewidth=2)
            ax.set_xticks(ticks)
            ax.set_yticks(ticks)

        plt.pause(time)

    time_delay(hold)

    # One figure was opened per frame; close the same number.
    for _ in grid.store:
        plt.close()
def plot_learning_curve(scores, eps_history):
    """Plot epsilon and a running mean of ``scores`` on overlaid axes.

    Epsilon is drawn as a line on the left axis and the running mean as a
    scatter on a right-hand axis sharing the same subplot position. The
    running mean at index ``t`` averages ``scores[max(0, t - 100):t + 1]``.

    Args:
        scores: Sequence of scores, one per x position.
        eps_history: Epsilon values plotted against the same x positions.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    total = len(scores)
    steps = list(range(1, total + 1))

    figure = plt.figure()
    eps_axis = figure.add_subplot(111, label="1")
    # Second axes on the same position, frameless so the first shows through.
    score_axis = figure.add_subplot(111, label="2", frame_on=False)

    eps_axis.plot(steps, eps_history, color="C0")
    eps_axis.set_xlabel("Training Steps", color="C0")
    eps_axis.set_ylabel("Epsilon", color="C0")
    for which in ('x', 'y'):
        eps_axis.tick_params(axis=which, colors="C0")

    smoothed = np.empty(total)
    for stop in range(1, total + 1):
        start = max(0, stop - 101)
        smoothed[stop - 1] = np.mean(scores[start:stop])

    score_axis.scatter(steps, smoothed, color="C1")
    score_axis.get_xaxis().set_visible(False)
    score_axis.yaxis.tick_right()
    score_axis.set_ylabel('Score', color="C1")
    score_axis.yaxis.set_label_position('right')
    score_axis.tick_params(axis='y', colors="C1")

    plt.show()
# Misc
def time_delay(sec):
    """Block the calling thread by passing ``sec`` to ``time.sleep``.

    Args:
        sec: Duration forwarded unchanged to ``time.sleep``.
    """
    time.sleep(sec)