-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathneuroevolution_agent.py
More file actions
74 lines (67 loc) · 3.06 KB
/
neuroevolution_agent.py
File metadata and controls
74 lines (67 loc) · 3.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import numpy as np
from dynamicalgorithmselection.agents.agent import Agent
from dynamicalgorithmselection.optimizers.Optimizer import Optimizer
class NeuroevolutionAgent(Agent):
    """Agent that dynamically selects among candidate optimizers via a
    neuroevolved policy network.

    Each generation, the current optimization state is fed to ``self.net``
    (supplied via ``options["net"]``, expected to expose ``activate(state)``);
    the argmax over the network's output chooses which optimizer from
    ``self.actions`` runs for the next budgeted chunk of evaluations.
    """

    def __init__(self, problem, options):
        """Initialize the agent.

        problem : problem definition, passed through to ``Agent``.
        options : dict of agent options; must contain ``"net"``, the policy
            network used for action selection.
        """
        Agent.__init__(self, problem, options)
        self.net = options["net"]

    def _collect(self, fitness, y=None):
        """Assemble the final results dict, adding agent-specific diagnostics
        (generation count, mean reward, chosen actions, reward normalizer)."""
        if y is not None:
            self._print_verbose_info(fitness, y)
        # BUG FIX: the original called ``super()._collect(self, fitness)``,
        # passing ``self`` twice (once bound, once explicit), which shifted
        # ``fitness`` into the parent's ``y`` parameter slot.
        results, _ = super()._collect(fitness)
        results["_n_generations"] = self._n_generations
        results["mean_reward"] = sum(self.rewards) / len(self.rewards)
        results["actions"] = self.choices_history
        results.update({"reward_normalizer": self.reward_normalizer})
        return results

    def optimize(self, fitness_function=None, args=None):
        """Main loop: repeatedly pick an optimizer with the policy network,
        run it under a capped evaluation budget, and reward the choice by the
        (normalized) improvement of the best-so-far objective value.

        Returns the results dict produced by :meth:`_collect`.
        """
        fitness = Optimizer.optimize(self, fitness_function)
        x, y = None, None
        iteration_result = {"x": x, "y": y}
        x_history, y_history = None, None
        step_idx = 0
        while not self._check_terminations():
            state = self.get_state(x, y, x_history, y_history, self.train_mode)
            # Sanitize NaN/inf so the policy network never sees invalid input.
            state = np.nan_to_num(state, nan=0.5, neginf=0.0, posinf=1.0)
            policy = self.net.activate(state)
            action = np.argmax(policy)
            self.choices_history.append(action)
            # Hand the chosen optimizer a copy of the options with the budget
            # capped at the next checkpoint (never past the global budget).
            action_options = dict(self.options)
            action_options["max_function_evaluations"] = min(
                self.checkpoints[self._n_generations],
                self.max_function_evaluations,
            )
            action_options["verbose"] = False
            optimizer = self.actions[action](self.problem, action_options)
            # Resume the evaluation counter so the sub-optimizer respects the
            # overall budget already consumed.
            optimizer.n_function_evaluations = self.n_function_evaluations
            optimizer._n_generations = 0
            best_parent = self.best_so_far_y
            iteration_result = self.iterate(iteration_result, optimizer)
            x, y = iteration_result.get("x"), iteration_result.get("y")
            x_history = iteration_result.get("x_history")
            y_history = iteration_result.get("y_history")
            # Reward = improvement of the global best over this step.
            reward = self.get_reward(self.best_so_far_y, best_parent)
            reward = self.reward_normalizer.normalize(reward, step_idx)
            self.rewards.append(reward)
            if self.run:
                self.run.log({"reward": reward})
            # BUG FIX: compute the evaluation delta BEFORE syncing the
            # counter; the original assigned ``self.n_function_evaluations``
            # first, so the stagnation increment below was always zero.
            evals_used = (
                optimizer.n_function_evaluations - self.n_function_evaluations
            )
            self.n_function_evaluations = optimizer.n_function_evaluations
            self._print_verbose_info(fitness, y)
            if optimizer.best_so_far_y >= self.best_so_far_y:
                # No improvement on the global best: accumulate stagnation.
                self.stagnation_count += evals_used
            else:
                self.stagnation_count = 0
            step_idx += 1
        return self._collect(fitness, self.best_so_far_y)