-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathReinforcementLearning.py
More file actions
46 lines (34 loc) · 1.14 KB
/
ReinforcementLearning.py
File metadata and controls
46 lines (34 loc) · 1.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
# --- Environment and Q-table setup ----------------------------------------
# Deterministic FrozenLake: is_slippery=False means each action moves the
# agent exactly as requested, so a purely greedy tabular policy can learn
# a reliable path to the goal.
environment = gym.make("FrozenLake-v1", is_slippery=False, render_mode="ansi")
environment.reset()

nb_states = environment.observation_space.n  # number of discrete grid states
nb_action = environment.action_space.n       # number of discrete moves

# Q-table: one row per state, one column per action; all estimates start at 0.
qtable = np.zeros((nb_states, nb_action))
print("q table:")
print(qtable)

# Q-learning hyperparameters.
episodes = 1000
alpha = 0.5   # learning rate: how far each update moves toward the target
gamma = 0.9   # discount rate: weight of future rewards
outcomes = []  # per-episode "Success"/"Failure" labels, filled during training
# --- Training loop ---------------------------------------------------------
for _ in range(episodes):
    state, _ = environment.reset()
    done = False
    outcomes.append("Failure")  # assume failure; flipped to "Success" on reward
    while not done:
        # Exploit: take the best known action if any Q-value for this state
        # is positive; otherwise explore with a random action.
        if np.max(qtable[state]) > 0:
            action = np.argmax(qtable[state])
        else:
            action = environment.action_space.sample()
        # gymnasium's step() returns a 5-tuple. The episode ends when it is
        # either terminated (goal or hole reached) or truncated (time limit);
        # the original code unpacked `terminated` as done and silently dropped
        # `truncated`, which could leave the loop stepping a finished episode.
        new_state, reward, terminated, truncated, info = environment.step(action)
        done = terminated or truncated
        # Q-learning update: nudge the estimate toward the bootstrapped target.
        qtable[state, action] = qtable[state, action] + alpha * (
            reward + gamma * np.max(qtable[new_state]) - qtable[state, action]
        )
        state = new_state
        if reward:  # FrozenLake gives reward 1 only on reaching the goal
            outcomes[-1] = "Success"
# --- Results ---------------------------------------------------------------
print("Qtable After Training:")
print(qtable)

# Plot per-episode outcome as numbers (1 = success, 0 = failure). Passing the
# raw "Success"/"Failure" strings as bar heights makes matplotlib treat the
# y-axis as categorical, producing a misleading chart.
plt.bar(range(episodes), [1 if outcome == "Success" else 0 for outcome in outcomes])
plt.xlabel("Episode")
plt.ylabel("Outcome (1 = success)")
plt.show()