# Professional Documents
# Culture Documents
# Py Code Example 11 0 Baird Emphatic TD
# Py Code Example 11 0 Baird Emphatic TD
##################
import numpy as np
import matplotlib.pyplot as plt
# Parameter settings.
n1 = 7   # number of states
n2 = n1  # number of working states (tied to n1)
alpha, discount = 0.03, 0.99
# Disabled settings, kept for reference:
# prob_eps = 0.1
# eps = 1.0e-7  # should be small enough
# building environment
def b_move(state):
    """Sample an action: 0 (dashed) with probability 6/7, otherwise 1 (solid).

    Args:
        state: Current state. Accepted for interface compatibility; the
            sampling below does not depend on it.

    Returns:
        int: 0 for the dashed action, 1 for the solid action.
    """
    # Draw a scalar rather than a one-element array so the comparison below
    # yields a plain boolean instead of relying on array truthiness.
    rand_num = np.random.uniform(low=0, high=1)
    return 0 if rand_num <= 6 / 7 else 1
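# NOTE(review): a stray token `rho` stood here; presumably the remnant of a
# definition (importance-sampling ratio?) lost from this excerpt -- confirm.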
def v_value(state, w):
    """Return the value estimate of ``state`` computed from weights ``w``.

    State 6 is valued as ``w[6] + 2 * w[7]``; every other state is valued
    as ``2 * w[state] + w[7]``.

    Args:
        state: Integer state index.
        w: Indexable weights; index 7 is always read, so it needs at least
            eight entries.

    Returns:
        The value estimate for ``state``.
    """
    if state == 6:
        return w[6] + 2 * w[7]
    return 2 * w[state] + w[7]
# main loop
seed = 543
np.random.seed(seed)  # Set random seed for reproducibility.
interest = [1, 1, 1, 1, 1, 1, 1]
#step_num = 1000
# NOTE(review): `w_matrix`, `w` and `state_value` are not defined anywhere in
# the visible code -- presumably they are produced by a training loop missing
# from this excerpt. Confirm before running.
print(w_matrix)
# Fill in the value estimate of each of the 7 states from the weights.
for i in range(7):
    state_value[i] = v_value(i, w)
print(state_value)
# Figure 11.6
# NOTE(review): `step_num`, `w_matrix`, `colors` and `linestyles` are not
# defined in the visible code; they must be provided earlier in the script.
plt.figure("Figure 11.6")
# One curve per weight component (columns 0..7 of w_matrix); the line style
# cycles through four entries of `linestyles`.
for j in range(8):
    plt.plot(range(step_num + 1), w_matrix[:, j],
             color=colors[j], linestyle=linestyles[j % 4])
plt.xlabel("step")
plt.ylabel("weight value")
plt.legend(['w1', 'w2', 'w3', 'w4', 'w5', 'w6', 'w7', 'w8'],
           loc="best", frameon=False)
plt.show()
##################