You are on page 1 of 2

ASSIGNMENT - 6

In [11]:

import gym
import numpy as np
import random

In [12]:

# Create the FrozenLake-v1 environment used by every cell below.
# The optional render_mode='human' argument is not passed here.
env= gym.make('FrozenLake-v1')

In [13]:

# Tabular action-value function: one entry per (state, action) pair,
# with every estimate starting at zero.
Q = {
    (state, action): 0.0
    for state in range(env.observation_space.n)
    for action in range(env.action_space.n)
}

In [14]:
def epsilon_greedy(state, epsilon):
    """Pick an action for ``state`` with an epsilon-greedy rule.

    A number is drawn from ``random.uniform(0, 1)``. If it is below
    ``epsilon`` the action comes from ``env.action_space.sample()``;
    otherwise the action with the largest ``Q[(state, action)]`` is
    returned. Ties go to the lowest action index because ``max`` keeps
    the first maximum it encounters.

    Args:
        state: Current state, used as the first element of the Q key.
        epsilon: Threshold compared against the uniform draw.

    Returns:
        The selected action.
    """
    explore = random.uniform(0, 1) < epsilon
    if explore:
        return env.action_space.sample()

    def action_value(action):
        # Current estimate for taking `action` in `state`.
        return Q[(state, action)]

    return max(range(env.action_space.n), key=action_value)

In [15]:
# Q-learning hyperparameters:
#   alpha   - step size applied to the TD error in the update
#   gamma   - discount applied to the successor state's value
#   epsilon - exploration threshold passed to epsilon_greedy
alpha, gamma, epsilon = 0.85, 0.90, 0.8

In [16]:
# Training budget: how many episodes to run and the per-episode step cap.
num_episodes, num_timesteps = 50000, 1000

In [17]:
# Tabular Q-learning: act epsilon-greedily, then move Q[(s, a)] towards the
# one-step target r + gamma * max_a' Q[(s_, a')].
for i in range(num_episodes):
    # reset() returns a tuple; its first element is the initial observation.
    s = env.reset()[0]
    for t in range(num_timesteps):
        a = epsilon_greedy(s, epsilon)
        # step() returns (observation, reward, terminated, truncated, info).
        s_, r, terminated, truncated, info = env.step(a)
        # Greedy action in the successor state. The comprehension variable
        # has its own name so it cannot be mistaken for the action `a` above.
        a_ = np.argmax([Q[(s_, next_a)] for next_a in range(env.action_space.n)])
        Q[(s, a)] += alpha * (r + gamma * Q[(s_, a_)] - Q[(s, a)])
        s = s_
        # End the episode on truncation as well as termination. Previously
        # the truncated flag was discarded, so the loop kept stepping an
        # environment that had already signalled the end of the episode.
        done = terminated or truncated
        if done:
            break

In [18]:
# Display the Q-table as left by the training loop above.
Q

Out[18]:
{(0, 0): 0.23477961696373423,
(0, 1): 0.22480181183703787,
(0, 2): 0.23961716957752016,
(0, 3): 0.24066398243905854,
(1, 0): 0.2204815896999076,
(1, 1): 0.04017125915710931,
(1, 2): 0.2822227428738474,
(1, 3): 0.22490808477046206,
(2, 0): 0.29961284509447655,
(2, 1): 0.32990657866523887,
(2, 2): 0.37292229711147334,
(2, 3): 0.2790710024900863,
(3, 0): 0.25000597793284357,
(3, 1): 0.2575759230383145,
(3, 2): 0.037377204692152305,
(3, 3): 0.32551898596551954,
(4, 0): 0.3804023551933965,
(4, 1): 0.00856676265665978,
(4, 2): 0.5076563484150082,
(4, 3): 0.050394379122136346,
(5, 0): 0.0,
(5, 1): 0.0,
(5, 2): 0.0,
(5, 3): 0.0,
(6, 0): 0.4285911909119954,
(6, 1): 0.0002831967627810342,
(6, 2): 0.692932809233417,
(6, 3): 0.006210297861473632,
(7, 0): 0.0,
(7, 1): 0.0,
(7, 2): 0.0,
(7, 3): 0.0,
(8, 0): 0.4724947380728043,
(8, 1): 0.5292926568861616,
(8, 2): 0.0854144618498413,
(8, 3): 0.4739574281045383,
(9, 0): 0.07482359865928406,
(9, 1): 0.7499983936496128,
(9, 2): 0.5420577123103719,
(9, 3): 0.07627448541464799,
(10, 0): 0.5934266484891275,
(10, 1): 0.8240260740178592,
(10, 2): 0.774672222464751,
(10, 3): 0.09258352148159432,
(11, 0): 0.0,
(11, 1): 0.0,
(11, 2): 0.0,
(11, 3): 0.0,
(12, 0): 0.0,
(12, 1): 0.0,
(12, 2): 0.0,
(12, 3): 0.0,
(13, 0): 0.5646633544813056,
(13, 1): 0.11021736826449313,
(13, 2): 0.6234923802366498,
(13, 3): 0.7062633423537164,
(14, 0): 0.807111703090516,
(14, 1): 0.6835467756183892,
(14, 2): 0.9109203529502328,
(14, 3): 0.7967422343382821,
(15, 0): 0.0,
(15, 1): 0.0,
(15, 2): 0.0,
(15, 3): 0.0}

You might also like