You are on page 1 of 7

import numpy as np

# Tabular Q-learning on a small grid world.
#
# The agent starts every epoch at the top-left cell (0, 0) and learns, by
# trial and error, the value of each of four moves in every cell.  After
# training, the learned Q-table is printed.

# Matrix representing the environment.
# Cell codes: 0 = free path, 1 = obstacle, 2 = goal.
environment = np.array([
    [0, 0, 0, 1, 0, 0],
    [0, 1, 0, 1, 0, 0],
    [0, 1, 0, 0, 0, 2],
    [0, 0, 0, 1, 1, 0],
    [1, 0, 0, 0, 0, 0],
])

# (row, col) offset applied by each action index.  This is the same mapping
# the original arithmetic encoded:
#   0 -> row + 1, 1 -> row - 1, 2 -> col + 1, 3 -> col - 1.
ACTION_DELTAS = ((1, 0), (-1, 0), (0, 1), (0, -1))

# Q-table initialisation: one value per (row, col, action).
# The action axis is required; a plain (rows, cols) table cannot be indexed
# by action and makes the update below fail with an IndexError.
q_table = np.zeros(environment.shape + (len(ACTION_DELTAS),))

# Hyperparameters
learning_rate = 0.1
discount_factor = 0.9
exploration_rate = 0.1
epochs = 1000
# Upper bound on the number of moves in one epoch, so an epoch always
# terminates even if the goal is not reached.
max_steps_per_epoch = 1000

n_rows, n_cols = environment.shape

# Q-learning algorithm
for epoch in range(epochs):
    state = (0, 0)  # the agent starts from the initial cell

    for _ in range(max_steps_per_epoch):
        # Choose an action (epsilon-greedy).
        if np.random.uniform(0, 1) < exploration_rate:
            # Explore: pick a random action (upper bound is exclusive).
            action = int(np.random.randint(0, len(ACTION_DELTAS)))
        else:
            # Exploit: pick the best action according to the Q-table.
            action = int(np.argmax(q_table[state]))

        # Perform the action and obtain the reward.
        d_row, d_col = ACTION_DELTAS[action]
        candidate = (state[0] + d_row, state[1] + d_col)
        if 0 <= candidate[0] < n_rows and 0 <= candidate[1] < n_cols:
            next_state = candidate
            cell = environment[next_state]
            # -1 per step on a free cell, +10 for the goal, -100 for an obstacle.
            reward = -1 if cell == 0 else (10 if cell == 2 else -100)
        else:
            # Moving off the grid: stay in place and apply the obstacle
            # penalty.  Without this check a negative index silently wraps
            # to the opposite edge and a too-large index raises IndexError.
            next_state = state
            reward = -100

        # Update the Q-table from the observed reward (temporal-difference step).
        sa = state + (action,)
        td_target = reward + discount_factor * np.max(q_table[next_state])
        q_table[sa] = q_table[sa] + learning_rate * (td_target - q_table[sa])

        state = next_state

        # Stop the epoch once the agent has reached the goal; the enclosing
        # for-loop enforces the iteration limit.
        if environment[state] == 2:
            break

# Print the learned Q-table.
print("Q-table:")
print(q_table)

You might also like