# Professional Documents
# Culture Documents
# Rafael Rangga - IF-7
# Rafael Rangga - IF-7
import numpy as np

# Tabular Q-learning on a small grid world.
#
# The agent starts every epoch at cell (0, 0) and learns, by epsilon-greedy
# trial and error, a value for each (row, column, action) triple. The learned
# table is printed when training has finished.

# Grid-world layout.
# Cell codes: 0 = free path, 1 = obstacle, 2 = goal.
environment = np.array([
    [0, 0, 0, 1, 0, 0],
    [0, 1, 0, 1, 0, 0],
    [0, 1, 0, 0, 0, 2],
    [0, 0, 0, 1, 1, 0],
    [1, 0, 0, 0, 0, 0],
])

# (row delta, column delta) per action code:
# 0 = row + 1, 1 = row - 1, 2 = column + 1, 3 = column - 1.
_ACTION_DELTAS = ((1, 0), (-1, 0), (0, 1), (0, -1))

# Rewards for entering a free cell, the goal, or an obstacle.
_STEP_REWARD = -1
_GOAL_REWARD = 10
_OBSTACLE_REWARD = -100

# Upper bound on steps per epoch so a poor policy can never loop forever.
_MAX_STEPS_PER_EPOCH = 1000

# Q-table initialisation: one value per (row, column, action).  The trailing
# action axis is required so that q_table[state] is a vector of action values
# rather than a single scalar.
q_table = np.zeros(environment.shape + (len(_ACTION_DELTAS),))

# Hyperparameters
learning_rate = 0.1
discount_factor = 0.9
exploration_rate = 0.1
epochs = 1000


def _step(state, action):
    """Apply *action* to *state* and return ``(next_state, reward)``.

    A move that would leave the grid is rejected: the agent stays where it
    is and receives the obstacle penalty.  Otherwise the agent enters the
    target cell (obstacle cells included) and the reward depends on that
    cell's code.
    """
    d_row, d_col = _ACTION_DELTAS[action]
    row, col = state[0] + d_row, state[1] + d_col
    n_rows, n_cols = environment.shape

    # Explicit bounds check: negative indices would otherwise wrap around
    # silently and too-large ones would raise IndexError.
    if not (0 <= row < n_rows and 0 <= col < n_cols):
        return state, _OBSTACLE_REWARD

    cell = environment[row, col]
    if cell == 0:
        return (row, col), _STEP_REWARD
    if cell == 2:
        return (row, col), _GOAL_REWARD
    return (row, col), _OBSTACLE_REWARD


# Q-learning algorithm
for epoch in range(epochs):
    state = (0, 0)  # the agent starts from the initial cell

    for _ in range(_MAX_STEPS_PER_EPOCH):
        # Choose an action (epsilon-greedy).
        if np.random.uniform(0, 1) < exploration_rate:
            # Explore: pick a random action.
            action = int(np.random.randint(0, len(_ACTION_DELTAS)))
        else:
            # Exploit: pick the best action according to the Q-table.
            action = int(np.argmax(q_table[state]))

        # Perform the action and obtain the reward.
        next_state, reward = _step(state, action)

        # Update the Q-table from the observed reward.  q_table[state] is a
        # view, so the in-place update writes through to q_table.
        action_values = q_table[state]
        td_target = reward + discount_factor * np.max(q_table[next_state])
        action_values[action] += learning_rate * (td_target - action_values[action])

        state = next_state

        # Stop the epoch once the agent has reached the goal; the enclosing
        # ``for`` enforces the iteration limit.
        if environment[state] == 2:
            break

# Print the learned Q-table
print("Q-table:")
print(q_table)