1 Question No. 1 Synthetic Data Generation and Simple Curve Fitting

ML1
February 13, 2019
1 Question no. 1 Synthetic data generation and simple curve fitting

1.1 a) dataset size=10
In [113]: import numpy as np
In [168]: x=np.random.uniform(0,1,10);x.sort()
% store x
x
Stored 'x' (ndarray)
Out[168]: array([4.56572618e-05, 3.20138948e-02, 7.33332470e-02, 8.22840269e-02,

1.00608680e-01, 1.41989948e-01, 3.02205900e-01, 5.09208602e-01,
5.48487424e-01, 9.16432859e-01])
In [169]: y=np.sin(2*np.pi*x)+ np.random.normal(0,0.3,10)

% store y
y
Stored 'y' (ndarray)
Out[169]: array([ 0.03583561, 0.08863262, 0.26436301, 0.73229327, 0.6498295 ,

0.76267081, 1.08759352, 0.10366157, -0.41853198, -0.38551607])
In [170]: % store -r
In [171]: (x,y)
Out[171]: (array([4.56572618e-05, 3.20138948e-02, 7.33332470e-02, 8.22840269e-02,

1.00608680e-01, 1.41989948e-01, 3.02205900e-01, 5.09208602e-01,
5.48487424e-01, 9.16432859e-01]),
array([ 0.03583561, 0.08863262, 0.26436301, 0.73229327, 0.6498295 ,
0.76267081, 1.08759352, 0.10366157, -0.41853198, -0.38551607]))
1
1.2 b)
In [172]: from random import sample
a=sample(range(0,10),8);
% store a
Stored 'a' (list)
In [173]: % store -r
In [174]: index=[i for i in list(range(0,10)) if i not in list(a)]
In [175]: X_test=x[index]; y_test=y[index];

y_test=y_test.reshape(y_test.shape[0],1)
In [176]: X_train=x[a];
X_train
Out[176]: array([0.03201389, 0.5092086 , 0.3022059 , 0.54848742, 0.07333325,

0.10060868, 0.14198995, 0.91643286])
In [177]: y_train=y[a];y_train=y_train.reshape(y_train.shape[0],1)
y_train
Out[177]: array([[ 0.08863262],

[ 0.10366157],
[ 1.08759352],
[-0.41853198],
[ 0.26436301],
[ 0.6498295 ],
[ 0.76267081],
[-0.38551607]])
1.3 c)
In [178]: m=np.zeros((X_train.shape[0],10));
for i in range(0,10):
m[:,i]=X_train**i
M=np.zeros((X_test.shape[0],10));
M[:,i]=X_test**i
In [179]: X_train_1=m[:,[0,1]]; X_test_1=M[:,[0,1]]; theta1=np.zeros((2,1));
In [180]: def model(X,theta):

return(np.matmul(X,theta))
def cost(X,theta,y):
2
return(sum((np.matmul(X,theta)-y)**2)[0]/(2*X.shape[0]))
def gradient_descent(X,theta,y,alpha):
J=cost(X,theta,y)
theta1=theta-(alpha/X.shape[0])*(np.matmul(X.T,(model(X,theta)-y)))
J_opt=cost(X,theta1,y)
if(J<=J_opt):
break;
else:
theta=theta1
continue;
print("Number of iterations: ",i);
return(theta)
In [181]: theta_opt1=gradient_descent(X_train_1,theta1,y_train,0.05)
theta_opt1
Number of iterations: 4028
Out[181]: array([[ 0.61901605],

[-1.06674016]])
In [182]: test_error1=cost(X_test_1,theta_opt1,y_test); train_error1=cost(X_train_1,theta_opt1,

test_error1
Out[182]: 0.09511622271301672
In [183]: X_train_2=m[:,[0,1,2]]; X_test_2=M[:,[0,1,2]]; theta2=np.zeros((3,1))
theta_opt2
Out[184]: array([[ 0.45100356],

[ 0.39766551],
[-1.63541815]])

test_error2
Out[185]: 0.059948244666874276
In [186]: X_train_3=m[:,[0,1,2,3]]; X_test_3=M[:,[0,1,2,3]]; theta3=np.zeros((4,1))
theta_opt3
3
Out[187]: array([[ 0.30054854],

[ 3.03422839],
[-9.49857845],
[ 5.73426157]])

test_error3
Out[188]: 0.03232230527392706
In [189]: X_train_4=m[:,[0,1,2,3,4]]; X_test_4=M[:,[0,1,2,3,4]]; theta4=np.zeros((5,1))
theta_opt4
Out[190]: array([[ 0.13043307],

[ 5.26976736],
[-11.74893731],
[ -2.00083726],
[ 8.52197539]])

test_error4
Out[191]: 0.017688444243537934
In [192]: X_train_5=m[:,[0,1,2,3,4,5]]; X_test_5=M[:,[0,1,2,3,4,5]]; theta5=np.zeros((6,1))
theta_opt5
Out[193]: array([[ 0.02671602],

[ 6.41175294],
[-11.73377243],
[ -5.25211259],
[ 3.01348511],
[ 8.43915221]])

test_error5
4
Out[194]: 0.016944689569137104
In [195]: X_train_6=m[:,[0,1,2,3,4,5,6]]; X_test_6=M[:,[0,1,2,3,4,5,6]]; theta6=np.zeros((7,1)
theta_opt6
Out[196]: array([[ -0.02774013],

[ 6.91790094],
[-11.19503816],
[ -6.58032958],
[ 0.24918351],
[ 4.90843665],
[ 7.43361559]])

test_error6
Out[197]: 0.019256817795943935
In [198]: X_train_7=m[:,[0,1,2,3,4,5,6,7]]; X_test_7=M[:,[0,1,2,3,4,5,6,7]]; theta7=np.zeros((
theta_opt7
Out[199]: array([[ -0.05590717],

[ 7.13328798],
[-10.65608892],
[ -7.13811592],
[ -1.22017668],
[ 2.92885583],
[ 5.24233003],
[ 6.31298633]])

test_error7
Out[200]: 0.021328822695781458
In [201]: X_train_8=m[:,[0,1,2,3,4,5,6,7,8]]; X_test_8=M[:,[0,1,2,3,4,5,6,7,8]]; theta8=np.zer
theta_opt8
5
Out[202]: array([[ -0.07098847],

[ 7.2235418 ],
[-10.22581104],
[ -7.38324589],
[ -2.05894679],
[ 1.74728375],
[ 3.91118967],
[ 4.95068973],
[ 5.31900064]])

test_error8
Out[203]: 0.022763509050243616
In [204]: X_train_9=m[:,[0,1,2,3,4,5,6,7,8,9]]; X_test_9=M[:,[0,1,2,3,4,5,6,7,8,9]]; theta9=np
theta_opt9
Out[205]: array([[-0.07949424],
[ 7.2603034 ],
[-9.90234645],
[-7.49645706],
[-2.57160618],
[ 0.99907177],
[ 3.05693338],
[ 4.07070013],
[ 4.45792049],
[ 4.4952223 ]])

test_error9
Out[206]: 0.023714480158400306
1.4 Question no. 2 Visualization of the dataset and the fitted curves
1.5 a)
In [207]: import matplotlib.pyplot as plt
In [208]: plt.plot(x,y,'o')
Out[208]: [<matplotlib.lines.Line2D at 0x5c6272d978>]
6
In [209]: g=np.zeros((x.shape[0],10));
g[:,i]=x**i
In [210]: g1=g[:,[0,1]];
y1=model(g1,theta_opt1)
plt.plot(x,y,'o')
plt.plot(x,y1,marker='o')
Out[210]: [<matplotlib.lines.Line2D at 0x5c626ddb00>]
7
In [211]: g2=g[:,[0,1,2]];
plt.plot(x,y,'o')
Out[211]: [<matplotlib.lines.Line2D at 0x5c627a9ac8>]
8
In [212]: g3=g[:,[0,1,2,3]];
plt.plot(x,y,'o')
Out[212]: [<matplotlib.lines.Line2D at 0x5c637df6d8>]
In [213]: g4=g[:,[0,1,2,3,4]];
plt.plot(x,y,'o')
Out[213]: [<matplotlib.lines.Line2D at 0x5c627af9b0>]
9
In [214]: g5=g[:,[0,1,2,3,4,5]];
plt.plot(x,y,'o')
Out[214]: [<matplotlib.lines.Line2D at 0x5c63804b00>]
10
In [215]: g6=g[:,[0,1,2,3,4,5,6]];
plt.plot(x,y,'o')
Out[215]: [<matplotlib.lines.Line2D at 0x5c638ee550>]
In [216]: g7=g[:,[0,1,2,3,4,5,6,7]];
plt.plot(x,y,'o')
Out[216]: [<matplotlib.lines.Line2D at 0x5c63959eb8>]
11
In [217]: g8=g[:,[0,1,2,3,4,5,6,7,8]];
plt.plot(x,y,'o')
Out[217]: [<matplotlib.lines.Line2D at 0x5c639c17b8>]
12
In [218]: g9=g[:,[0,1,2,3,4,5,6,7,8,9]];
plt.plot(x,y,'o')
Out[218]: [<matplotlib.lines.Line2D at 0x5c63a06cc0>]
1.6 b)
In [219]: trainerror=[train_error1,train_error2,train_error3,train_error4,train_error5,train_er
testerror=[test_error1,test_error2,test_error3,test_error4,test_error5,test_error6,te
In [220]: n=list(range(1,10))
In [221]: plt.plot(n,trainerror,marker='o')
plt.plot(n,testerror,color='r',marker='o')
plt.xlabel('n'); plt.ylabel('error')
plt.legend(['Training Error','Test Error'])
Out[221]: <matplotlib.legend.Legend at 0x5c63aaac88>
13
In [225]: test_error4
Out[225]: 0.017688444243537934
In [226]: test_error5
Out[226]: 0.016944689569137104
In [227]: abs(train_error4-test_error4)
Out[227]: 0.007501399085289059
In [228]: abs(train_error5-test_error5)
Out[228]: 0.003355065146649662
1.7 So, from the above plot we can see that at n=5 test error is minimum and the
difference between training and test error is also minimum so n=5 is suitable here.
14

1 Question No. 1 Synthetic Data Generation and Simple Curve Fitting

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

1 Question No. 1 Synthetic Data Generation and Simple Curve Fitting

Uploaded by

Copyright:

Available Formats

ML1

February 13, 2019

1 Question no. 1 Synthetic data generation and simple curve fitting

Stored 'x' (ndarray)

Out[168]: array([4.56572618e-05, 3.20138948e-02, 7.33332470e-02, 8.22840269e-02,

In [169]: y=np.sin(2*np.pi*x)+ np.random.normal(0,0.3,10)

Stored 'y' (ndarray)

Out[169]: array([ 0.03583561, 0.08863262, 0.26436301, 0.73229327, 0.6498295 ,

Out[171]: (array([4.56572618e-05, 3.20138948e-02, 7.33332470e-02, 8.22840269e-02,

Stored 'a' (list)

In [174]: index=[i for i in list(range(0,10)) if i not in list(a)]

In [175]: X_test=x[index]; y_test=y[index];

Out[176]: array([0.03201389, 0.5092086 , 0.3022059 , 0.54848742, 0.07333325,

Out[177]: array([[ 0.08863262],

In [179]: X_train_1=m[:,[0,1]]; X_test_1=M[:,[0,1]]; theta1=np.zeros((2,1));

In [180]: def model(X,theta):

Number of iterations: 4028

Out[181]: array([[ 0.61901605],

In [182]: test_error1=cost(X_test_1,theta_opt1,y_test); train_error1=cost(X_train_1,theta_opt1,

In [183]: X_train_2=m[:,[0,1,2]]; X_test_2=M[:,[0,1,2]]; theta2=np.zeros((3,1))

Number of iterations: 77664

Out[184]: array([[ 0.45100356],

In [185]: test_error2=cost(X_test_2,theta_opt2,y_test); train_error2=cost(X_train_2,theta_opt2,

In [186]: X_train_3=m[:,[0,1,2,3]]; X_test_3=M[:,[0,1,2,3]]; theta3=np.zeros((4,1))

Out[187]: array([[ 0.30054854],

In [188]: test_error3=cost(X_test_3,theta_opt3,y_test); train_error3=cost(X_train_3,theta_opt3,

In [189]: X_train_4=m[:,[0,1,2,3,4]]; X_test_4=M[:,[0,1,2,3,4]]; theta4=np.zeros((5,1))

Number of iterations: 99999

Out[190]: array([[ 0.13043307],

In [191]: test_error4=cost(X_test_4,theta_opt4,y_test); train_error4=cost(X_train_4,theta_opt4,

In [192]: X_train_5=m[:,[0,1,2,3,4,5]]; X_test_5=M[:,[0,1,2,3,4,5]]; theta5=np.zeros((6,1))

Number of iterations: 99999

Out[193]: array([[ 0.02671602],

In [194]: test_error5=cost(X_test_5,theta_opt5,y_test); train_error5=cost(X_train_5,theta_opt5,

In [195]: X_train_6=m[:,[0,1,2,3,4,5,6]]; X_test_6=M[:,[0,1,2,3,4,5,6]]; theta6=np.zeros((7,1)

Number of iterations: 99999

Out[196]: array([[ -0.02774013],

In [197]: test_error6=cost(X_test_6,theta_opt6,y_test); train_error6=cost(X_train_6,theta_opt6,

In [198]: X_train_7=m[:,[0,1,2,3,4,5,6,7]]; X_test_7=M[:,[0,1,2,3,4,5,6,7]]; theta7=np.zeros((

Number of iterations: 99999

Out[199]: array([[ -0.05590717],

In [200]: test_error7=cost(X_test_7,theta_opt7,y_test); train_error7=cost(X_train_7,theta_opt7,

In [201]: X_train_8=m[:,[0,1,2,3,4,5,6,7,8]]; X_test_8=M[:,[0,1,2,3,4,5,6,7,8]]; theta8=np.zer

Out[202]: array([[ -0.07098847],

In [203]: test_error8=cost(X_test_8,theta_opt8,y_test); train_error8=cost(X_train_8,theta_opt8,

In [204]: X_train_9=m[:,[0,1,2,3,4,5,6,7,8,9]]; X_test_9=M[:,[0,1,2,3,4,5,6,7,8,9]]; theta9=np

Number of iterations: 99999

In [206]: test_error9=cost(X_test_9,theta_opt9,y_test); train_error9=cost(X_train_9,theta_opt9,

Out[208]: [<matplotlib.lines.Line2D at 0x5c6272d978>]

Out[210]: [<matplotlib.lines.Line2D at 0x5c626ddb00>]

Out[211]: [<matplotlib.lines.Line2D at 0x5c627a9ac8>]

Out[212]: [<matplotlib.lines.Line2D at 0x5c637df6d8>]

Out[213]: [<matplotlib.lines.Line2D at 0x5c627af9b0>]

Out[214]: [<matplotlib.lines.Line2D at 0x5c63804b00>]

Out[215]: [<matplotlib.lines.Line2D at 0x5c638ee550>]

Out[216]: [<matplotlib.lines.Line2D at 0x5c63959eb8>]

Out[217]: [<matplotlib.lines.Line2D at 0x5c639c17b8>]

Out[218]: [<matplotlib.lines.Line2D at 0x5c63a06cc0>]

Out[221]: <matplotlib.legend.Legend at 0x5c63aaac88>

You might also like

In [169]: y=np.sin(2np.pix)+ np.random.normal(0,0.3,10)