Dimensionality Reduction Using PCA
In [5]: # import the libraries
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
In [7]: # creating the dataset
x = np.array([[4, 11], [8, 4], [13, 5], [7, 14]])
x
Out[7]: array([[ 4, 11],
               [ 8,  4],
               [13,  5],
               [ 7, 14]])
In [8]: x.shape
Out[8]: (4, 2)
In [10]: # scatter plot of the raw data
plt.scatter(x[:, 0], x[:, 1])
plt.xlabel("x1")
plt.ylabel("x2")
plt.show()
Step 1: Standardization of the data
In [11]: # calculation of the column means
xbar = x.mean(axis=0)
xbar
Out[11]: array([8. , 8.5])
In [12]: # sample standard deviation (ddof=1 divides by n - 1)
sigma = np.std(x, axis=0, ddof=1)
sigma
Out[12]: array([3.74165739, 4.79583152])
In [13]: # z-score standardization
z = (x - xbar) / sigma
z
Out[13]: array([[-1.06904497,  0.52128604],
                [ 0.        , -0.93831486],
                [ 1.33630621, -0.72980045],
                [-0.26726124,  1.14682928]])
In [14]: # scatter plot of the standardized data
plt.scatter(z[:, 0], z[:, 1])
plt.xlabel("z1")
plt.ylabel("z2")
plt.show()
In [15]: # verify: the standardized columns have mean 0
np.round(z.mean(axis=0), 2)
Out[15]: array([-0., 0.])
In [16]: # verify: the standardized columns have sample standard deviation 1
z.std(axis=0, ddof=1)
Out[16]: array([1., 1.])
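As a cross-check, the same standardization can be done with scikit-learn's StandardScaler (a sketch reusing the x and z defined above). One caveat: StandardScaler divides by the population standard deviation (ddof=0), while this notebook uses the sample standard deviation (ddof=1), so its output must be rescaled by a constant factor to match:

from sklearn.preprocessing import StandardScaler

# StandardScaler uses ddof=0; rescale to match the ddof=1 z computed above
z_scaler = StandardScaler().fit_transform(x)
n = x.shape[0]
z_ddof1 = z_scaler * np.sqrt((n - 1) / n)
print(np.allclose(z_ddof1, z))  # True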
Step 2: Calculation of the covariance matrix
In [19]: # rowvar=False treats each column as a variable
cov_matrix = np.cov(x, rowvar=False)
cov_matrix
Out[19]: array([[ 14., -11.],
                [-11.,  23.]])
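Note that the covariance matrix is computed from the raw x rather than the standardized z; the PCA in step 5 is likewise fit on x, which is why the eigenvalues below match pca.explained_variance_. The matrix itself can be reproduced by hand from the sample-covariance definition, a quick sketch reusing x:

xc = x - x.mean(axis=0)                    # center each column
cov_manual = xc.T @ xc / (x.shape[0] - 1)  # divide by n - 1 (sample covariance)
print(cov_manual)                          # [[ 14. -11.] [-11.  23.]]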
Step 3: Calculation of the eigenvalues
In [20]: # eigh is meant for symmetric matrices and returns eigenvalues in ascending order
eigen_values, eigen_vector = np.linalg.eigh(cov_matrix)
In [21]: eigen_values
Out[21]: array([ 6.61513568, 30.38486432])
Step 4: Calculation of the eigenvectors
In [24]: eigen_vector
Out[24]: array([[-0.83025082, -0.55738997],
                [-0.55738997,  0.83025082]])
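Each column of eigen_vector pairs with the eigenvalue at the same index (in the ascending order eigh returns them). A quick sanity check, reusing the variables above, that C v = λ v holds for both pairs:

# sanity check: cov_matrix @ v equals lambda * v for each eigenpair
for i in range(len(eigen_values)):
    v = eigen_vector[:, i]
    assert np.allclose(cov_matrix @ v, eigen_values[i] * v)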
In [25]: # indices that sort the eigenvalues in descending order
sorted_index = np.argsort(eigen_values)[::-1]
sorted_index
Out[25]: array([1, 0], dtype=int64)
In [26]: # reorder the eigenvector columns to match the descending eigenvalues
sorted_eigenvector = eigen_vector[:, sorted_index]
sorted_eigenvector
Out[26]: array([[-0.55738997, -0.83025082],
                [ 0.83025082, -0.55738997]])
In [29]: # an eigenvector's sign is arbitrary (v and -v are equally valid); flip for convenience
sorted_eigenvector = -1 * sorted_eigenvector
sorted_eigenvector
Out[29]: array([[ 0.55738997,  0.83025082],
                [-0.83025082,  0.55738997]])
Step 5: Calculation of the principal components
In [30]: from sklearn.decomposition import PCA
pca = PCA(n_components=2)
In [32]: # PCA centers the data internally, so it is fit on the raw x
data_pca = pca.fit_transform(x)
data_pca
Out[32]: array([[-4.30518692,  1.92752836],
                [ 3.73612869,  2.50825486],
                [ 5.69282771, -2.20038921],
                [-5.12376947, -2.23539401]])
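As a cross-check, the same scores can be obtained by projecting the mean-centered data onto the sorted eigenvectors from step 4. Individual columns may come out with flipped signs, since an eigenvector's orientation is arbitrary:

# sketch: PCA scores as a projection onto the sorted eigenvectors
scores_manual = (x - x.mean(axis=0)) @ sorted_eigenvector
# agreement with sklearn up to a per-column sign flip
print(np.allclose(np.abs(scores_manual), np.abs(data_pca)))  # True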
In [33]: # matches the covariance-matrix eigenvalues, in descending order
pca.explained_variance_
Out[33]: array([30.38486432, 6.61513568])
In [35]: # percentage of the total variance captured by each component
explained_variance = pca.explained_variance_ratio_ * 100
explained_variance
Out[35]: array([82.12125493, 17.87874507])
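The same percentages follow directly from the eigenvalues: each eigenvalue divided by their sum (30.38 + 6.62 = 37). A quick check reusing the values from step 3:

ratios = 100 * eigen_values[sorted_index] / eigen_values.sum()
print(ratios)  # [82.12125493 17.87874507]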
In [36]: plt.bar(x=range(1, 3), height=explained_variance)
plt.xlabel("Number of components")
plt.ylabel("Variance percentage")
plt.title("Variance explained per principal component")
plt.show()
In [37]: plt.figure(figsize=(8, 6))
plt.scatter(data_pca[:, 0], data_pca[:, 1])
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.show()
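Both components were kept above so nothing is lost in the comparison; since PC1 alone explains about 82% of the variance, the dimensionality reduction itself would keep only the first component. A minimal sketch (pca1, x_reduced, and x_restored are illustrative names, not part of the notebook above):

pca1 = PCA(n_components=1)
x_reduced = pca1.fit_transform(x)               # shape (4, 1): the 1-D representation
x_restored = pca1.inverse_transform(x_reduced)  # approximate 2-D reconstruction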