You are on page 1 of 3

In [1]: import pandas as pd

In [2]: import numpy as np

# Load the MBA dataset from disk; the raw string keeps the Windows path
# readable and denotes exactly the same path as the original
# double-backslash literal.
In [7]: mba = pd.read_csv(r"D:\Course\Python\Datasets\mba.csv")


mba

...

# Remove the 'Datasrno' column (appears to be a row serial number) before
# scaling. drop(columns=...) has the same effect as the original
# `del mba['Datasrno']`, including a KeyError if the column is absent.
In [8]: mba = mba.drop(columns=['Datasrno'])


In [9]: mba
...

# Save the remaining column labels (workex, gmat — see Out[10]) so the
# scaled NumPy arrays can later be rebuilt into DataFrames with the
# original headers.
In [10]: names = mba.columns


names

Out[10]: Index(['workex', 'gmat'], dtype='object')

Standardization
In [11]: from sklearn import preprocessing

# StandardScaler standardizes each column to zero mean and unit
# variance: z = (x - mean) / std.

scaler = preprocessing.StandardScaler()

# fit() learns each column's mean and standard deviation; transform()
# then applies z = (x - mean) / std. fit_transform() does both in one
# call — the original first assigned the fitted scaler OBJECT to mba1
# and immediately overwrote it with the transformed array, so the first
# assignment was dead code.
In [12]: mba1 = scaler.fit_transform(mba)  # standardized values as a NumPy array

In [13]: mba1

Out[13]: array([[-1.3336917 , 0.30134669],

[ 1.80853813, -2.42709832],

[-0.01833968, 0.98345794],

...,

[-1.07792881, -3.4502652 ],

[-1.73560482, -3.4502652 ],

[-0.20102746, -3.10920957]])
# Wrap the scaled NumPy array back into a DataFrame, restoring the
# column labels captured earlier in `names`.
In [14]: mba1 = pd.DataFrame(mba1, columns=names)

In [15]: mba1
...

Method 2 for Standardization


We can use the scale function to standardize the data.

The scale function is available in the sklearn.preprocessing module.

In [16]: from sklearn.preprocessing import scale

# scale() is a one-shot functional alternative to StandardScaler: it
# returns the standardized array directly, with no separate
# fit/transform steps.
# NOTE(review): cell In [23] later rebinds the name `scale` to a
# MinMaxScaler instance, shadowing this import — calling scale() after
# that point would fail.
mba2 = scale(mba)

In [17]: mba2
...

# Rebuild the standardized array as a DataFrame with the original
# column labels.
In [21]: mba2 = pd.DataFrame(mba2, columns=names)


mba2

...

Normalization - Rescaling the Data


In [22]: from sklearn.preprocessing import MinMaxScaler

# MinMaxScaler rescales each column into the [0, 1] range:
# x' = (x - min) / (max - min)

# Bind the scaler instance to a distinct name: the original used
# `scale = MinMaxScaler()`, which shadowed the scale() function imported
# in cell In [16] and would break any later call to it.
In [23]: minmax_scaler = MinMaxScaler()

# fit() learns each column's min and max; transform() applies
# (x - min) / (max - min). fit_transform() does both in one call — the
# original assigned the fitted scaler OBJECT to mba3 and immediately
# overwrote it with the transformed array.
In [24]: mba3 = minmax_scaler.fit_transform(mba)

In [26]: mba3

Out[26]: array([[0.04444444, 0.66666667],

[0.36296296, 0.22222222],

[0.17777778, 0.77777778],

...,

[0.07037037, 0.05555556],

[0.0037037 , 0.05555556],

[0.15925926, 0.11111111]])
# Rebuild the normalized array as a DataFrame with the original
# column labels.
In [27]: mba3 = pd.DataFrame(mba3, columns=names)

In [28]: mba3

...

Method 2 for Normalization


# Manual min-max normalization, applied column-wise:
# x' = (x - min) / (max - min). Same result as MinMaxScaler, but stays
# in pandas so the column labels are kept automatically.
In [29]: mba4 = mba.sub(mba.min()).div(mba.max() - mba.min())

In [30]: mba4

...

In [ ]: ​

You might also like