You are on page 1 of 4

Experiment No.: 1
Write python programs to demonstrate Preprocessing.
Program:
import numpy as np
import pandas as pd

# Build a 5x3 DataFrame from a flat NumPy array; np.nan marks missing entries.
# np.nan is the canonical spelling: the np.NAN alias was removed in NumPy 2.0
# and raises AttributeError there.
X = pd.DataFrame(
    np.array(
        [5, 7, 8,
         np.nan, np.nan, np.nan,
         -1, 0, 25,
         999, 1, -1,
         np.nan, 0, np.nan]
    ).reshape((5, 3))
)

# Name the three feature columns.
X.columns = ['f1', 'f2', 'f3']

# Drop rows in which every value is NaN, modifying X in place.
X.dropna(axis=0, how='all', inplace=True)

# Renumber the remaining rows from 0. drop=True discards the old index instead
# of inserting it as an 'index' column that would have to be removed afterwards.
X.reset_index(drop=True, inplace=True)

# Replace each remaining NaN with the mean of its own column.
X.fillna(X.mean(), inplace=True)

# Print two randomly chosen rows. The explicit print() makes the result visible
# when this file runs as a plain script; a bare expression is silently discarded.
print(X.sample(2))

# Print summary statistics for each column.
print(X.describe())

# Print the pairwise correlation matrix of the three features.
print(X[['f1', 'f2', 'f3']].corr())

from pandas.plotting import scatter_matrix

# Scatter plot of f2 against f1.
X1 = X.loc[:, ['f1', 'f2']]
X1.plot(kind='scatter', x='f1', y='f2')

# Grid of pairwise scatter plots over all three features, drawn with alpha=0.2.
X2 = X.loc[:, ['f1', 'f2', 'f3']]
scatter_matrix(X2, alpha=0.2)

# Switch the column labels to upper case, modifying X in place.
X.rename(columns=dict(f1="F1", f2="F2", f3="F3"), inplace=True)

# Append a 'sum' column holding the row-wise total of the three features.
X["sum"] = X["F1"].add(X["F2"]).add(X["F3"])

# Remove the 'sum' column again, rebinding X to the resulting frame.
X = X.drop(columns=["sum"])

Output:
Experiment No.:2
Write a Python program to preprocess the weatherAUS dataset.
Program:
import numpy as np
import pandas as pd

# Load the dataset (path is relative to the current working directory) and
# print its first rows as a quick preview.
data = pd.read_csv("weatherAUS.csv")
print(data.head())

# Drop rows in which every value is NaN, then renumber the rows from 0.
# drop=True discards the old index rather than adding it as an 'index' column.
data.dropna(axis=0, how="all", inplace=True)
data.reset_index(drop=True, inplace=True)

# Fill remaining NaNs with the mean of their column. numeric_only=True limits
# the means to numeric columns; without it pandas >= 2.0 raises TypeError when
# the frame holds non-numeric columns.
# NOTE(review): presumably this CSV has text columns (dates, locations, ...) --
# confirm against the file. Those columns keep their NaNs.
data.fillna(data.mean(numeric_only=True), inplace=True)

# Print summary statistics for the frame. The explicit print() keeps the result
# visible when run as a script; a bare expression is silently discarded.
print(data.describe())

# Print the correlation matrix between minimum and maximum temperature.
print(data[['MinTemp', 'MaxTemp']].corr())

from pandas.plotting import scatter_matrix

# Work on just the two temperature columns.
x = data.loc[:, ['MinTemp', 'MaxTemp']]

# Scatter plot of MaxTemp against MinTemp.
x.plot(kind='scatter', x='MinTemp', y='MaxTemp')

# Pairwise scatter-plot grid of the same two columns, drawn with alpha=0.2.
scatter_matrix(x, alpha=0.2)

# Derive a new column as the mean of the 9am and 3pm pressure readings, then
# print a preview. The explicit print() keeps the output visible in a script.
data["AveragePressure"] = (data["Pressure9am"] + data["Pressure3pm"]) / 2
print(data.head())

# Remove the derived column again (in place) and preview the restored frame.
data.drop(["AveragePressure"], axis=1, inplace=True)
print(data.head())

Output:

You might also like