LAB FILE-Shelly Sharma

Banasthali Vidyapith
Tonk Rajasthan
Faculty of Mathematics and Computing
Information Technology
“AI ML LAB RECORD”
Submitted by:
Name: SHELLY SHARMA
Smart Card ID: BTBTI20249
Roll no. : 2016820
LAB-1
AIM: To learn about data preprocessing.
LOGIC:
Steps to import a dataset that has been downloaded from kaggle.com:
import pandas as pd
df = pd.read_csv(r'diabetes.csv')
print(df)
Steps to find the null value:
import pandas as pd
df=pd.read_csv('diabetes.csv')
df.isnull()
Steps to check a single column (Insulin) for null values:
bool_series=pd.isnull(df["Insulin"])
print(bool_series)
Steps to check a single row (the row at position 1) for null values:
bool_series=pd.isnull(df.iloc[1])
print(bool_series)
LAB-2
AIM: TO LEARN HOW TO NORMALIZE.
LOGIC:
Steps to create a data frame:
df = pd.DataFrame({'date': ['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
                            '2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04'],
                   'fruit': ['apple', 'apple', 'apple', 'apple',
                             'mango', 'mango', 'mango', 'mango'],
                   'price': [0.80, None, None, 1.20, None, 2.10, 2.00, 1.80]})
df['date'] = pd.to_datetime(df['date'])
Steps to replace null values with a specified value:
df['price'].fillna(value = 0.85, inplace = True)
Steps to replace null values with the mean of the column:
df['price'].fillna(value = df.price.mean(), inplace =

True)
df.price.mean()
Steps to replace null values with the median of the column:
df['price'].fillna(value = df.price.median(),
inplace = True)
df.price.median()
Steps to replace null values with the mean of each group (fruit):
df['price'].fillna(df.groupby('fruit')
['price'].transform('mean'), inplace = True)
Steps to replace null values with the median of each group (fruit):
df['price'].fillna(df.groupby('fruit')
['price'].transform('median'), inplace = True)
Steps to replace the null value with the previous row's value (forward fill):
df['price'].fillna(method='ffill', inplace=True)
df
Steps to forward fill the null values, but with a limit:
df['price'].fillna(method='ffill', limit=1,
inplace=True)
df
Steps to forward fill the null values within each group:
df['price']=df.groupby('fruit')['price'].ffill()
df
Steps to forward fill the null values within each group, with a limit:
df['price']=df.groupby('fruit')['price'].ffill(limit=1)
df
Steps to fill the missing values backward
df['price'].fillna(method = 'bfill', inplace = True)
df
Steps to backward fill the values but with a limit
df['price'].fillna(method = 'bfill',limit=1, inplace =

True)
df
Steps to group the data with backward as well as forward fill:
df['price'] = df.groupby('fruit')['price'].bfill().ffill()
df
Steps for interpolation:
df['price'].interpolate(method='linear',
inplace=True)
df
df['price']=df.groupby('fruit')['price'].apply(lambda
x:x.interpolate(method='linear'))
df
df['price']=df.groupby('fruit')['price'].apply(lambda
x:x.interpolate(method='linear')).bfill()
df
df['weekday']=df['date'].apply(lambda x: False if
x.day_name() in ['Saturday', 'Sunday'] else True)
df
mean_price=df.groupby('fruit')['price'].transform('mean')
df
df['price'].fillna((mean_price).where(cond =
df.weekday, other = mean_price*1.25), inplace = True)
df
LAB-3
AIM: To learn mapping
df=pd.DataFrame([['green','M',10.1,'class1'],
['red','L',13.5,'class2'],['blue','XL',15.3,'class1']])
df
df.columns=['color','size','price','classlabel']
df
import numpy as np
size_mapping={'XL':3,'L':2,'M':1}
df['size']=df['size'].map(size_mapping)
df
np.unique(df['classlabel'])
class_mapping={label:idx for idx,label in

enumerate(np.unique(df['classlabel']))}
class_mapping
df['classlabel']=df['classlabel'].map(class_mapping)
class_mapping
print (df)
LAB-4
AIM: To drop rows
import sys
if sys.version_info[0]>=3:
    unicode=str
import pandas as pd
from io import StringIO
csv_data='''A,B,C,D
1.0,2.0,3.0,4.0
5.0,6.0,,8.0
0.0,11.0,12.0,'''
csv_data= str (csv_data)
df=pd.read_csv(StringIO(csv_data))
df
df.isnull()
df.isnull().sum()
df.values
df.dropna()
df.dropna(axis=1)
df.dropna(how='all')
df.dropna(thresh=4)
df.dropna(subset=['C'])
import numpy as np
from sklearn.impute import SimpleImputer
imputer= SimpleImputer(missing_values=
np.nan,strategy='mean')
imputer.fit([[1,2],[np.nan,3],[7,6]])
SimpleImputer()
X=[[np.nan,2],[6,np.nan],[7,6]]
print(imputer.transform(X))
LAB-5
AIM: To learn pre-processing
LOGIC:
from sklearn import preprocessing
import numpy as np
X_train = np.array([[ 1., -1., 2.],
[ 2., 0., 0.],
[ 0., 1., -1.]])
scaler = preprocessing.StandardScaler().fit(X_train)
scaler
scaler.mean_
scaler.scale_
X_scaled = scaler.transform(X_train)
X_scaled
X_scaled.mean(axis=0)
X_scaled.std(axis=0)
LAB-6 & LAB-7
AIM: To learn to plot a Linear Regression model.
LOGIC:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
df=pd.read_csv('diabetes.csv')
print(df)
x=df.DiabetesPedigreeFunction.max()
y=df.Glucose.max()
x=df.DiabetesPedigreeFunction.min()
x
df.to_numpy()
x=df.DiabetesPedigreeFunction.to_numpy()
y=df.Glucose.to_numpy()
x=x.reshape(-1,1)
regression_model=LinearRegression()
regression_model.fit(x,y)
y_predicted=regression_model.predict(x)
y_predicted
plt.scatter(x,y,color='red')
plt.plot(x,y_predicted,color='green')
plt.xlabel('x')
plt.ylabel('Y')
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
print(scaler.fit(x))
print(scaler.data_max_)
print(scaler.transform(x))
plt.xlabel('x')
plt.ylabel('Y')
y=df.Glucose.to_numpy
y=y.reshape(-1,1)
y=df.Glucose.to_numpy()
y=y.reshape(-1,1)
plt.xlabel('x')
plt.ylabel('Y')
scaler=MinMaxScaler()
print(scaler.fit(y))
print(scaler.data_max_)
y=scaler.transform(y)
y
error=y-y_predicted
error
error=y-y_predicted
se=np.sum(error**2)
print('Squared error is ',se)

n=np.size(x)
print(n)
x_mean=np.mean(x)
y_mean=np.mean(y)
x_mean,y_mean
Sxy=np.sum(x*y)-n*x_mean*y_mean
Sxx=np.sum(x*x)-n*x_mean*x_mean
Sxx,Sxy
b1=Sxy/Sxx
b0=y_mean-b1*x_mean
print("Slope :",b1)
print("Intercept is",b0)
plt.scatter(x,y)
plt.xlabel("Independent variable x")
plt.ylabel("Dependent variable y")
y_pred=b0+b1*x
y_pred
plt.plot(x,y_pred,color='green')
plt.xlabel('x')
plt.ylabel('Y')
error=y-y_pred
se=np.sum(error**2)
print('Squared error is ',se)
mse=se/n
mse
rmse=np.sqrt(mse)
print('Root mean squared error ',rmse)
SSt=np.sum((y-y_mean)**2)
R2=1-(se/SSt)
print('R square is ',R2)
y_predicted
plt.plot(x,y_pred,color='green')
plt.xlabel('x')
plt.ylabel('Y')
x=df[['DiabetesPedigreeFunction','Age']]
x.to_numpy()
y_predicted
LAB-8 & LAB-9 & LAB-10
AIM : To plot various situations using matplotlib
LOGIC:
To import various libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
x= np.array([1,2,3,4,5])
y= np.array([7,14,15,18,19])
n= np.size(x)
x_mean= np.mean(x)
y_mean= np.mean(y)
x_mean, y_mean
Sxy= np.sum(x*y)-n*x_mean*y_mean
Sxx= np.sum(x*x)-n*x_mean*x_mean
b1= Sxy/Sxx
b0= y_mean-b1*x_mean
print('slope b1 is', b1)
print('intercept b0 is',b0)
plt.scatter(x,y)
plt.xlabel('Independent variable x')
plt.ylabel('Dependent variable y')

y_pred= b1*x+b0
plt.scatter(x,y, color='red')
plt.plot(x,y_pred,color='green')
plt.xlabel('X')
plt.ylabel('Y')
error= y- y_pred
se = np.sum (error**2)
print('squared error is', se)

mse= se/n
print('mean squared error is',mse)
rmse = np.sqrt(mse)
print('root mean square error is',rmse)
SSt= np.sum((y-y_mean)**2)
R2=1- (se/SSt)
print('R square is',R2)
x= x.reshape(-1,1)
regression_model= LinearRegression()
import pandas as pd
df= pd.read_csv(r'diabetes.csv')
print (df)
Sxy= np.sum(x*y)-n*x_mean*y_mean
Sxx= np.sum(x*x)-n*x_mean*x_mean
b1= Sxy/Sxx
b0= y_mean-b1*x_mean
print('slope b1 is', b1)
print('intercept b0 is',b0)
plt.scatter(x,y)
plt.xlabel('Independent variable x')
plt.ylabel('Dependent variable y')
y_pred= b1*x+b0
plt.scatter(x,y, color='red')
plt.plot(x,y_pred,color='green')
plt.xlabel('X')
plt.ylabel('Y')
Steps to calculate the error metrics (squared error, MSE, RMSE and R square):
error= y- y_pred
se = np.sum (error**2)
print('squared error is', se)
mse= se/n
print('mean squared error is',mse)

rmse = np.sqrt(mse)
print('root mean square error is',rmse)
R2=1- (se/SSt)
print('R square is',R2)
mse= mean_squared_error(y,y_predicted)
rmse= np.sqrt(mean_squared_error(y,y_predicted))
r2= r2_score(y,y_predicted)
print('Slope:', regression_model.coef_)
print('Intercept:', regression_model.intercept_)
print ('MSE:', mse)
print('Root mean squared error: ', rmse)
print('R2 score:', r2)
scaler = MinMaxScaler()
print(scaler.fit(x))
print(scaler.transform(x))

LAB FILE-Shelly Sharma

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

LAB FILE-Shelly Sharma

Uploaded by

Copyright:

Available Formats

Banasthali Vidyapith

“AI ML LAB RECORD”

AIM: To learn about data preprocessing.

Steps to import a dataset that has been downloaded

Steps to find the null value:

Steps to extract dependent and independent variables:

AIM: TO LEARN HOW TO NORMALIZE.

Steps to create a data frame:

df['price'].fillna(value = 0.85, inplace = True)

Steps to replace a null value to a specified values:

df['price'].fillna(value = df.price.mean(), inplace =

Steps to calculate mean:

Steps to replace the null value with the previous row

Steps to replace the null value with specified but with

Steps to group the data

Steps to group the data with the fill in missing values

Steps to fill the missing values backward

df['price'].fillna(method = 'bfill', inplace = True)

df['price'].fillna(method = 'bfill',limit=1, inplace =

Steps to group the data with backward as well as

class_mapping={label:idx for idx,label in

AIM: To drop rows

from io import StringIO

csv_data= str (csv_data)

from sklearn.impute import SimpleImputer

AIM: To learn pre-processing

from sklearn import preprocessing

X_train = np.array([[ 1., -1., 2.],

[ 2., 0., 0.],

[ 0., 1., -1.]])

AIM: Learn to plot Linear Regression Model .

from sklearn.linear_model import LinearRegression

from sklearn.metrics import

import matplotlib.pyplot as plt

from sklearn import preprocessing

from sklearn.preprocessing import MinMaxScaler

print(‘Squared error is ‘,se)

plt.xlabel(“Independent variable x”)

plt.ylabel(“Dependent variable y”)

print(‘Squared error is ‘,se)

print(‘Root mean squared error ‘,rmse)

print(‘R square is ‘,R2)

AIM : To plot various situations using matplotlib

To import various libraries

import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error,

plt.xlabel('Independent variable x')

plt.ylabel('Dependent variable y')

print('squared error is', se)

print('mean squared error is',mse)

print('root mean square error is',rmse)

print('R square is',R2)

print('slope b1 is', b1)

plt.xlabel('Independent variable x')

plt.ylabel('Dependent variable y')

print('squared error is', se)

print('mean squared error is',mse)

print('root mean square error is',rmse)

print('R square is',R2)

print ('MSE:', mse)

print('Root mean squared error: ', rmse)

print('R2 score:', r2)