
Banasthali Vidyapith

Tonk Rajasthan
Faculty of Mathematics and Computing
Information Technology

“AI ML LAB RECORD”

Submitted by:
Name: SHELLY SHARMA
Smart Card ID: BTBTI20249
Roll no. : 2016820
LAB-1

AIM: To learn about data preprocessing.

LOGIC:

Steps to import a dataset downloaded from kaggle.com:

import pandas as pd

df = pd.read_csv(r'diabetes.csv')

print(df)

Steps to find null values:

import pandas as pd

df=pd.read_csv('diabetes.csv')

df.isnull()
Steps to check null values in a specific column:

bool_series = pd.isnull(df["Insulin"])

print(bool_series)
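As part of preprocessing, duplicate rows can also be checked. A minimal sketch using pandas.DataFrame.duplicated() on the same df; the name duplicate_rows is illustrative:

# Boolean Series that is True for each row that repeats an earlier row
duplicate_rows = df.duplicated()

print(duplicate_rows)

# Total number of duplicate rows
print(duplicate_rows.sum())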

Steps to check null values in a specific row:

bool_series=pd.isnull(df.iloc[1])

print(bool_series)
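A minimal sketch of extracting the dependent and independent variables, assuming the Kaggle diabetes dataset where the last column (Outcome) is the target; the names X and y are illustrative:

# Independent variables (features): every column except the last
X = df.iloc[:, :-1].values

# Dependent variable (target): the last column
y = df.iloc[:, -1].values

print(X)
print(y)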
LAB-2

AIM: To learn how to normalize and handle missing values.

LOGIC:

Steps to create a data frame:

df = pd.DataFrame({
    'date': ['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
             '2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04'],
    'fruit': ['apple', 'apple', 'apple', 'apple', 'mango', 'mango', 'mango', 'mango'],
    'price': [0.80, None, None, 1.20, None, 2.10, 2.00, 1.80]
})

df['date'] = pd.to_datetime(df['date'])
Steps to fill null values with a fixed value:

df['price'].fillna(value = 0.85, inplace = True)

Steps to replace null values with the mean:

df['price'].fillna(value = df.price.mean(), inplace = True)

df.price.mean()
Steps to replace null values with the median:

df['price'].fillna(value = df.price.median(), inplace = True)

df.price.median()

Steps to fill null values with the group-wise mean:

df['price'].fillna(df.groupby('fruit')['price'].transform('mean'), inplace = True)
Steps to fill null values with the group-wise median:

df['price'].fillna(df.groupby('fruit')['price'].transform('median'), inplace = True)

Steps to replace null values with the previous row's value (forward fill):

df['price'].fillna(method='ffill', inplace=True)

df

Steps to forward fill null values, but with a limit:

df['price'].fillna(method='ffill', limit=1, inplace=True)

df

Steps to forward fill null values within each group:

df['price']=df.groupby('fruit')['price'].ffill()

df

Steps to forward fill null values within each group, with a limit:

df['price'] = df.groupby('fruit')['price'].ffill(limit=1)

df

Steps to fill the missing values backward

df['price'].fillna(method = 'bfill', inplace = True)

df
Steps to backward fill the values, but with a limit:

df['price'].fillna(method = 'bfill', limit=1, inplace = True)

df

Steps to group the data with backward as well as forward fill:

df['price'] = df.groupby('fruit')['price'].bfill().ffill()

df
Steps to interpolate:

df['price'].interpolate(method='linear', inplace=True)

df

df['price'] = df.groupby('fruit')['price'].apply(lambda x: x.interpolate(method='linear'))

df
df['price'] = df.groupby('fruit')['price'].apply(lambda x: x.interpolate(method='linear')).bfill()

df

df['weekday'] = df['date'].apply(lambda x: False if x.day_name() in ['Saturday', 'Sunday'] else True)

df
mean_price = df.groupby('fruit')['price'].transform('mean')

df

df['price'].fillna((mean_price).where(cond = df.weekday, other = mean_price*1.25), inplace = True)

df
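Since the aim of this lab mentions normalization, here is a minimal min-max normalization sketch for the filled price column; the column name price_norm is illustrative:

# Min-max normalization: rescale prices into the [0, 1] range
df['price_norm'] = (df['price'] - df['price'].min()) / (df['price'].max() - df['price'].min())

df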

LAB-3
AIM: To learn mapping

df = pd.DataFrame([['green','M',10.1,'class1'],
                   ['red','L',13.5,'class2'],
                   ['blue','XL',15.3,'class1']])

df

df.columns=['color','size','price','classlabel']

df

import numpy as np

size_mapping={'XL':3,'L':2,'M':1}

df['size']=df['size'].map(size_mapping)

df
np.unique(df['classlabel'])

class_mapping = {label: idx for idx, label in enumerate(np.unique(df['classlabel']))}

class_mapping

df['classlabel']=df['classlabel'].map(class_mapping)

class_mapping

print (df)
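To recover the original labels, the mappings can be inverted. A minimal sketch based on the size_mapping and class_mapping defined above; the inv_* names are illustrative:

# Invert the dictionaries so the encoded integers map back to the original labels
inv_size_mapping = {v: k for k, v in size_mapping.items()}
inv_class_mapping = {v: k for k, v in class_mapping.items()}

df['size'] = df['size'].map(inv_size_mapping)
df['classlabel'] = df['classlabel'].map(inv_class_mapping)

print(df)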
LAB-4

AIM: To drop rows with missing values and to impute them.

import sys

if sys.version_info[0] >= 3:
    unicode = str

import pandas as pd

from io import StringIO

csv_data = '''A,B,C,D
1.0,2.0,3.0,4.0
5.0,6.0,,8.0
0.0,11.0,12.0,'''

csv_data = str(csv_data)

df=pd.read_csv(StringIO(csv_data))
df

df.isnull()

df.isnull().sum()

df.values
df.dropna()

df.dropna(axis=1)

df.dropna(how='all')

df.dropna(thresh=4)
df.dropna(subset=['C'])

import numpy as np

from sklearn.impute import SimpleImputer

imputer = SimpleImputer(missing_values=np.nan, strategy='mean')

imputer.fit([[1,2],[np.nan,3],[7,6]])

SimpleImputer()

X=[[np.nan,2],[6,np.nan],[7,6]]

print(imputer.transform(X))
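The same imputation strategy can also be applied to the DataFrame built from csv_data. A minimal sketch, not part of the original record; imputed_df is an illustrative name:

# Replace the NaNs in df with the column means learned by the imputer
imputed_df = pd.DataFrame(imputer.fit_transform(df.values), columns=df.columns)

print(imputed_df)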
LAB-5

AIM: To learn pre-processing with feature standardization.

LOGIC:

from sklearn import preprocessing

import numpy as np

X_train = np.array([[ 1., -1.,  2.],
                    [ 2.,  0.,  0.],
                    [ 0.,  1., -1.]])

scaler = preprocessing.StandardScaler().fit(X_train)

scaler

scaler.mean_
scaler.scale_

X_scaled = scaler.transform(X_train)

X_scaled

X_scaled.mean(axis=0)

X_scaled.std(axis=0)
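The same fitted scaler can standardize unseen data using the mean and scale learned from X_train. A minimal sketch; X_test is an assumed example array:

# New samples are transformed with the training statistics, not their own
X_test = np.array([[-1., 1., 0.]])

print(scaler.transform(X_test))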
LAB-6 & LAB-7

AIM: To learn to plot a linear regression model.

LOGIC:

import pandas as pd

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, r2_score

import matplotlib.pyplot as plt

import numpy as np

from sklearn import preprocessing

df = pd.read_csv('diabetes.csv')

print(df)
x=df.DiabetesPedigreeFunction.max()

y=df.Glucose.max()

x=df.DiabetesPedigreeFunction.min()
x

df.to_numpy()

x=df.DiabetesPedigreeFunction.to_numpy()

y=df.Glucose.to_numpy()

x=x.reshape(-1,1)

regression_model=LinearRegression()

regression_model.fit(x,y)

y_predicted=regression_model.predict(x)

y_predicted
plt.scatter(x, y, color='red')

plt.plot(x, y_predicted, color='green')

plt.xlabel('x')

plt.ylabel('Y')
from sklearn.preprocessing import MinMaxScaler

scaler=MinMaxScaler()

print(scaler.fit(x))

print(scaler.data_max_)

print(scaler.transform(x))

plt.scatter(x, y, color='red')

plt.plot(x, y_predicted, color='green')

plt.xlabel('x')

plt.ylabel('Y')

y = df.Glucose.to_numpy()

y = y.reshape(-1,1)

regression_model=LinearRegression()

regression_model.fit(x,y)

plt.scatter(x, y, color='red')

plt.plot(x, y_predicted, color='green')

plt.xlabel('x')

plt.ylabel('Y')

from sklearn.preprocessing import MinMaxScaler

scaler=MinMaxScaler()

print(scaler.fit(y))

print(scaler.data_max_)

y=scaler.transform(y)
y

error = y - y_predicted

error

se = np.sum(error**2)

print('Squared error is ', se)


n=np.size(x)

print(n)

x_mean=np.mean(x)

y_mean=np.mean(y)

x_mean,y_mean

Sxy=np.sum(x*y)-n*x_mean*y_mean

Sxx=np.sum(x*x)-n*x_mean*x_mean

Sxx,Sxy

b1=Sxy/Sxx

b0=y_mean-b1*x_mean

print("Slope :", b1)

print("Intercept is", b0)
plt.scatter(x,y)

plt.xlabel(“Independent variable x”)

plt.ylabel(“Dependent variable y”)

y_pred=b0+b1*x

y_pred
plt.scatter(x, y, color='red')

plt.plot(x, y_pred, color='green')

plt.xlabel('x')

plt.ylabel('Y')

error=y-y_pred
se=np.sum(error**2)

print('Squared error is ', se)

mse=se/n

mse

rmse=np.sqrt(mse)

print('Root mean squared error ', rmse)

SSt=np.sum((y-y_mean)**2)

R2=1-(se/SSt)

print('R square is ', R2)

regression_model=LinearRegression()

regression_model.fit(x,y)
y_predicted=regression_model.predict(x)

y_predicted

plt.scatter(x, y, color='red')

plt.plot(x, y_pred, color='green')

plt.xlabel('x')

plt.ylabel('Y')

x = df[['DiabetesPedigreeFunction','Age']]

x.to_numpy()

regression_model=LinearRegression()

regression_model.fit(x,y)

y_predicted=regression_model.predict(x)

y_predicted
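To evaluate this two-feature model, the metrics imported at the start of the lab can be reused. A minimal sketch:

# Error metrics for the regression on DiabetesPedigreeFunction and Age
mse = mean_squared_error(y, y_predicted)
rmse = np.sqrt(mse)
r2 = r2_score(y, y_predicted)

print('MSE:', mse)
print('Root mean squared error:', rmse)
print('R2 score:', r2)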
LAB-8 & LAB-9 & LAB-10

AIM: To plot linear regression fits for various situations using matplotlib.

LOGIC:

Steps to import the required libraries:

import numpy as np

import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, r2_score

import statsmodels.api as sm
x= np.array([1,2,3,4,5])

y= np.array([7,14,15,18,19])

n= np.size(x)

x_mean= np.mean(x)

y_mean= np.mean(y)

x_mean, y_mean

Sxy= np.sum(x*y)-n*x_mean*y_mean

Sxx= np.sum(x*x)-n*x_mean*x_mean

b1= Sxy/Sxx

b0= y_mean-b1*x_mean
print('slope b1 is', b1)

print('intercept b0 is',b0)
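These are the least-squares estimates: Sxy = Σxᵢyᵢ − n·x̄·ȳ and Sxx = Σxᵢ² − n·x̄², so the slope is b1 = Sxy/Sxx and the intercept is b0 = ȳ − b1·x̄. For this data, x̄ = 3 and ȳ = 14.6, giving Sxy = 247 − 219 = 28, Sxx = 55 − 45 = 10, slope b1 = 2.8 and intercept b0 = 14.6 − 2.8·3 = 6.2.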

plt.scatter(x,y)

plt.xlabel('Independent variable x')

plt.ylabel('Dependent variable y')


y_pred= b1*x+b0

plt.scatter(x,y, color='red')

plt.plot(x,y_pred,color='green')

plt.xlabel('X')

plt.ylabel('Y')

error= y- y_pred

se = np.sum (error**2)

print('squared error is', se)


mse= se/n

print('mean squared error is',mse)

rmse = np.sqrt(mse)

print('root mean square error is',rmse)

SSt=np.sum((y-y_mean)**2)

R2=1- (se/SSt)

print('R square is',R2)

x = x.reshape(-1,1)

regression_model = LinearRegression()

regression_model.fit(x, y)

y_predicted = regression_model.predict(x)
import pandas as pd

df= pd.read_csv(r'diabetes.csv')

print (df)
Sxy= np.sum(x*y)-n*x_mean*y_mean

Sxx= np.sum(x*x)-n*x_mean*x_mean

b1= Sxy/Sxx

b0= y_mean-b1*x_mean

print('slope b1 is', b1)

print('intercept b0 is',b0)
plt.scatter(x,y)

plt.xlabel('Independent variable x')

plt.ylabel('Dependent variable y')

y_pred= b1*x+b0

plt.scatter(x,y, color='red')

plt.plot(x,y_pred,color='green')

plt.xlabel('X')

plt.ylabel('Y')
Steps to calculate the error metrics:

error= y- y_pred

se = np.sum (error**2)

print('squared error is', se)

mse= se/n

print('mean squared error is',mse)


rmse = np.sqrt(mse)

print('root mean square error is',rmse)

SSt=np.sum((y-y_mean)**2)

R2=1- (se/SSt)

print('R square is',R2)

mse= mean_squared_error(y,y_predicted)

rmse= np.sqrt(mean_squared_error(y,y_predicted))
r2= r2_score(y,y_predicted)

print('Slope:', regression_model.coef_)

print('Intercept:', regression_model.intercept_)

print ('MSE:', mse)

print('Root mean squared error: ', rmse)

print('R2 score:', r2)

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

print(scaler.fit(x))
print(scaler.transform(x))
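statsmodels is imported at the start of this lab but not used above. A minimal OLS sketch on the same x and y, added as an illustration; X_const and ols_model are illustrative names:

# Ordinary least squares with statsmodels; add_constant supplies the intercept column
X_const = sm.add_constant(x)

ols_model = sm.OLS(y, X_const).fit()

print(ols_model.params)      # intercept and slope
print(ols_model.rsquared)    # coefficient of determination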
