# Importing Necessary Libraries: Import As Import As Import As Import As

# Importing Necessary Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
#Importing dataset
ipl_df = pd.read_csv('/content/ipl_data.csv')
print(f"Dataset successfully Imported of Shape : {ipl_df.shape}")
Dataset successfully Imported of Shape : (76014, 15)
# First 5 Rows of Data

ipl_df.head()
{"summary":"{\n \"name\": \"ipl_df\",\n \"rows\": 76014,\n

\"fields\": [\n {\n \"column\": \"mid\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
178,\n \"min\": 1,\n \"max\": 617,\n
\"num_unique_values\": 617,\n \"samples\": [\n 50,\n
582,\n 83\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"date\",\n \"properties\": {\n \"dtype\": \"object\",\n
\"num_unique_values\": 442,\n \"samples\": [\n \"2014-
05-02\",\n \"2012-05-15\",\n \"2009-05-18\"\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"venue\",\n \"properties\":
{\n \"dtype\": \"category\",\n \"num_unique_values\":
35,\n \"samples\": [\n \"Shaheed Veer Narayan Singh
International Stadium\",\n \"Buffalo Park\",\n \"Dr.
Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium\"\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"bat_team\",\n \"properties\":
14,\n \"samples\": [\n \"Pune Warriors\",\n
\"Rising Pune Supergiants\",\n \"Kolkata Knight Riders\"\n
],\n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"bowl_team\",\n
\"properties\": {\n \"dtype\": \"category\",\n
\"num_unique_values\": 14,\n \"samples\": [\n \"Kochi
Tuskers Kerala\",\n \"Rising Pune Supergiants\",\n
\"Royal Challengers Bangalore\"\n ],\n
n },\n {\n \"column\": \"batsman\",\n \"properties\":
411,\n \"samples\": [\n \"A Nehra\",\n \"A
Symonds\",\n \"DJ Bravo\"\n ],\n
n },\n {\n \"column\": \"bowler\",\n \"properties\":
329,\n \"samples\": [\n \"IK Pathan\",\n \"AB
McDonald\",\n \"JM Kemp\"\n ],\n
n },\n {\n \"column\": \"runs\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 48,\n \"min\": 0,\n
\"max\": 263,\n \"num_unique_values\": 252,\n
\"samples\": [\n 106,\n 21,\n 97\
n ],\n \"semantic_type\": \"\",\n
\"wickets\",\n \"properties\": {\n \"dtype\": \"number\",\
n \"std\": 2,\n \"min\": 0,\n \"max\": 10,\n
\"overs\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 5.772586873852113,\n \"min\": 0.0,\n \"max\":
19.6,\n \"num_unique_values\": 140,\n \"samples\": [\n
17.6,\n 11.1,\n 5.1\n ],\n
n },\n {\n \"column\": \"runs_last_5\",\n
14,\n \"min\": 0,\n \"max\": 113,\n
\"wickets_last_5\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 1,\n \"min\": 0,\n
\"max\": 7,\n \"num_unique_values\": 8,\n \"samples\":
[\n 1,\n 5,\n 0\n ],\n
n },\n {\n \"column\": \"striker\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\": 20,\n
\"min\": 0,\n \"max\": 175,\n \"num_unique_values\":
155,\n \"samples\": [\n 95,\n 160,\n
80\n ],\n \"semantic_type\": \"\",\n
\"non-striker\",\n \"properties\": {\n \"dtype\":
\"samples\": [\n 69,\n 0,\n 18\n ],\
n \"semantic_type\": \"\",\n \"description\": \"\"\n
}\n },\n {\n \"column\": \"total\",\n \"properties\":
138,\n \"samples\": [\n 132,\n 115,\n
\"description\": \"\"\n }\n }\n ]\
n}","type":"dataframe","variable_name":"ipl_df"}
# Describing the dataset

ipl_df.describe()
{"summary":"{\n \"name\": \"ipl_df\",\n \"rows\": 8,\n \"fields\":

[\n {\n \"column\": \"mid\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 6155.813307577099,\n
\"min\": 1.0,\n \"max\": 17478.0,\n
\"num_unique_values\": 8,\n \"samples\": [\n
71.40788419727657,\n 72.0,\n 17478.0\n ],\n
\"dtype\": \"number\",\n \"std\": 6150.2944315706645,\n
\"min\": 0.0,\n \"max\": 17477.0,\n
74.54814899582308,\n 70.0,\n 17477.0\n ],\n
n },\n {\n \"column\": \"wickets\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\":
6177.954883141603,\n \"min\": 0.0,\n \"max\": 17477.0,\n
2.6637866910797046,\n 2.0,\n 17477.0\n ],\n
n },\n {\n \"column\": \"overs\",\n \"properties\": {\
n \"dtype\": \"number\",\n \"std\": 6175.826215003261,\n
\"min\": 0.0,\n \"max\": 17477.0,\n
9.75828803570407,\n 9.6,\n 17477.0\n ],\n
6167.329477872403,\n \"min\": 0.0,\n \"max\": 17477.0,\n
32.71310865709218,\n 34.0,\n 17477.0\n ],\n
n },\n {\n \"column\": \"wickets_last_5\",\n
6178.484772156044,\n \"min\": 0.0,\n \"max\": 17477.0,\n
\"num_unique_values\": 7,\n \"samples\": [\n 17477.0,\
n 1.1984322252102764,\n 2.0\n ],\n
n },\n {\n \"column\": \"striker\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\":
6165.786654907889,\n \"min\": 0.0,\n \"max\": 17477.0,\n
24.57475539280197,\n 20.0,\n 17477.0\n ],\n
n },\n {\n \"column\": \"non-striker\",\n
6174.403089883773,\n \"min\": 0.0,\n \"max\": 17477.0,\n
7.979802025519254,\n 4.0,\n 17477.0\n ],\n
n },\n {\n \"column\": \"total\",\n \"properties\": {\
n \"dtype\": \"number\",\n \"std\": 6129.802688655688,\n
\"min\": 28.746713170397086,\n \"max\": 17477.0,\n
159.5532986210448,\n 161.0,\n 17477.0\n ],\n
n }\n ]\n}","type":"dataframe"}
# Information about Each Column

ipl_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17478 entries, 0 to 17477
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 mid 17478 non-null int64
1 date 17478 non-null object
2 venue 17478 non-null object
3 bat_team 17478 non-null object
4 bowl_team 17478 non-null object
5 batsman 17478 non-null object
6 bowler 17478 non-null object
7 runs 17477 non-null float64
8 wickets 17477 non-null float64
9 overs 17477 non-null float64
10 runs_last_5 17477 non-null float64
11 wickets_last_5 17477 non-null float64
12 striker 17477 non-null float64
13 non-striker 17477 non-null float64
14 total 17477 non-null float64
dtypes: float64(8), int64(1), object(6)
memory usage: 2.0+ MB
# Number of Unique Values in each column

ipl_df.nunique()
mid 142
date 102
venue 20
bat_team 8
bowl_team 8
batsman 214
bowler 155
runs 225
wickets 11
overs 139
runs_last_5 85
wickets_last_5 7
striker 129
non-striker 59
total 84
dtype: int64
# Data types of all Columns

ipl_df.dtypes
mid int64
date object
venue object
bat_team object
bowl_team object
batsman object
bowler object
runs float64
wickets float64
overs float64
runs_last_5 float64
wickets_last_5 float64
striker float64
non-striker float64
total float64
dtype: object
#Wickets Distribution
sns.displot(ipl_df['wickets'],kde=False,bins=10)
plt.title("Wickets Distribution")
plt.show()
#Runs Distribution
sns.displot(ipl_df['total'],kde=False,bins=10)
plt.title("Runs Distribution")
plt.show()
# Names of all columns
ipl_df.columns
Index(['mid', 'date', 'venue', 'bat_team', 'bowl_team', 'batsman',

'bowler',
'runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5',
'striker',
'non-striker', 'total'],
dtype='object')
# Columns that are not used as model inputs.
irrelevant = ['mid', 'date', 'venue', 'batsman', 'bowler', 'striker', 'non-striker']
print(f'Before Removing Irrelevant Columns : {ipl_df.shape}')
ipl_df = ipl_df.drop(irrelevant, axis=1)  # Drop Irrelevant Columns
print(f'After Removing Irrelevant Columns : {ipl_df.shape}')
ipl_df.head()
# Define Consistent Teams

const_teams = ['Kolkata Knight Riders', 'Chennai Super Kings',
               'Rajasthan Royals', 'Mumbai Indians', 'Kings XI Punjab',
               'Royal Challengers Bangalore', 'Delhi Daredevils',
               'Sunrisers Hyderabad']
print(f'Before Removing Inconsistent Teams : {ipl_df.shape}')

# Keep only rows where BOTH the batting and the bowling side are in const_teams.
ipl_df = ipl_df[(ipl_df['bat_team'].isin(const_teams)) &
                (ipl_df['bowl_team'].isin(const_teams))]
# NOTE(review): the label below says "Irrelevant Columns" although this step
# removes rows of inconsistent teams; text kept as-is to match recorded output.
print(f'After Removing Irrelevant Columns : {ipl_df.shape}')
print(f"Consistent Teams : \n{ipl_df['bat_team'].unique()}")
ipl_df.head()
Before Removing Inconsistent Teams : (76014, 8)

After Removing Irrelevant Columns : (53811, 8)
Consistent Teams :
['Kolkata Knight Riders' 'Chennai Super Kings' 'Rajasthan Royals'
'Mumbai Indians' 'Kings XI Punjab' 'Royal Challengers Bangalore'
'Delhi Daredevils' 'Sunrisers Hyderabad']

\"fields\": [\n {\n \"column\": \"bat_team\",\n
\"num_unique_values\": 8,\n \"samples\": [\n \"Chennai
Super Kings\",\n \"Royal Challengers Bangalore\",\n
\"Kolkata Knight Riders\"\n ],\n \"semantic_type\":
\"\",\n \"description\": \"\"\n }\n },\n {\n
\"column\": \"bowl_team\",\n \"properties\": {\n
\"dtype\": \"category\",\n \"num_unique_values\": 8,\n
\"samples\": [\n \"Kings XI Punjab\",\n \"Chennai
Super Kings\",\n \"Royal Challengers Bangalore\"\n ],\
}\n },\n {\n \"column\": \"runs\",\n \"properties\":
239,\n \"samples\": [\n 66,\n 21,\n
n \"std\": 2,\n \"min\": 0,\n \"max\": 10,\n
\"std\": 5.773906711218136,\n \"min\": 0.0,\n \"max\":
17.6,\n 11.1,\n 5.1\n ],\n
15,\n \"min\": 0,\n \"max\": 94,\n
[\n 1,\n 5,\n 0\n ],\n
n \"dtype\": \"number\",\n \"std\": 30,\n
128,\n \"samples\": [\n 101,\n 149,\n
# Discard every row recorded before the 5.0-over mark.
print(f'Before Removing Overs : {ipl_df.shape}')

at_least_five_overs = ipl_df['overs'].ge(5.0)
ipl_df = ipl_df.loc[at_least_five_overs]
print(f'After Removing Overs : {ipl_df.shape}')
ipl_df.head()
Before Removing Overs : (53811, 8)

After Removing Overs : (40108, 8)

\"num_unique_values\": 8,\n \"samples\": [\n \"Chennai
Super Kings\",\n \"Royal Challengers Bangalore\",\n
\"Kolkata Knight Riders\"\n ],\n \"semantic_type\":
\"\",\n \"description\": \"\"\n }\n },\n {\n
\"column\": \"bowl_team\",\n \"properties\": {\n
\"dtype\": \"category\",\n \"num_unique_values\": 8,\n
\"samples\": [\n \"Kings XI Punjab\",\n \"Chennai
Super Kings\",\n \"Royal Challengers Bangalore\"\n ],\
}\n },\n {\n \"column\": \"runs\",\n \"properties\":
226,\n \"samples\": [\n 71,\n 20,\n
n \"std\": 1,\n \"min\": 0,\n \"max\": 10,\n
\"std\": 4.32300139521351,\n \"min\": 5.0,\n \"max\":
10.1,\n 15.6,\n 15.5\n ],\n
11,\n \"min\": 10,\n \"max\": 94,\n
[\n 1,\n 5,\n 0\n ],\n
128,\n \"samples\": [\n 101,\n 149,\n
from seaborn import heatmap

# bat_team / bowl_team are still string columns at this point, so restrict the
# correlation matrix to numeric columns explicitly (avoids the FutureWarning on
# older pandas and a hard error once numeric_only defaults to False).
heatmap(data=ipl_df.corr(numeric_only=True), annot=True)
<ipython-input-28-8fde0e13fc28>:2: FutureWarning: The default value of

numeric_only in DataFrame.corr is deprecated. In a future version, it
will default to False. Select only valid columns or specify the value
of numeric_only to silence this warning.
heatmap(data=ipl_df.corr(), annot=True)
<Axes: >
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Replace the team names in both team columns with integer codes.
le = LabelEncoder()
for col in ['bat_team', 'bowl_team']:
    ipl_df[col] = le.fit_transform(ipl_df[col])
ipl_df.head()

2,\n \"min\": 0,\n \"max\": 7,\n
\"bowl_team\",\n \"properties\": {\n \"dtype\":
[\n 2,\n 0,\n 6\n ],\n
\"dtype\": \"number\",\n \"std\": 40,\n \"min\": 13,\n
\"samples\": [\n 71,\n 20,\n 43\
n ],\n \"semantic_type\": \"\",\n
n \"std\": 1,\n \"min\": 0,\n \"max\": 10,\n
\"std\": 4.32300139521351,\n \"min\": 5.0,\n \"max\":
10.1,\n 15.6,\n 15.5\n ],\n
11,\n \"min\": 10,\n \"max\": 94,\n
[\n 1,\n 5,\n 0\n ],\n
128,\n \"samples\": [\n 101,\n 149,\n
from sklearn.compose import ColumnTransformer

# One-hot encode the columns at positions 0 and 1 and pass every remaining
# column through unchanged.
# NOTE(review): positions 0 and 1 are presumably bat_team and bowl_team after
# the earlier column drop - confirm the column order before relying on this.
columnTransformer = ColumnTransformer([('encoder',
OneHotEncoder(),
[0, 1])],
remainder='passthrough')
# From here on `ipl_df` is rebound to a plain NumPy array (no column labels).
ipl_df = np.array(columnTransformer.fit_transform(ipl_df))
# Column labels for the encoded array: eight batting-team indicators, eight
# bowling-team indicators (same team order), then the numeric columns.
encoded_teams = ['Chennai Super Kings', 'Delhi Daredevils', 'Kings XI Punjab',
                 'Kolkata Knight Riders', 'Mumbai Indians', 'Rajasthan Royals',
                 'Royal Challengers Bangalore', 'Sunrisers Hyderabad']
cols = ([f'batting_team_{team}' for team in encoded_teams]
        + [f'bowling_team_{team}' for team in encoded_teams]
        + ['runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5', 'total'])
df = pd.DataFrame(ipl_df, columns=cols)
# Encoded Data
df.head()
{"type":"dataframe","variable_name":"df"}
features = df.drop(['total'], axis=1)

labels = df['total']
from sklearn.model_selection import train_test_split

# Hold out 20% of the shuffled rows for testing.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.20, shuffle=True)
print(f"Training Set : {train_features.shape}\nTesting Set : {test_features.shape}")
Training Set : (32086, 21)

Testing Set : (8022, 21)
models = dict()
from sklearn.tree import DecisionTreeRegressor

tree = DecisionTreeRegressor()
# Train Model
tree.fit(train_features, train_labels)
DecisionTreeRegressor()
# Evaluate Model
# Scores are scaled to percentages and kept as strings; only the first five
# characters are printed, and the full test string is stored for the comparison.
train_score_tree = str(tree.score(train_features, train_labels) * 100)
test_score_tree = str(tree.score(test_features, test_labels) * 100)
print(f'Train Score : {train_score_tree[:5]}%\nTest Score : {test_score_tree[:5]}%')
models["tree"] = test_score_tree
Train Score : 99.98%

Test Score : 87.48%
from sklearn.metrics import (
    mean_absolute_error as mae,
    mean_squared_error as mse,
)

# Predict once and reuse the result for all three error metrics.
tree_pred = tree.predict(test_features)
print("---- Decision Tree Regressor - Model Evaluation ----")
print("Mean Absolute Error (MAE): {}".format(mae(test_labels, tree_pred)))
print("Mean Squared Error (MSE): {}".format(mse(test_labels, tree_pred)))
print("Root Mean Squared Error (RMSE): {}".format(np.sqrt(mse(test_labels, tree_pred))))
---- Decision Tree Regressor - Model Evaluation ----
Mean Absolute Error (MAE): 3.855958613812017
Mean Squared Error (MSE): 113.42043754674644
Root Mean Squared Error (RMSE): 10.64990317076857
from sklearn.linear_model import LinearRegression

linreg = LinearRegression()
# Train Model
linreg.fit(train_features, train_labels)
LinearRegression()
# Evaluate Model
# Scores are scaled to percentages and kept as strings; only the first five
# characters are printed, and the full test string is stored for the comparison.
train_score_linreg = str(linreg.score(train_features, train_labels) * 100)
test_score_linreg = str(linreg.score(test_features, test_labels) * 100)
print(f'Train Score : {train_score_linreg[:5]}%\nTest Score : {test_score_linreg[:5]}%')
models["linreg"] = test_score_linreg

Test Score : 66.14%
# Predict once and reuse the result for all three error metrics
# (mae / mse are the sklearn.metrics aliases imported earlier in the file).
linreg_pred = linreg.predict(test_features)
print("---- Linear Regression - Model Evaluation ----")
print("Mean Absolute Error (MAE): {}".format(mae(test_labels, linreg_pred)))
print("Mean Squared Error (MSE): {}".format(mse(test_labels, linreg_pred)))
print("Root Mean Squared Error (RMSE): {}".format(np.sqrt(mse(test_labels, linreg_pred))))
---- Linear Regression - Model Evaluation ----

from sklearn.ensemble import RandomForestRegressor

forest = RandomForestRegressor()
# Train Model
forest.fit(train_features, train_labels)
RandomForestRegressor()
# Evaluate Model
# Scores are scaled to percentages and kept as strings; only the first five
# characters are printed, and the full test string is stored for the comparison.
train_score_forest = str(forest.score(train_features, train_labels) * 100)
test_score_forest = str(forest.score(test_features, test_labels) * 100)
print(f'Train Score : {train_score_forest[:5]}%\nTest Score : {test_score_forest[:5]}%')
models["forest"] = test_score_forest
Test Score : 93.78%
# Predict once and reuse the result for all three error metrics
# (mae / mse are the sklearn.metrics aliases imported earlier in the file).
forest_pred = forest.predict(test_features)
print("---- Random Forest Regression - Model Evaluation ----")
print("Mean Absolute Error (MAE): {}".format(mae(test_labels, forest_pred)))
print("Mean Squared Error (MSE): {}".format(mse(test_labels, forest_pred)))
print("Root Mean Squared Error (RMSE): {}".format(np.sqrt(mse(test_labels, forest_pred))))
---- Random Forest Regression - Model Evaluation ----

from sklearn.svm import SVR

svm = SVR()
# Train Model
svm.fit(train_features, train_labels)
SVR()
from sklearn.svm import SVR

svm = SVR()
# Train Model
svm.fit(train_features, train_labels)
SVR()
# Scores are scaled to percentages and kept as strings; only the first five
# characters are printed, and the full test string is stored for the comparison.
train_score_svm = str(svm.score(train_features, train_labels) * 100)
test_score_svm = str(svm.score(test_features, test_labels) * 100)
print(f'Train Score : {train_score_svm[:5]}%\nTest Score : {test_score_svm[:5]}%')
models["svm"] = test_score_svm

Test Score : 57.79%
# Predict once and reuse the result for all three error metrics
# (mae / mse are the sklearn.metrics aliases imported earlier in the file).
svm_pred = svm.predict(test_features)
print("---- Support Vector Regression - Model Evaluation ----")
print("Mean Absolute Error (MAE): {}".format(mae(test_labels, svm_pred)))
print("Mean Squared Error (MSE): {}".format(mse(test_labels, svm_pred)))
print("Root Mean Squared Error (RMSE): {}".format(np.sqrt(mse(test_labels, svm_pred))))
---- Support Vector Regression - Model Evaluation ----

from xgboost import XGBRegressor
xgb = XGBRegressor()
# Train Model
xgb.fit(train_features, train_labels)
XGBRegressor(base_score=None, booster=None, callbacks=None,

colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, device=None,
early_stopping_rounds=None,
enable_categorical=False, eval_metric=None,
feature_types=None,
gamma=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None,
max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan,
monotone_constraints=None,
multi_strategy=None, n_estimators=None, n_jobs=None,
num_parallel_tree=None, random_state=None, ...)
# Scores are scaled to percentages and kept as strings; only the first five
# characters are printed, and the full test string is stored for the comparison.
train_score_xgb = str(xgb.score(train_features, train_labels) * 100)
test_score_xgb = str(xgb.score(test_features, test_labels) * 100)
print(f'Train Score : {train_score_xgb[:5]}%\nTest Score : {test_score_xgb[:5]}%')
models["xgb"] = test_score_xgb

Test Score : 85.09%
# Predict once and reuse the result for all three error metrics
# (mae / mse are the sklearn.metrics aliases imported earlier in the file).
xgb_pred = xgb.predict(test_features)
print("---- XGB Regression - Model Evaluation ----")
print("Mean Absolute Error (MAE): {}".format(mae(test_labels, xgb_pred)))
print("Mean Squared Error (MSE): {}".format(mse(test_labels, xgb_pred)))
print("Root Mean Squared Error (RMSE): {}".format(np.sqrt(mse(test_labels, xgb_pred))))
---- XGB Regression - Model Evaluation ----

from sklearn.neighbors import KNeighborsRegressor

knr = KNeighborsRegressor()
# Train Model
knr.fit(train_features, train_labels)
KNeighborsRegressor()
# Scores are scaled to percentages and kept as strings; only the first five
# characters are printed, and the full test string is stored for the comparison.
train_score_knr = str(knr.score(train_features, train_labels) * 100)
test_score_knr = str(knr.score(test_features, test_labels) * 100)
print(f'Train Score : {train_score_knr[:5]}%\nTest Score : {test_score_knr[:5]}%')
models["knr"] = test_score_knr

Test Score : 77.01%
# Predict once and reuse the result for all three error metrics
# (mae / mse are the sklearn.metrics aliases imported earlier in the file).
knr_pred = knr.predict(test_features)
print("---- KNR - Model Evaluation ----")
print("Mean Absolute Error (MAE): {}".format(mae(test_labels, knr_pred)))
print("Mean Squared Error (MSE): {}".format(mse(test_labels, knr_pred)))
print("Root Mean Squared Error (RMSE): {}".format(np.sqrt(mse(test_labels, knr_pred))))
---- KNR - Model Evaluation ----

import matplotlib.pyplot as plt

model_names = list(models.keys())
accuracy = list(map(float, models.values()))
# creating the bar plot
plt.bar(model_names, accuracy)
<BarContainer object of 6 artists>

def score_predict(batting_team, bowling_team, runs, wickets, overs,
                  runs_last_5, wickets_last_5, model=None):
    """Predict the final innings total for the given match situation.

    The feature row mirrors the training columns: eight batting-team
    indicators, eight bowling-team indicators (same team order), followed by
    ``runs, wickets, overs, runs_last_5, wickets_last_5``.

    Args:
        batting_team: Name of the batting side; must be one of the eight
            encoded teams.
        bowling_team: Name of the bowling side; same constraint.
        runs: Runs scored so far.
        wickets: Wickets lost so far.
        overs: Overs bowled so far.
        runs_last_5: Runs scored in the last five overs.
        wickets_last_5: Wickets lost in the last five overs.
        model: Fitted regressor exposing ``predict``. When omitted, the
            module-level ``forest`` is looked up at call time.

    Returns:
        The predicted total, rounded to the nearest integer.

    Raises:
        ValueError: If either team name is not one of the encoded teams.
    """
    # Order must match the one-hot column order used for the training frame.
    teams = [
        'Chennai Super Kings', 'Delhi Daredevils', 'Kings XI Punjab',
        'Kolkata Knight Riders', 'Mumbai Indians', 'Rajasthan Royals',
        'Royal Challengers Bangalore', 'Sunrisers Hyderabad',
    ]
    if model is None:
        # Resolved lazily so a re-trained `forest` is picked up automatically.
        model = forest

    prediction_array = []
    for role, team in (('batting_team', batting_team),
                       ('bowling_team', bowling_team)):
        if team not in teams:
            # An unknown name would otherwise yield a row of the wrong width
            # and an opaque shape error inside the model.
            raise ValueError(f'Unknown {role}: {team!r}')
        prediction_array += [1 if team == name else 0 for name in teams]

    prediction_array += [runs, wickets, overs, runs_last_5, wickets_last_5]
    row = np.array([prediction_array])

    # Estimators fitted on a DataFrame warn when given an unlabeled array;
    # hand the column labels back when the model exposes them.
    names = getattr(model, 'feature_names_in_', None)
    if names is not None and len(names) == row.shape[1]:
        row = pd.DataFrame(row, columns=list(names))

    pred = model.predict(row)
    return int(round(pred[0]))
batting_team='Delhi Daredevils'
bowling_team='Chennai Super Kings'
score = score_predict(batting_team, bowling_team, overs=10.2, runs=68,
wickets=3, runs_last_5=29, wickets_last_5=1)
print(f'Predicted Score : {score} || Actual Score : 147')
Predicted Score : 151 || Actual Score : 147
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439:
UserWarning: X does not have valid feature names, but
RandomForestRegressor was fitted with feature names
warnings.warn(
batting_team='Mumbai Indians'
bowling_team='Kings XI Punjab'
score = score_predict(batting_team, bowling_team, overs=12.3,
runs=113, wickets=2, runs_last_5=55, wickets_last_5=0)
warnings.warn(
batting_team="Kings XI Punjab"
bowling_team="Rajasthan Royals"
score =score_predict(batting_team, bowling_team, overs=14.0, runs=118,

warnings.warn(
batting_team="Kolkata Knight Riders"

bowling_team="Chennai Super Kings"
warnings.warn(
batting_team='Delhi Daredevils'
bowling_team='Mumbai Indians'
warnings.warn(
batting_team='Kings XI Punjab'
bowling_team='Chennai Super Kings'
warnings.warn(
batting_team='Sunrisers Hyderabad'
bowling_team='Royal Challengers Bangalore'

warnings.warn(
import pickle

# Persist the fitted random-forest model; the context manager guarantees the
# file is flushed and closed even if pickling fails.
filename = "ml_model.pkl"
with open(filename, "wb") as model_file:
    pickle.dump(forest, model_file)

# Importing Necessary Libraries: Import As Import As Import As Import As

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

# Importing Necessary Libraries: Import As Import As Import As Import As

Uploaded by

Copyright:

Available Formats

# Importing Necessary Libraries

Dataset successfully Imported of Shape : (76014, 15)

# First 5 Columns Data

{"summary":"{\n \"name\": \"ipl_df\",\n \"rows\": 76014,\n

# Describing the ipl_dfset

{"summary":"{\n \"name\": \"ipl_df\",\n \"rows\": 8,\n \"fields\":

# Information about Each Column

# Number of Unique Values in each column

# ipl_df types of all Columns

Index(['mid', 'date', 'venue', 'bat_team', 'bowl_team', 'batsman',

irrelevant = ['mid', 'date', 'venue','batsman', 'bowler', 'striker', 'non-striker'] print(f'Before

# Define Consistent Teams

print(f'Before Removing Inconsistent Teams : {ipl_df.shape}')

Before Removing Inconsistent Teams : (76014, 8)

{"summary":"{\n \"name\": \"ipl_df\",\n \"rows\": 53811,\n

print(f'Before Removing Overs : {ipl_df.shape}')

Before Removing Overs : (53811, 8)

{"summary":"{\n \"name\": \"ipl_df\",\n \"rows\": 40108,\n

from seaborn import heatmap

<ipython-input-28-8fde0e13fc28>:2: FutureWarning: The default value of

{"summary":"{\n \"name\": \"ipl_df\",\n \"rows\": 40108,\n

from sklearn.compose import ColumnTransformer

cols = ['batting_team_Chennai Super Kings', 'batting_team_Delhi

features = df.drop(['total'], axis=1)

from sklearn.model_selection import train_test_split

Training Set : (32086, 21)

from sklearn.tree import DecisionTreeRegressor

Train Score : 99.98%

from sklearn.metrics import mean_absolute_error as mae,

from sklearn.linear_model import LinearRegression

Train Score : 65.85%

print("---- Linear Regression - Model Evaluation ----")

---- Linear Regression - Model Evaluation ----

from sklearn.ensemble import RandomForestRegressor

print("---- Random Forest Regression - Model Evaluation ----")

---- Random Forest Regression - Model Evaluation ----

from sklearn.svm import SVR

from sklearn.svm import SVR

train_score_svm = str(svm.score(train_features, train_labels)*100)

Train Score : 57.34%

print("---- Support Vector Regression - Model Evaluation ----")

---- Support Vector Regression - Model Evaluation ----

XGBRegressor(base_score=None, booster=None, callbacks=None,

train_score_xgb = str(xgb.score(train_features, train_labels)*100)

Train Score : 88.63%

print("---- XGB Regression - Model Evaluation ----")

---- XGB Regression - Model Evaluation ----

from sklearn.neighbors import KNeighborsRegressor

Train Score : 86.83%

print("---- KNR - Model Evaluation ----")

---- KNR - Model Evaluation ----

import matplotlib.pyplot as plt

<BarContainer object of 6 artists>

Predicted Score : 151 || Actual Score : 147

Predicted Score : 188 || Actual Score : 176

Predicted Score : 175 || Actual Score : 185

batting_team="Kolkata Knight Riders"

Predicted Score : 172 || Actual Score : 172

Predicted Score : 108 || Actual Score : 110

Predicted Score : 148 || Actual Score : 153

Predicted Score : 153 || Actual Score : 146

You might also like