Professional Documents
Culture Documents
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Read the IPL dataset CSV into a DataFrame and report its (rows, columns) shape.
csv_path = '/content/ipl_data.csv'
ipl_df = pd.read_csv(csv_path)
print(f"Dataset successfully Imported of Shape : {ipl_df.shape}")
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17478 entries, 0 to 17477
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 mid 17478 non-null int64
1 date 17478 non-null object
2 venue 17478 non-null object
3 bat_team 17478 non-null object
4 bowl_team 17478 non-null object
5 batsman 17478 non-null object
6 bowler 17478 non-null object
7 runs 17477 non-null float64
8 wickets 17477 non-null float64
9 overs 17477 non-null float64
10 runs_last_5 17477 non-null float64
11 wickets_last_5 17477 non-null float64
12 striker 17477 non-null float64
13 non-striker 17477 non-null float64
14 total 17477 non-null float64
dtypes: float64(8), int64(1), object(6)
memory usage: 2.0+ MB
mid 142
date 102
venue 20
bat_team 8
bowl_team 8
batsman 214
bowler 155
runs 225
wickets 11
overs 139
runs_last_5 85
wickets_last_5 7
striker 129
non-striker 59
total 84
dtype: int64
mid int64
date object
venue object
bat_team object
bowl_team object
batsman object
bowler object
runs float64
wickets float64
overs float64
runs_last_5 float64
wickets_last_5 float64
striker float64
non-striker float64
total float64
dtype: object
# Histogram (10 bins, no KDE overlay) for each column of interest:
# first the 'wickets' column, then the 'total' column.
for column_name, plot_title in (
    ('wickets', "Wickets Distribution"),
    ('total', "Runs Distribution"),
):
    sns.displot(ipl_df[column_name], kde=False, bins=10)
    plt.title(plot_title)
    plt.show()

# Names of all columns
ipl_df.columns
<Axes: >
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Replace the team names in 'bat_team' and 'bowl_team' with integer codes.
# NOTE: the same `le` instance is re-fitted on every iteration, so once the loop
# finishes it only holds the fitted state for the last column ('bowl_team').
le = LabelEncoder()
for col in ['bat_team', 'bowl_team']:
    # The loop body must be indented; the flattened export had lost this indent.
    ipl_df[col] = le.fit_transform(ipl_df[col])
ipl_df.head()
# NOTE(review): `columnTransformer` is not defined in any visible cell — presumably
# it is built in a cell missing from this export; confirm before running.
# This rebinds `ipl_df` from the DataFrame loaded above to a NumPy array.
ipl_df = np.array(columnTransformer.fit_transform(ipl_df))
# Encoded Data
# NOTE(review): `df` is not defined in any visible cell — presumably the encoded
# array is wrapped back into a DataFrame named `df` elsewhere; confirm.
df.head()
{"type":"dataframe","variable_name":"df"}
# Registry mapping a short model key to its test score; filled in by the
# evaluation cells that follow.
models = {}
DecisionTreeRegressor()
# Evaluate Model
# `tree.score(...)` is scaled to a percentage and kept as a string; the printout
# shows only the first five characters of each string.
# NOTE(review): presumably `tree` is the fitted DecisionTreeRegressor whose repr
# appears just above, so score() would be R^2 — confirm against the fitting cell.
train_score_tree = str(tree.score(train_features, train_labels) * 100)
test_score_tree = str(tree.score(test_features, test_labels) * 100)
# The f-string must stay on one line: a single-quoted literal cannot span lines.
print(f'Train Score : {train_score_tree[:5]}%\nTest Score : {test_score_tree[:5]}%')
# The full (untruncated) test-score string is what gets recorded.
models["tree"] = test_score_tree
LinearRegression()
# Evaluate Model
# `linreg.score(...)` is scaled to a percentage and kept as a string; the printout
# shows only the first five characters of each string.
# NOTE(review): presumably `linreg` is the fitted LinearRegression whose repr
# appears just above, so score() would be R^2 — confirm against the fitting cell.
train_score_linreg = str(linreg.score(train_features, train_labels) * 100)
test_score_linreg = str(linreg.score(test_features, test_labels) * 100)
# The f-string must stay on one line: a single-quoted literal cannot span lines.
print(f'Train Score : {train_score_linreg[:5]}%\nTest Score : {test_score_linreg[:5]}%')
# The full (untruncated) test-score string is what gets recorded.
models["linreg"] = test_score_linreg
RandomForestRegressor()
# Evaluate Model
# `forest.score(...)` is scaled to a percentage and kept as a string; the printout
# shows only the first five characters of each string.
# NOTE(review): presumably `forest` is the fitted RandomForestRegressor whose repr
# appears just above, so score() would be R^2 — confirm against the fitting cell.
train_score_forest = str(forest.score(train_features, train_labels) * 100)
test_score_forest = str(forest.score(test_features, test_labels) * 100)
# The f-string must stay on one line: a single-quoted literal cannot span lines.
print(f'Train Score : {train_score_forest[:5]}%\nTest Score : {test_score_forest[:5]}%')
# The full (untruncated) test-score string is what gets recorded.
models["forest"] = test_score_forest
Train Score : 99.04%
Test Score : 93.78%
SVR()
SVR()
KNeighborsRegressor()
# Evaluate Model
# `knr.score(...)` is scaled to a percentage and kept as a string; the printout
# shows only the first five characters of each string.
# NOTE(review): presumably `knr` is the fitted KNeighborsRegressor whose repr
# appears just above, so score() would be R^2 — confirm against the fitting cell.
train_score_knr = str(knr.score(train_features, train_labels) * 100)
test_score_knr = str(knr.score(test_features, test_labels) * 100)
# The f-string must stay on one line: a single-quoted literal cannot span lines.
print(f'Train Score : {train_score_knr[:5]}%\nTest Score : {test_score_knr[:5]}%')
# The full (untruncated) test-score string is what gets recorded.
models["knr"] = test_score_knr
# Spot-check: ask score_predict for a final total given the innings state below,
# and print it next to the hard-coded actual total (147).
# NOTE(review): `score_predict` is defined outside the visible cells.
batting_team = 'Delhi Daredevils'
bowling_team = 'Chennai Super Kings'
innings_state = dict(
    overs=10.2,
    runs=68,
    wickets=3,
    runs_last_5=29,
    wickets_last_5=1,
)
score = score_predict(batting_team, bowling_team, **innings_state)
print(f'Predicted Score : {score} || Actual Score : 147')
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439:
UserWarning: X does not have valid feature names, but
RandomForestRegressor was fitted with feature names
warnings.warn(
# Spot-check: ask score_predict for a final total given the innings state below,
# and print it next to the hard-coded actual total (176).
# NOTE(review): `score_predict` is defined outside the visible cells.
batting_team = 'Mumbai Indians'
bowling_team = 'Kings XI Punjab'
innings_state = dict(
    overs=12.3,
    runs=113,
    wickets=2,
    runs_last_5=55,
    wickets_last_5=0,
)
score = score_predict(batting_team, bowling_team, **innings_state)
print(f'Predicted Score : {score} || Actual Score : 176')
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439:
UserWarning: X does not have valid feature names, but
RandomForestRegressor was fitted with feature names
warnings.warn(
# Spot-check: ask score_predict for a final total given the innings state below,
# and print it next to the hard-coded actual total (185).
# NOTE(review): `score_predict` is defined outside the visible cells.
batting_team = "Kings XI Punjab"
bowling_team = "Rajasthan Royals"
innings_state = dict(
    overs=14.0,
    runs=118,
    wickets=1,
    runs_last_5=45,
    wickets_last_5=0,
)
score = score_predict(batting_team, bowling_team, **innings_state)
print(f'Predicted Score : {score} || Actual Score : 185')
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439:
UserWarning: X does not have valid feature names, but
RandomForestRegressor was fitted with feature names
warnings.warn(
# Spot-check: ask score_predict for a final total given the innings state below,
# and print it next to the hard-coded actual total (110).
# NOTE(review): `score_predict` is defined outside the visible cells.
batting_team = 'Delhi Daredevils'
bowling_team = 'Mumbai Indians'
innings_state = dict(
    overs=18.0,
    runs=96,
    wickets=8,
    runs_last_5=18,
    wickets_last_5=4,
)
score = score_predict(batting_team, bowling_team, **innings_state)
print(f'Predicted Score : {score} || Actual Score : 110')
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439:
UserWarning: X does not have valid feature names, but
RandomForestRegressor was fitted with feature names
warnings.warn(
# Spot-check: ask score_predict for a final total given the innings state below,
# and print it next to the hard-coded actual total (153).
# NOTE(review): `score_predict` is defined outside the visible cells.
batting_team = 'Kings XI Punjab'
bowling_team = 'Chennai Super Kings'
innings_state = dict(
    overs=18.0,
    runs=129,
    wickets=6,
    runs_last_5=34,
    wickets_last_5=2,
)
score = score_predict(batting_team, bowling_team, **innings_state)
print(f'Predicted Score : {score} || Actual Score : 153')
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439:
UserWarning: X does not have valid feature names, but
RandomForestRegressor was fitted with feature names
warnings.warn(
# Spot-check: ask score_predict for a final total given the innings state below,
# and print it next to the hard-coded actual total (146).
# NOTE(review): `score_predict` is defined outside the visible cells.
batting_team = 'Sunrisers Hyderabad'
bowling_team = 'Royal Challengers Bangalore'
innings_state = dict(
    overs=10.5,
    runs=67,
    wickets=3,
    runs_last_5=29,
    wickets_last_5=1,
)
score = score_predict(batting_team, bowling_team, **innings_state)
print(f'Predicted Score : {score} || Actual Score : 146')
import pickle

# Serialize the `forest` model object to disk.
# The file is opened in a `with` block so the handle is flushed and closed even
# if pickling raises; the previous bare open() left the handle unclosed.
# NOTE: only unpickle this file from a trusted location — pickle.load can
# execute arbitrary code.
filename = "ml_model.pkl"
with open(filename, "wb") as model_file:
    pickle.dump(forest, model_file)