You are on page 1of 19

4/15/24, 4:57 PM ~BDA-4 EDA Project

In [1]: import pandas as pd


import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#to ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [3]: data = pd.read_csv("C:/Users/Admin/Dropbox/~AA_COMSATS Courses Teaching/Data Bank/Momb

In [4]: data.head()

Out[4]: S.No. Name Location Year Kilometers_Driven Fuel_Type Transmission Owner_Type Mile

Maruti
0 0 Wagon R Mumbai 2010 72000 CNG Manual First
km
LXI CNG

Hyundai
Creta 1.6 1
1 1 Pune 2015 41000 Diesel Manual First
CRDi SX k
Option

Honda
2 2 Chennai 2011 46000 Petrol Manual First
Jazz V k

Maruti 2
3 3 Chennai 2012 87000 Diesel Manual First
Ertiga VDI k

Audi A4
New 2.0
4 4 Coimbatore 2013 40670 Diesel Automatic Second
TDI k
Multitronic

In [5]: data.tail()

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 1/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

Out[5]: S.No. Name Location Year Kilometers_Driven Fuel_Type Transmission Owner_Type

Volkswagen
Vento
7248 7248 Hyderabad 2011 89411 Diesel Manual First
Diesel
Trendline

Volkswagen
7249 7249 Mumbai 2015 59000 Petrol Automatic First
Polo GT TSI

Nissan
7250 7250 Micra Kolkata 2012 28000 Diesel Manual First
Diesel XV

Volkswagen
7251 7251 Pune 2013 52262 Petrol Automatic Third
Polo GT TSI

Mercedes-
Benz E-
7252 7252 Class 2009- Kochi 2014 72443 Diesel Automatic First
2013 E 220
CDI Avan...

In [6]: data.dtypes

S.No. int64
Out[6]:
Name object
Location object
Year int64
Kilometers_Driven int64
Fuel_Type object
Transmission object
Owner_Type object
Mileage object
Engine object
Power object
Seats float64
New_Price object
Price float64
dtype: object

In [7]: data.describe().T

Out[7]: count mean std min 25% 50% 75% max

S.No. 7253.0 3626.000000 2093.905084 0.00 1813.0 3626.00 5439.00 7252.0

Year 7253.0 2013.365366 3.254421 1996.00 2011.0 2014.00 2016.00 2019.0

Kilometers_Driven 7253.0 58699.063146 84427.720583 171.00 34000.0 53416.00 73000.00 6500000.0

Seats 7200.0 5.279722 0.811660 0.00 5.0 5.00 5.00 10.0

Price 6019.0 9.479468 11.187917 0.44 3.5 5.64 9.95 160.0

In [8]: data.info()

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 2/19


4/15/24, 4:57 PM ~BDA-4 EDA Project
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7253 entries, 0 to 7252
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 S.No. 7253 non-null int64
1 Name 7253 non-null object
2 Location 7253 non-null object
3 Year 7253 non-null int64
4 Kilometers_Driven 7253 non-null int64
5 Fuel_Type 7253 non-null object
6 Transmission 7253 non-null object
7 Owner_Type 7253 non-null object
8 Mileage 7251 non-null object
9 Engine 7207 non-null object
10 Power 7207 non-null object
11 Seats 7200 non-null float64
12 New_Price 1006 non-null object
13 Price 6019 non-null float64
dtypes: float64(2), int64(3), object(9)
memory usage: 793.4+ KB

In [9]: data.nunique()

S.No. 7253
Out[9]:
Name 2041
Location 11
Year 23
Kilometers_Driven 3660
Fuel_Type 5
Transmission 2
Owner_Type 4
Mileage 450
Engine 150
Power 386
Seats 9
New_Price 625
Price 1373
dtype: int64

In [10]: data.isnull().sum()

S.No. 0
Out[10]:
Name 0
Location 0
Year 0
Kilometers_Driven 0
Fuel_Type 0
Transmission 0
Owner_Type 0
Mileage 2
Engine 46
Power 46
Seats 53
New_Price 6247
Price 1234
dtype: int64

In [12]: # Remove S.No. column from data


data = data.drop(['S.No.'], axis = 1)

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 3/19


4/15/24, 4:57 PM ~BDA-4 EDA Project
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7253 entries, 0 to 7252
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 7253 non-null object
1 Location 7253 non-null object
2 Year 7253 non-null int64
3 Kilometers_Driven 7253 non-null int64
4 Fuel_Type 7253 non-null object
5 Transmission 7253 non-null object
6 Owner_Type 7253 non-null object
7 Mileage 7251 non-null object
8 Engine 7207 non-null object
9 Power 7207 non-null object
10 Seats 7200 non-null float64
11 New_Price 1006 non-null object
12 Price 6019 non-null float64
dtypes: float64(2), int64(2), object(9)
memory usage: 736.8+ KB

In [13]: from datetime import date


date.today().year
data['Car_Age']=date.today().year-data['Year']
data.head()

Out[13]: Name Location Year Kilometers_Driven Fuel_Type Transmission Owner_Type Mileage E

Maruti
26.6
0 Wagon R Mumbai 2010 72000 CNG Manual First 9
km/kg
LXI CNG

Hyundai
Creta 1.6 19.67
1 Pune 2015 41000 Diesel Manual First
CRDi SX kmpl
Option

Honda 18.2
2 Chennai 2011 46000 Petrol Manual First
Jazz V kmpl

Maruti 20.77
3 Chennai 2012 87000 Diesel Manual First
Ertiga VDI kmpl

Audi A4
New 2.0 15.2
4 Coimbatore 2013 40670 Diesel Automatic Second
TDI kmpl
Multitronic

In [14]: data.describe().T

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 4/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

Out[14]: count mean std min 25% 50% 75% max

Year 7253.0 2013.365366 3.254421 1996.00 2011.0 2014.00 2016.00 2019.0

Kilometers_Driven 7253.0 58699.063146 84427.720583 171.00 34000.0 53416.00 73000.00 6500000.0

Seats 7200.0 5.279722 0.811660 0.00 5.0 5.00 5.00 10.0

Price 6019.0 9.479468 11.187917 0.44 3.5 5.64 9.95 160.0

Car_Age 7253.0 10.634634 3.254421 5.00 8.0 10.00 13.00 28.0

In [15]: data['Brand'] = data.Name.str.split().str.get(0)


data['Model'] = data.Name.str.split().str.get(1) + data.Name.str.split().str.get(2)
data[['Name','Brand','Model']]

Out[15]: Name Brand Model

0 Maruti Wagon R LXI CNG Maruti WagonR

1 Hyundai Creta 1.6 CRDi SX Option Hyundai Creta1.6

2 Honda Jazz V Honda JazzV

3 Maruti Ertiga VDI Maruti ErtigaVDI

4 Audi A4 New 2.0 TDI Multitronic Audi A4New

... ... ... ...

7248 Volkswagen Vento Diesel Trendline Volkswagen VentoDiesel

7249 Volkswagen Polo GT TSI Volkswagen PoloGT

7250 Nissan Micra Diesel XV Nissan MicraDiesel

7251 Volkswagen Polo GT TSI Volkswagen PoloGT

7252 Mercedes-Benz E-Class 2009-2013 E 220 CDI Avan... Mercedes-Benz E-Class2009-2013

7253 rows × 3 columns

In [16]: print(data.Brand.unique())

['Maruti' 'Hyundai' 'Honda' 'Audi' 'Nissan' 'Toyota' 'Volkswagen' 'Tata'


'Land' 'Mitsubishi' 'Renault' 'Mercedes-Benz' 'BMW' 'Mahindra' 'Ford'
'Porsche' 'Datsun' 'Jaguar' 'Volvo' 'Chevrolet' 'Skoda' 'Mini' 'Fiat'
'Jeep' 'Smart' 'Ambassador' 'Isuzu' 'ISUZU' 'Force' 'Bentley'
'Lamborghini' 'Hindustan' 'OpelCorsa']

In [17]: searchfor = ['Isuzu' ,'ISUZU','Mini','Land']


data[data.Brand.str.contains('|'.join(searchfor))].head(5)

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 5/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

Out[17]: Name Location Year Kilometers_Driven Fuel_Type Transmission Owner_Type Mileage

Land Rover
Range 12.7
13 Delhi 2014 72000 Diesel Automatic First
Rover 2.2L kmp
Pure

Land Rover
14 Freelander Pune 2012 85000 Diesel Automatic Second 0.0 kmp
2 TD4 SE

Mini
16.6
176 Countryman Jaipur 2017 8525 Diesel Automatic Second
kmp
Cooper D

Land Rover
Range 12.7
191 Coimbatore 2018 36091 Diesel Automatic First
Rover 2.2L kmp
Dynamic

Mini
Cooper 16.82
228 Kochi 2017 26327 Petrol Automatic First
Convertible kmp
S

In [18]: data["Brand"].replace({"ISUZU": "Isuzu", "Mini": "Mini Cooper","Land":"Land Rover"}, i

In [19]: data.describe(include='all').T

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 6/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

Out[19]: count unique top freq mean std min 25% 50

Mahindra
Name 7253 2041 XUV500 55 NaN NaN NaN NaN N
W8 2WD

Location 7253 11 Mumbai 949 NaN NaN NaN NaN N

Year 7253.0 NaN NaN NaN 2013.365366 3.254421 1996.0 2011.0 201

Kilometers_Driven 7253.0 NaN NaN NaN 58699.063146 84427.720583 171.0 34000.0 5341

Fuel_Type 7253 5 Diesel 3852 NaN NaN NaN NaN N

Transmission 7253 2 Manual 5204 NaN NaN NaN NaN N

Owner_Type 7253 4 First 5952 NaN NaN NaN NaN N

Mileage 7251 450 17.0 kmpl 207 NaN NaN NaN NaN N

Engine 7207 150 1197 CC 732 NaN NaN NaN NaN N

Power 7207 386 74 bhp 280 NaN NaN NaN NaN N

Seats 7200.0 NaN NaN NaN 5.279722 0.81166 0.0 5.0

63.71
New_Price 1006 625 6 NaN NaN NaN NaN N
Lakh

Price 6019.0 NaN NaN NaN 9.479468 11.187917 0.44 3.5 5

Car_Age 7253.0 NaN NaN NaN 10.634634 3.254421 5.0 8.0 1

Brand 7253 32 Maruti 1444 NaN NaN NaN NaN N

Model 7252 726 SwiftDzire 189 NaN NaN NaN NaN N

In [20]: cat_cols=data.select_dtypes(include=['object']).columns
num_cols = data.select_dtypes(include=np.number).columns.tolist()
print("Categorical Variables:")
print(cat_cols)
print("Numerical Variables:")
print(num_cols)

Categorical Variables:
Index(['Name', 'Location', 'Fuel_Type', 'Transmission', 'Owner_Type',
'Mileage', 'Engine', 'Power', 'New_Price', 'Brand', 'Model'],
dtype='object')
Numerical Variables:
['Year', 'Kilometers_Driven', 'Seats', 'Price', 'Car_Age']

In [25]: for col in num_cols:


print(col)
print('Skew :', round(data[col].skew(), 2))
plt.figure(figsize = (15, 4))
plt.subplot(1, 2, 1)
data[col].hist(grid=False)
plt.ylabel('count')
plt.subplot(1, 2, 2)
sns.boxplot(x=data[col])
plt.show()

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 7/19


4/15/24, 4:57 PM ~BDA-4 EDA Project
Year
Skew : -0.84

Kilometers_Driven
Skew : 61.58

Seats
Skew : 1.9

Price
Skew : 3.34

Car_Age
Skew : 0.84

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 8/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

In [21]: import matplotlib.pyplot as plt


import seaborn as sns

fig, axes = plt.subplots(3, 2, figsize=(18, 18))


fig.suptitle('Bar plot for all categorical variables in the dataset')

def add_percentage_labels(ax):
total = len(data)
for p in ax.patches:
percentage = '{:.1f}%'.format(100 * p.get_height() / total)
x = p.get_x() + p.get_width() / 2
y = p.get_height()
ax.annotate(percentage, (x, y), ha='center', va='bottom')

sns.countplot(ax=axes[0, 0], x='Fuel_Type', data=data, color='blue',


order=data['Fuel_Type'].value_counts().index)
add_percentage_labels(axes[0, 0])

sns.countplot(ax=axes[0, 1], x='Transmission', data=data, color='blue',


order=data['Transmission'].value_counts().index)
add_percentage_labels(axes[0, 1])

sns.countplot(ax=axes[1, 0], x='Owner_Type', data=data, color='blue',


order=data['Owner_Type'].value_counts().index)
add_percentage_labels(axes[1, 0])

sns.countplot(ax=axes[1, 1], x='Location', data=data, color='blue',


order=data['Location'].value_counts().index)
add_percentage_labels(axes[1, 1])

sns.countplot(ax=axes[2, 0], x='Brand', data=data, color='blue',


order=data['Brand'].head(20).value_counts().index)
add_percentage_labels(axes[2, 0])

sns.countplot(ax=axes[2, 1], x='Model', data=data, color='blue',


order=data['Model'].head(20).value_counts().index)
add_percentage_labels(axes[2, 1])

axes[1][1].tick_params(labelrotation=45)
axes[2][0].tick_params(labelrotation=90)
axes[2][1].tick_params(labelrotation=90)

plt.show()

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 9/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

In [22]: # Histograms for numerical variables in one row


numerical_variables = ['Year', 'Kilometers_Driven', 'Mileage', 'Engine', 'Power', 'Sea
plt.figure(figsize=(18, 6))
for i, column in enumerate(numerical_variables, 1):
plt.subplot(1, 8, i)
sns.histplot(data[column], kde=True, color='skyblue')
plt.title(f'Distribution of {column}')

plt.tight_layout()
plt.show()

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 10/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

In [26]: # Function for log transformation of the column


def log_transform(data,col):
for colname in col:
if (data[colname] == 1.0).all():
data[colname + '_log'] = np.log(data[colname]+1)
else:
data[colname + '_log'] = np.log(data[colname])
data.info()

In [27]: log_transform(data,['Kilometers_Driven','Price'])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7253 entries, 0 to 7252
Data columns (total 18 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 7253 non-null object
1 Location 7253 non-null object
2 Year 7253 non-null int64
3 Kilometers_Driven 7253 non-null int64
4 Fuel_Type 7253 non-null object
5 Transmission 7253 non-null object
6 Owner_Type 7253 non-null object
7 Mileage 7251 non-null object
8 Engine 7207 non-null object
9 Power 7207 non-null object
10 Seats 7200 non-null float64
11 New_Price 1006 non-null object
12 Price 6019 non-null float64
13 Car_Age 7253 non-null int64
14 Brand 7253 non-null object
15 Model 7252 non-null object
16 Kilometers_Driven_log 7253 non-null float64
17 Price_log 6019 non-null float64
dtypes: float64(4), int64(3), object(11)
memory usage: 1020.1+ KB

In [28]: #Log transformation of the feature 'Kilometers_Driven'


sns.distplot(data["Kilometers_Driven_log"], axlabel="Kilometers_Driven_log");

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 11/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

In [29]: plt.figure(figsize=(13,17))
sns.pairplot(data=data.drop(['Kilometers_Driven','Price'],axis=1))
plt.show()

<Figure size 1300x1700 with 0 Axes>

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 12/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

In [30]: fig, axarr = plt.subplots(4, 2, figsize=(12, 18))


data.groupby('Location')['Price_log'].mean().sort_values(ascending=False).plot.bar(ax=
axarr[0][0].set_title("Location Vs Price", fontsize=18)
data.groupby('Transmission')['Price_log'].mean().sort_values(ascending=False).plot.bar
axarr[0][1].set_title("Transmission Vs Price", fontsize=18)
data.groupby('Fuel_Type')['Price_log'].mean().sort_values(ascending=False).plot.bar(ax
axarr[1][0].set_title("Fuel_Type Vs Price", fontsize=18)
data.groupby('Owner_Type')['Price_log'].mean().sort_values(ascending=False).plot.bar(a
axarr[1][1].set_title("Owner_Type Vs Price", fontsize=18)
data.groupby('Brand')['Price_log'].mean().sort_values(ascending=False).head(10).plot.b
axarr[2][0].set_title("Brand Vs Price", fontsize=18)
data.groupby('Model')['Price_log'].mean().sort_values(ascending=False).head(10).plot.b
axarr[2][1].set_title("Model Vs Price", fontsize=18)
data.groupby('Seats')['Price_log'].mean().sort_values(ascending=False).plot.bar(ax=axa
axarr[3][0].set_title("Seats Vs Price", fontsize=18)
data.groupby('Car_Age')['Price_log'].mean().sort_values(ascending=False).plot.bar(ax=a
axarr[3][1].set_title("Car_Age Vs Price", fontsize=18)
plt.subplots_adjust(hspace=1.0)
plt.subplots_adjust(wspace=.5)
sns.despine()

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 13/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 14/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

In [31]: plt.figure(figsize=(12, 7))


sns.heatmap(data.drop(['Kilometers_Driven','Price'],axis=1).corr(), annot = True, vmin
plt.show()

In [32]: data.loc[data["Mileage"]==0.0,'Mileage']=np.nan
data.Mileage.isnull().sum()

2
Out[32]:

In [33]: data.Seats.isnull().sum()
data['Seats'].fillna(value=np.nan,inplace=True)
data['Seats']=data.groupby(['Model','Brand'])['Seats'].apply(lambda x:x.fillna(x.media
data['Engine']=data.groupby(['Brand','Model'])['Engine'].apply(lambda x:x.fillna(x.med
data['Power']=data.groupby(['Brand','Model'])['Power'].apply(lambda x:x.fillna(x.media

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 15/19


4/15/24, 4:57 PM ~BDA-4 EDA Project

---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Roaming\Python\Python39\site-packages\pandas\core\nanops.py in nanmedian(va
lues, axis, skipna, mask)
718 try:
--> 719 values = values.astype("f8")
720 except ValueError as err:

ValueError: could not convert string to float: '1489 CC'

The above exception was the direct cause of the following exception:

TypeError Traceback (most recent call last)


~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\groupby.py in app
ly(self, func, *args, **kwargs)
1274 try:
-> 1275 result = self._python_apply_general(f, self._selected_obj)
1276 except TypeError:

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\groupby.py in _py
thon_apply_general(self, f, data)
1308 """
-> 1309 keys, values, mutated = self.grouper.apply(f, data, self.axis)
1310

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\ops.py in apply(s
elf, f, data, axis)
840 group_axes = group.axes
--> 841 res = f(group)
842 if not _is_indexed_like(res, group_axes, axis):

~\AppData\Local\Temp\ipykernel_5292\2680079267.py in <lambda>(x)
3 data['Seats']=data.groupby(['Model','Brand'])['Seats'].apply(lambda x:x.filln
a(x.median()))
----> 4 data['Engine']=data.groupby(['Brand','Model'])['Engine'].apply(lambda x:x.fil
lna(x.median()))
5 data['Power']=data.groupby(['Brand','Model'])['Power'].apply(lambda x:x.filln
a(x.median()))

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py in median(sel
f, axis, skipna, level, numeric_only, **kwargs)
10801 ):
> 10802 return NDFrame.median(self, axis, skipna, level, numeric_only, **
kwargs)
10803

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py in median(sel
f, axis, skipna, level, numeric_only, **kwargs)
10373 def median(self, axis=None, skipna=None, level=None, numeric_only=None, *
*kwargs):
> 10374 return self._stat_function(
10375 "median", nanops.nanmedian, axis, skipna, level, numeric_only, **
kwargs

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py in _stat_funct
ion(self, name, func, axis, skipna, level, numeric_only, **kwargs)
10353 )
> 10354 return self._reduce(
10355 func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_o
nly
localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 16/19
4/15/24, 4:57 PM ~BDA-4 EDA Project

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\series.py in _reduce(sel
f, op, name, axis, skipna, numeric_only, filter_type, **kwds)
4391 with np.errstate(all="ignore"):
-> 4392 return op(delegate, skipna=skipna, **kwds)
4393

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\nanops.py in f(values, ax
is, skipna, **kwds)
154 else:
--> 155 result = alt(values, axis=axis, skipna=skipna, **kwds)
156

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\nanops.py in nanmedian(va
lues, axis, skipna, mask)
721 # e.g. "could not convert string to float: 'a'"
--> 722 raise TypeError(str(err)) from err
723 if mask is not None:

TypeError: could not convert string to float: '1489 CC'

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)


~\AppData\Roaming\Python\Python39\site-packages\pandas\core\nanops.py in nanmedian(va
lues, axis, skipna, mask)
718 try:
--> 719 values = values.astype("f8")
720 except ValueError as err:

ValueError: could not convert string to float: '1489 CC'

The above exception was the direct cause of the following exception:

TypeError Traceback (most recent call last)


~\AppData\Local\Temp\ipykernel_5292\2680079267.py in <module>
2 data['Seats'].fillna(value=np.nan,inplace=True)
3 data['Seats']=data.groupby(['Model','Brand'])['Seats'].apply(lambda x:x.filln
a(x.median()))
----> 4 data['Engine']=data.groupby(['Brand','Model'])['Engine'].apply(lambda x:x.fil
lna(x.median()))
5 data['Power']=data.groupby(['Brand','Model'])['Power'].apply(lambda x:x.filln
a(x.median()))

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\generic.py in app
ly(self, func, *args, **kwargs)
221 )
222 def apply(self, func, *args, **kwargs):
--> 223 return super().apply(func, *args, **kwargs)
224
225 @doc(_agg_template, examples=_agg_examples_doc, klass="Series")

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\groupby.py in app
ly(self, func, *args, **kwargs)
1284
1285 with group_selection_context(self):
-> 1286 return self._python_apply_general(f, self._selected_obj)
1287
1288 return result

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 17/19


4/15/24, 4:57 PM ~BDA-4 EDA Project
~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\groupby.py in _py
thon_apply_general(self, f, data)
1307 data after applying f
1308 """
-> 1309 keys, values, mutated = self.grouper.apply(f, data, self.axis)
1310
1311 return self._wrap_applied_output(

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\groupby\ops.py in apply(s
elf, f, data, axis)
839 # group might be modified
840 group_axes = group.axes
--> 841 res = f(group)
842 if not _is_indexed_like(res, group_axes, axis):
843 mutated = True

~\AppData\Local\Temp\ipykernel_5292\2680079267.py in <lambda>(x)
2 data['Seats'].fillna(value=np.nan,inplace=True)
3 data['Seats']=data.groupby(['Model','Brand'])['Seats'].apply(lambda x:x.filln
a(x.median()))
----> 4 data['Engine']=data.groupby(['Brand','Model'])['Engine'].apply(lambda x:x.fil
lna(x.median()))
5 data['Power']=data.groupby(['Brand','Model'])['Power'].apply(lambda x:x.filln
a(x.median()))

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py in median(sel
f, axis, skipna, level, numeric_only, **kwargs)
10800 self, axis=None, skipna=None, level=None, numeric_only=None, **kw
args
10801 ):
> 10802 return NDFrame.median(self, axis, skipna, level, numeric_only, **
kwargs)
10803
10804 setattr(cls, "median", median)

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py in median(sel
f, axis, skipna, level, numeric_only, **kwargs)
10372
10373 def median(self, axis=None, skipna=None, level=None, numeric_only=None, *
*kwargs):
> 10374 return self._stat_function(
10375 "median", nanops.nanmedian, axis, skipna, level, numeric_only, **
kwargs
10376 )

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py in _stat_funct
ion(self, name, func, axis, skipna, level, numeric_only, **kwargs)
10352 name, axis=axis, level=level, skipna=skipna, numeric_only=num
eric_only
10353 )
> 10354 return self._reduce(
10355 func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_o
nly
10356 )

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\series.py in _reduce(sel
f, op, name, axis, skipna, numeric_only, filter_type, **kwds)
4390 )
4391 with np.errstate(all="ignore"):
-> 4392 return op(delegate, skipna=skipna, **kwds)

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 18/19


4/15/24, 4:57 PM ~BDA-4 EDA Project
4393
4394 def _reindex_indexer(

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\nanops.py in f(values, ax
is, skipna, **kwds)
153 result = alt(values, axis=axis, skipna=skipna, **kwds)
154 else:
--> 155 result = alt(values, axis=axis, skipna=skipna, **kwds)
156
157 return result

~\AppData\Roaming\Python\Python39\site-packages\pandas\core\nanops.py in nanmedian(va
lues, axis, skipna, mask)
720 except ValueError as err:
721 # e.g. "could not convert string to float: 'a'"
--> 722 raise TypeError(str(err)) from err
723 if mask is not None:
724 values[mask] = np.nan

TypeError: could not convert string to float: '1489 CC'

In [ ]:

localhost:8888/nbconvert/html/~BDA-4 EDA Project.ipynb?download=false 19/19

You might also like