Professional Documents
Culture Documents
Practical No. 1
2. Data loading-
# Load the automobile data from the fixed CSV location on the author's machine.
csv_path = r'C:\Users\My Pc\Desktop\automobile.csv'
dataset = pd.read_csv(csv_path)
# Bare expression: only has a visible effect as the last expression of an
# interactive/notebook cell.
dataset
1
PDVVP College of Engineering
# Quick structural look at the loaded frame.
# First rows (DataFrame.head defaults to 5 rows).
dataset.head()
# (rows, columns) tuple.
dataset.shape
# Prints column names, non-null counts and dtypes.
dataset.info()
2
PDVVP College of Engineering
3. Data cleaning-
# Keep a copy in which the '?' placeholders become real missing values.
# np.nan is used because the np.NAN alias was removed in NumPy 2.0.
data = dataset.replace('?', np.nan)
# Missing-value count per column of the converted copy.
data.isnull().sum()

# The previous version filled the gaps with the literal text 'np.nan', which
# hid the missing values behind a string and turned numeric columns into
# object columns. Impute with real values instead, and assign the result back
# rather than calling fillna(inplace=True) on a column selection, which is not
# guaranteed to modify the frame under pandas copy-on-write.
# NOTE(review): mean/mode imputation is an assumption -- confirm the intended
# strategy for these two columns.
stroke_values = pd.to_numeric(dataset['stroke'], errors='coerce')
dataset['stroke'] = stroke_values.fillna(stroke_values.mean())

binned_mode = dataset['horsepower-binned'].mode()
if not binned_mode.empty:  # mode() is empty when the whole column is missing
    dataset['horsepower-binned'] = dataset['horsepower-binned'].fillna(binned_mode.iloc[0])

# Missing-value count per column after imputation.
dataset.isnull().sum()
3
PDVVP College of Engineering
# Show the first 10 rows of the frame.
dataset.head(10)
4
PDVVP College of Engineering
5. Univariate analysis-
# Histograms of the main numeric features.
# DataFrame.hist creates its own figure (sized by figsize), so the extra
# plt.figure() calls that used to surround it only produced empty figures and
# made tight_layout() act on an empty one instead of the histogram grid.
dataset[['engine-size','peak-rpm','curb-weight','horsepower','price']].hist(figsize=(10,8))
plt.tight_layout()
plt.show()
5
PDVVP College of Engineering
6. Findings-
# Categorical frequency plots on a 2x2 grid in figure 1.
plt.figure(1)

# Top-left panel: relative frequency of each engine type.
# The column key 'engine-type' must be a single string literal; it was split
# across two lines, which is a syntax error.
plt.subplot(221)
dataset['engine-type'].value_counts(normalize=True).plot(figsize=(10,8),kind='bar',color='green')
plt.title("Number of Engine TYpe frequency diagram")
# NOTE(review): normalize=True plots proportions, while the label says
# "Number of" -- confirm which one is intended.
plt.ylabel('Number of Engine Type')
plt.xlabel('engine-type')

# Select the top-right panel; the statements that draw into it follow.
plt.subplot(222)
6
PDVVP College of Engineering
# Relative frequency of door counts, drawn into the currently selected panel
# (plt.subplot(222) is selected just before this point).
# The column key 'num-of-doors' must be a single string literal; it was split
# across two lines, which is a syntax error.
dataset['num-of-doors'].value_counts(normalize=True).plot(figsize=(10,8),kind='bar',color='red')
plt.title("Number of Door frequency diagram")
# NOTE(review): normalize=True plots proportions, while the labels say
# "Number of" -- confirm which one is intended.
plt.ylabel('Number of Doors')
plt.xlabel('num-of-doors')

# Bottom-right panel: relative frequency of each body style.
# NOTE(review): grid position 223 is never used -- confirm that is intended.
plt.subplot(224)
dataset['body-style'].value_counts(normalize=True).plot(figsize=(10,8),kind='bar',color='purple')
plt.title("Number of Body Style frequency diagram")
plt.ylabel('Number of vehicles')
plt.xlabel('body-style')

plt.tight_layout()
plt.show()
7
PDVVP College of Engineering
# Pairwise correlation of the numeric columns, shown as an annotated heatmap.
# Restrict to numeric dtypes explicitly: since pandas 2.0, DataFrame.corr()
# no longer drops text columns silently and raises on them instead.
corr = dataset.select_dtypes(include='number').corr()
plt.figure(figsize=(20,9))
a = sns.heatmap(corr, cmap='brg', annot=True, fmt='.2f')
8
PDVVP College of Engineering
7. Bivariate analysis –
# Price distribution per manufacturer.
# Each plot gets its own figure: when these statements run back to back, the
# second boxplot would otherwise be drawn onto the first one's axes, and the
# second rcParams size change would not affect the already-open figure.
plt.rcParams['figure.figsize'] = (18, 9)
plt.figure()
ax = sns.boxplot(x="make", y="price", data=dataset)
plt.show()

# Price distribution per body style.
plt.rcParams['figure.figsize'] = (19, 7)
plt.figure()
ax = sns.boxplot(x="body-style", y="price", data=dataset)
plt.show()
9
PDVVP College of Engineering
# Price distribution per drive-wheel configuration.
plt.rcParams['figure.figsize'] = (10, 5)
plt.figure()
ax = sns.boxplot(x="drive-wheels", y="price", data=dataset)
plt.show()

# Engine size against price with a fitted regression line. A fresh figure
# keeps the regression from being drawn on top of the boxplot above.
plt.figure()
sns.regplot(x="engine-size", y="price", data=dataset)
plt.ylim(0,)  # start the price axis at zero
plt.show()

# Correlation matrix for the same pair of columns.
dataset[["engine-size","price"]].corr()
10
PDVVP College of Engineering
# Highway mpg against price with a fitted regression line.
plt.figure()
sns.regplot(x="highway-mpg", y="price", data=dataset)
plt.ylim(0,)  # start the price axis at zero
plt.show()

# Correlation matrix for the same pair of columns. This now reads from
# `dataset`, like every other correlation and plot in this analysis, rather
# than from the separate `data` copy.
dataset[['highway-mpg','price']].corr()

# Peak rpm against price, on its own figure so it does not overlay the
# highway-mpg plot.
plt.figure()
sns.regplot(x="peak-rpm", y="price", data=dataset)
plt.show()
11
PDVVP College of Engineering
# Correlation matrix for peak rpm and price.
dataset[['peak-rpm','price']].corr()

# Price per body style (seaborn's barplot aggregates with the mean by default).
# Each bar chart gets its own figure so that the two different categorical
# x-axes are not drawn on top of each other.
plt.figure()
sns.barplot(x="body-style", y="price", data=dataset)
plt.show()

# Price per engine location.
plt.figure()
sns.barplot(x="engine-location", y="price", data=dataset)
plt.show()
12
PDVVP College of Engineering
# Bar chart of price grouped by drive-wheel configuration.
sns.barplot(
    data=dataset,
    x="drive-wheels",
    y="price",
)
13