You are on page 1 of 5

import pandas as pd

import seaborn as sns

# Load the training data from 'train.csv' (path is relative to the working directory).
df = pd.read_csv('train.csv')

# Preview the first few rows of the loaded frame.
df.head()

PassengerId Survived Pclass \


0 1 0 3
1 2 1 1
2 3 1 3
3 4 1 1
4 5 0 3

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1
2                             Heikkinen, Miss. Laina  female  26.0      0
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1
4                           Allen, Mr. William Henry    male  35.0      0

Parch Ticket Fare Cabin Embarked


0 0 A/5 21171 7.2500 NaN S
1 0 PC 17599 71.2833 C85 C
2 0 STON/O2. 3101282 7.9250 NaN S
3 0 113803 53.1000 C123 S
4 0 373450 8.0500 NaN S

1. Categorical Data
a. Countplot
# Bar chart of how many rows fall into each 'Embarked' category.
# The column is named by keyword: newer seaborn releases treat the first
# positional argument as `data`, so a bare Series no longer maps to the x-axis.
sns.countplot(x='Embarked', data=df)
# Pandas-only alternative (shown for the 'Survived' column):
# df['Survived'].value_counts().plot(kind='bar')

<matplotlib.axes._subplots.AxesSubplot at 0x1cc48b021f0>
b. PieChart
# Pie chart of the 'Sex' value counts; each slice is labelled with its
# percentage formatted to two decimals.
sex_counts = df['Sex'].value_counts()
sex_counts.plot.pie(autopct='%.2f')

<matplotlib.axes._subplots.AxesSubplot at 0x1cc48b142e0>
2. Numerical Data
a. Histogram
import matplotlib.pyplot as plt
# Histogram of the 'Age' column in 5 bins. Missing ages are dropped
# explicitly so the binning never depends on how NaN is handled downstream.
plt.hist(df['Age'].dropna(), bins=5)

(array([100., 346., 188., 69., 11.]),


array([ 0.42 , 16.336, 32.252, 48.168, 64.084, 80. ]),
<a list of 5 Patch objects>)

b. Distplot
# Distribution of 'Age': density-normalised histogram with a KDE curve on top.
# `sns.distplot` is deprecated and scheduled for removal; `histplot` with
# `kde=True, stat='density'` is its documented replacement (needs seaborn >= 0.11).
sns.histplot(data=df, x='Age', kde=True, stat='density')

<matplotlib.axes._subplots.AxesSubplot at 0x1cc4914c4f0>
c. Boxplot
# Box plot of 'Age' drawn along the x-axis (horizontal box).
# Passing the Series as `x=` keeps that orientation across seaborn versions;
# a bare positional argument is interpreted as `data` in newer releases.
sns.boxplot(x=df['Age'])

<matplotlib.axes._subplots.AxesSubplot at 0x1cc48ee1520>

# Smallest value in the 'Age' column.
df['Age'].min()
0.42

# Largest value in the 'Age' column.
df['Age'].max()

80.0

# Arithmetic mean of the 'Age' column.
df['Age'].mean()

29.69911764705882

# Skewness of the 'Age' column (a measure of the distribution's asymmetry).
df['Age'].skew()

0.38910778230082704

You might also like