You are on page 1of 27

1/15/23, 5:00 PM assignment10HUNAIN

DATA VISUALIZATION
Matplotlib

In [ ]: #importing libraries


import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [ ]: #import data


data=sns.load_dataset('titanic')
data

Out[ ]: survived pclass sex age sibsp parch fare embarked class who adult_male

0 0 3 male 22.0 1 0 7.2500 S Third man True

1 1 1 female 38.0 1 0 71.2833 C First woman False

2 1 3 female 26.0 0 0 7.9250 S Third woman False

3 1 1 female 35.0 1 0 53.1000 S First woman False

4 0 3 male 35.0 0 0 8.0500 S Third man True

... ... ... ... ... ... ... ... ... ... ... ..

886 0 2 male 27.0 0 0 13.0000 S Second man True

887 1 1 female 19.0 0 0 30.0000 S First woman False

888 0 3 female NaN 1 2 23.4500 S Third woman False

889 1 1 male 26.0 0 0 30.0000 C First man True

890 0 3 male 32.0 0 0 7.7500 Q Third man True

891 rows × 15 columns

In [ ]: #simple line plot


plt.plot(data['age'],data['fare'])
plt.show

<function matplotlib.pyplot.show(close=None, block=None)>


Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 1/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #Customizing graph


plt.plot(data['age'],data['fare'],color="red")
plt.xlabel("Age of Passengers")
plt.ylabel("Fare")
plt.title("Age-Fare")
plt.show

<function matplotlib.pyplot.show(close=None, block=None)>


Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 2/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #Scatter plot


plt.plot(data['age'],data['fare'],"o")
plt.xlabel("Age of Passengers")
plt.ylabel("Fare")
plt.title("Age-Fare")
plt.show

<function matplotlib.pyplot.show(close=None, block=None)>


Out[ ]:

In [ ]: #Same grapg as above


plt.scatter(data['age'],data['fare'],c='red')
plt.xlabel("Age of Passengers")
plt.ylabel("Fare")
plt.title("Age-Fare")
plt.show

<function matplotlib.pyplot.show(close=None, block=None)>


Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 3/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #grouping data by different colors


color=[]
for sex in data['sex']:
if sex == 'male':
color.append('blue')
else:
color.append('red')
plt.scatter(data['age'],data['fare'],c=color)
plt.xlabel("Age of Passengers")
plt.ylabel("Fare")
plt.title("Age-Fare")
plt.show

<function matplotlib.pyplot.show(close=None, block=None)>


Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 4/27
1/15/23, 5:00 PM assignment10HUNAIN

#### Blue dots represent males


#### Red dots represent females

In [ ]: #Another way of writing the above code


colors=['blue' if sex=='male' else 'red' for sex in data]
plt.scatter(data['age'],data['fare'],c=color)
plt.xlabel("Age of Passengers")
plt.ylabel("Fare")
plt.title("Age-Fare")
plt.show

<function matplotlib.pyplot.show(close=None, block=None)>


Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 5/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #Histogram
plt.hist(data['survived'])
plt.show()
#output contains only two bars as we have only one and zero in survived column

In [ ]: #Histogram
plt.hist(data['age'])
plt.show()

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 6/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #Bins in Histogram


plt.hist(data['age'], bins=15)
plt.show() #bin makes the bars thinner

In [ ]: #Value Count


data.value_counts()

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 7/27
1/15/23, 5:00 PM assignment10HUNAIN

survived pclass sex age sibsp parch fare embarked class who ad
Out[ ]:
ult_male deck embark_town alive alone
1 1 female 24.0 0 0 69.3000 C First woman Fa
lse B Cherbourg yes True 2
58.0 0 0 26.5500 S First woman Fa
lse C Southampton yes True 1
49.0 0 0 25.9292 S First woman Fa
lse D Southampton yes True 1
1 0 76.7292 C First woman Fa
lse D Cherbourg yes False 1
50.0 0 1 247.5208 C First woman Fa
lse B Cherbourg yes False 1

..
16.0 0 0 86.5000 S First woman Fa
lse B Southampton yes True 1
1 39.4000 S First woman Fa
lse D Southampton yes False 1
57.9792 C First woman Fa
lse B Cherbourg
yes False 1
17.0 1 0 57.0000 S First woman Fa
lse B Southampton yes False 1
3 male 32.0 0 0 8.0500 S Third man Tr
ue E Southampton yes True 1
Length: 181, dtype: int64

In [ ]: #value count for specific columns


data['age'].value_counts()

24.00 30
Out[ ]:
22.00 27
18.00 26
19.00 25
28.00 25
..
36.50 1
55.50 1
0.92 1
23.50 1
74.00 1
Name: age, Length: 88, dtype: int64

In [ ]: data['age'].value_counts().plot(kind='bar')
#this graph is so skewed as we have many ages

<AxesSubplot:>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 8/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: data['survived'].value_counts().plot(kind='bar')
#bar graph for survived is not skewed as we have only 1 and 0 in survived column

<AxesSubplot:>
Out[ ]:

In [ ]: #Count plot by seaborn library


sns.countplot(data=data)
#the age column is empty from the top, this means age has null values

<AxesSubplot:ylabel='count'>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 9/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: sns.countplot(data['age'])
#again we obtained a very skew graph

c:\Users\AL Ghani Computer\anaconda3\lib\site-packages\seaborn\_decorators.py:36:


FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12,
the only valid positional argument will be `data`, and passing other arguments wit
hout an explicit keyword will result in an error or misinterpretation.
warnings.warn(
<AxesSubplot:xlabel='age', ylabel='count'>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 10/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: sns.countplot(data['survived'])
#not skewed graph

c:\Users\AL Ghani Computer\anaconda3\lib\site-packages\seaborn\_decorators.py:36:


FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12,
the only valid positional argument will be `data`, and passing other arguments wit
hout an explicit keyword will result in an error or misinterpretation.
warnings.warn(
<AxesSubplot:xlabel='survived', ylabel='count'>
Out[ ]:

In [ ]: #Data representation more accurately


sns.countplot(data['survived'],hue=data['sex'])

c:\Users\AL Ghani Computer\anaconda3\lib\site-packages\seaborn\_decorators.py:36:


FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12,
the only valid positional argument will be `data`, and passing other arguments wit
hout an explicit keyword will result in an error or misinterpretation.
warnings.warn(
<AxesSubplot:xlabel='survived', ylabel='count'>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 11/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #Division on basis of Pclass


sns.countplot(data=data,x='survived',hue='pclass')

<AxesSubplot:xlabel='survived', ylabel='count'>
Out[ ]:

In [ ]: #scatter plot between survived & age with hue pclass
sns.scatterplot(data=data,x='survived',y='age',hue='pclass')

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 12/27
1/15/23, 5:00 PM assignment10HUNAIN

<AxesSubplot:xlabel='survived', ylabel='age'>
Out[ ]:

In [ ]: #barplot between age & fare


sns.barplot(data=data,x='age',y='fare',hue='embarked')

<AxesSubplot:xlabel='age', ylabel='fare'>
Out[ ]:

In [ ]: #barplot between sex & fare


sns.barplot(data=data,x='sex',y='fare',hue='embarked')

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 13/27
1/15/23, 5:00 PM assignment10HUNAIN

<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

In [ ]: #barplot between sex & fare


sns.barplot(data=data,x='sex',y='fare',hue='pclass')

<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

In [ ]: #pairplot of all


sns.pairplot(data=data,vars=['age','survived','sex','fare'])

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 14/27
1/15/23, 5:00 PM assignment10HUNAIN

plt.figure(figsize=(10,15))
plt.show()

<Figure size 1000x1500 with 0 Axes>

In [ ]: #histograms of all


data.hist()

array([[<AxesSubplot:title={'center':'survived'}>,
Out[ ]:
<AxesSubplot:title={'center':'pclass'}>],
[<AxesSubplot:title={'center':'age'}>,
<AxesSubplot:title={'center':'sibsp'}>],
[<AxesSubplot:title={'center':'parch'}>,
<AxesSubplot:title={'center':'fare'}>]], dtype=object)

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 15/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #boxplot of fare & sex(male, female)


sns.boxplot(data=data,x='sex',y='fare',hue='pclass')
#data outside is called outlier

<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

In [ ]: #boxenplot
sns.boxenplot(data=data,x='sex',y='fare',hue='pclass')

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 16/27
1/15/23, 5:00 PM assignment10HUNAIN

<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

In [ ]: #Swarmplot
sns.swarmplot(data=data,x='sex',y='fare',hue='pclass')

c:\Users\AL Ghani Computer\anaconda3\lib\site-packages\seaborn\categorical.py:129


6: UserWarning: 65.5% of the points cannot be placed; you may want to decrease the
size of the markers or use stripplot.
warnings.warn(msg, UserWarning)
c:\Users\AL Ghani Computer\anaconda3\lib\site-packages\seaborn\categorical.py:129
6: UserWarning: 42.7% of the points cannot be placed; you may want to decrease the
size of the markers or use stripplot.
warnings.warn(msg, UserWarning)
<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 17/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #Stripplot
sns.stripplot(data=data,x='sex',y='fare',hue='pclass')

<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

In [ ]: #heatmap
corr=data.corr()
sns.heatmap(corr)
#range of heatmap (-1 - +1)
file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 18/27
1/15/23, 5:00 PM assignment10HUNAIN

<AxesSubplot:>
Out[ ]:

In [ ]: #boxplot
sns.boxplot(data=data,x='sex',y='fare',hue='who')

<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 19/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #swarmplot
sns.swarmplot(data=data,x='sex',y='fare',hue='who')

c:\Users\AL Ghani Computer\anaconda3\lib\site-packages\seaborn\categorical.py:129


6: UserWarning: 65.5% of the points cannot be placed; you may want to decrease the
size of the markers or use stripplot.
warnings.warn(msg, UserWarning)
c:\Users\AL Ghani Computer\anaconda3\lib\site-packages\seaborn\categorical.py:129
6: UserWarning: 42.7% of the points cannot be placed; you may want to decrease the
size of the markers or use stripplot.
warnings.warn(msg, UserWarning)
<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 20/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #boxenplot
sns.boxenplot(data=data,x='sex',y='fare',hue='who')

<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

In [ ]: #stripplot
sns.stripplot(data=data,x='sex',y='fare',hue='who')

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 21/27
1/15/23, 5:00 PM assignment10HUNAIN

<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

In [ ]: #violinplot
sns.violinplot(data=data,x='sex',y='fare',hue='who')

<AxesSubplot:xlabel='sex', ylabel='fare'>
Out[ ]:

In [ ]: #lmplot
sns.lmplot(data=data,x='age',y='fare',hue='who')

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 22/27
1/15/23, 5:00 PM assignment10HUNAIN

<seaborn.axisgrid.FacetGrid at 0x207e6b6b760>
Out[ ]:

In [ ]: #without hue


sns.lmplot(data=data,x='age',y='fare')

<seaborn.axisgrid.FacetGrid at 0x207e6de1190>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 23/27
1/15/23, 5:00 PM assignment10HUNAIN

if we have to find a graph of age below 30


In [ ]: #boxplot
sns.boxplot(data=data[data['age']<30],x='age',y='fare',hue='who')

<AxesSubplot:xlabel='age', ylabel='fare'>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 24/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #Age_grwoth column is added in the last


data["age_growth"]=data['age']+10
data.head()

Out[ ]: survived pclass sex age sibsp parch fare embarked class who adult_male de

0 0 3 male 22.0 1 0 7.2500 S Third man True Na

1 1 1 female 38.0 1 0 71.2833 C First woman False

2 1 3 female 26.0 0 0 7.9250 S Third woman False Na

3 1 1 female 35.0 1 0 53.1000 S First woman False

4 0 3 male 35.0 0 0 8.0500 S Third man True Na

In [ ]: #scatterplot
sns.scatterplot(data=data,x='age',y='age_growth',hue='who')

<AxesSubplot:xlabel='age', ylabel='age_growth'>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 25/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #new column of age_label


data['age_label']=pd.cut(data['age'],[0,10,18,30,40,80],labels=['kids','child','you
data.head()

Out[ ]: survived pclass sex age sibsp parch fare embarked class who adult_male de

0 0 3 male 22.0 1 0 7.2500 S Third man True Na

1 1 1 female 38.0 1 0 71.2833 C First woman False

2 1 3 female 26.0 0 0 7.9250 S Third woman False Na

3 1 1 female 35.0 1 0 53.1000 S First woman False

4 0 3 male 35.0 0 0 8.0500 S Third man True Na

In [ ]: #boxenplot
sns.boxenplot(data=data,x='fare',y='age_growth',hue='age_label')

<AxesSubplot:xlabel='fare', ylabel='age_growth'>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 26/27
1/15/23, 5:00 PM assignment10HUNAIN

In [ ]: #boxenplot
sns.boxenplot(data=data,y='age_growth',x='sex',hue='age_label')

<AxesSubplot:xlabel='sex', ylabel='age_growth'>
Out[ ]:

file:///C:/Users/nsoha/Downloads/assignment10HUNAIN.html 27/27

You might also like