You are on page 1of 13

# Data Visualization

Data visualization is visual representation of data or information

matplotlib and seaborn are two libraries used for data visualization

matplotlib:
--lineplot,scatterplot

seaborn:
--univariant analysis(distplot,boxplot,countplot)
--bivariant analysis(barplot,scatterplot)
--multivariant analysis(heatmap)

In [1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
executed in 1.20s, finished 11:39:15 2023-03-28

In [8]:

a=np.array([1,2,3,4,5])
b=np.arange(10,60,10)
executed in 12ms, finished 11:48:15 2023-03-28

In [6]:

a
executed in 17ms, finished 11:46:46 2023-03-28

Out[6]:

array([1, 2, 3, 4, 5])

In [9]:

b
executed in 16ms, finished 11:48:17 2023-03-28

Out[9]:

array([10, 20, 30, 40, 50])


In [10]:

plt.plot(a,b)
executed in 86ms, finished 11:48:20 2023-03-28

Out[10]:

[<matplotlib.lines.Line2D at 0x21af1118310>]

In [11]:

plt.plot(a,b)
plt.xlabel("age")
plt.ylabel("height")
plt.title("age vs height")
executed in 91ms, finished 11:57:18 2023-03-28

Out[11]:

Text(0.5, 1.0, 'age vs height')


In [13]:

c=np.arange(20,70,10)
c
executed in 16ms, finished 12:01:12 2023-03-28

Out[13]:

array([20, 30, 40, 50, 60])

In [14]:

plt.plot(a,b)
plt.plot(a,c)
plt.xlabel("age")
plt.ylabel("height")
plt.title("age vs height")
executed in 89ms, finished 12:02:42 2023-03-28

Out[14]:

Text(0.5, 1.0, 'age vs height')


In [15]:

plt.plot(a,b,label='A')
plt.plot(a,c,label='B')
plt.xlabel("age")
plt.ylabel("height")
plt.title("age vs height")
plt.legend()
executed in 121ms, finished 12:11:45 2023-03-28

Out[15]:

<matplotlib.legend.Legend at 0x21af1ff8370>

In [18]:

a=np.random.randint(10,50,10)
b=np.arange(0,10)
c=np.arange(5,15)
plt.scatter(a,b,label='A')
plt.scatter(a,c,label='B')
plt.legend()
executed in 103ms, finished 12:26:54 2023-03-28

Out[18]:

<matplotlib.legend.Legend at 0x21af19e77f0>
In [9]:

import matplotlib.pyplot as plt


import numpy as np
import seaborn as sns
executed in 6ms, finished 09:52:20 2023-03-29

In [3]:

a=np.array([1,2,3,4,5])
b=np.array([4,8,10,8,7])
fig,ax=plt.subplots(figsize=(5,4))
plt.plot(a,b)
executed in 147ms, finished 07:45:07 2023-03-29

Out[3]:

[<matplotlib.lines.Line2D at 0x1b82ce43460>]

In [4]:

plt.scatter(a,b)
executed in 88ms, finished 07:45:16 2023-03-29

Out[4]:

<matplotlib.collections.PathCollection at 0x1b82cf46610>
In [5]:

a=np.random.randint(1,10,20)
executed in 29ms, finished 07:45:18 2023-03-29

In [22]:

executed in 5ms, finished 13:51:13 2023-03-28

Out[22]:

array([3, 4, 8, 3, 3, 3, 6, 6, 8, 3, 4, 9, 1, 9, 8, 8, 4, 3, 8, 3])

In [11]:

import pandas as pd
executed in 3ms, finished 09:52:31 2023-03-29

In [7]:

emp=pd.read_csv("employee.csv")
executed in 39ms, finished 07:45:25 2023-03-29

In [8]:

emp.head()
executed in 32ms, finished 07:45:27 2023-03-29

Out[8]:

Age Attrition BusinessTravel DailyRate Department DistanceFromHome Education E

0 41 Yes Travel_Rarely 1102 Sales 1 2

Research &
1 49 No Travel_Frequently 279 8 1
Development

Research &
2 37 Yes Travel_Rarely 1373 2 2
Development

Research &
3 33 No Travel_Frequently 1392 3 4
Development

Research &
4 27 No Travel_Rarely 591 2 1
Development

5 rows × 35 columns

In [10]:

import seaborn as sns


executed in 3ms, finished 09:52:24 2023-03-29

# Distribution plot
Distribution plot are used for analyzing the detailed distribution of a dataset.

In [28]:

sns.distplot(emp["DistanceFromHome"])
executed in 1.04s, finished 14:00:18 2023-03-28

C:\Users\Harshitha GS\anaconda3\lib\site-packages\seaborn\distributions.p
y:2619: FutureWarning: `distplot` is a deprecated function and will be rem
oved in a future version. Please adapt your code to use either `displot`
(a figure-level function with similar flexibility) or `histplot` (an axes-
level function for histograms).
warnings.warn(msg, FutureWarning)

Out[28]:

<AxesSubplot:xlabel='DistanceFromHome', ylabel='Density'>

# box plot
Box plot are used for analyzing the detailed distribution of a dataset and detection of
outliers.
In [29]:

sns.boxplot(emp["MonthlyIncome"])
executed in 150ms, finished 14:20:16 2023-03-28

C:\Users\Harshitha GS\anaconda3\lib\site-packages\seaborn\_decorators.py:3
6: FutureWarning: Pass the following variable as a keyword arg: x. From ve
rsion 0.12, the only valid positional argument will be `data`, and passing
other arguments without an explicit keyword will result in an error or mis
interpretation.
warnings.warn(

Out[29]:

<AxesSubplot:xlabel='MonthlyIncome'>

#countplot used to univariant analysis of categorical features


In [30]:

sns.countplot(emp["Department"])
executed in 171ms, finished 14:36:55 2023-03-28

C:\Users\Harshitha GS\anaconda3\lib\site-packages\seaborn\_decorators.py:3
6: FutureWarning: Pass the following variable as a keyword arg: x. From ve
rsion 0.12, the only valid positional argument will be `data`, and passing
other arguments without an explicit keyword will result in an error or mis
interpretation.
warnings.warn(

Out[30]:

<AxesSubplot:xlabel='Department', ylabel='count'>

In [31]:

sns.countplot(data=emp,x="Department")
executed in 91ms, finished 14:48:54 2023-03-28

Out[31]:

<AxesSubplot:xlabel='Department', ylabel='count'>
In [12]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
executed in 10ms, finished 10:12:22 2023-03-29

In [13]:

emp=pd.read_csv("employee.csv")
emp.head()
executed in 79ms, finished 10:14:19 2023-03-29

Out[13]:

Age Attrition BusinessTravel DailyRate Department DistanceFromHome Education E

0 41 Yes Travel_Rarely 1102 Sales 1 2

Research &
1 49 No Travel_Frequently 279 8 1
Development

Research &
2 37 Yes Travel_Rarely 1373 2 2
Development

Research &
3 33 No Travel_Frequently 1392 3 4
Development

Research &
4 27 No Travel_Rarely 591 2 1
Development

5 rows × 35 columns

# Bar plot
Bar plot shows the relationship between a numeric and a categoric variable.
In [15]:

sns.barplot(data=emp,x="Department",y='MonthlyIncome')
executed in 226ms, finished 10:21:42 2023-03-29

Out[15]:

<AxesSubplot:xlabel='Department', ylabel='MonthlyIncome'>

In [16]:

sns.barplot(data=emp,x="Department",y='MonthlyIncome',hue="Attrition")
executed in 308ms, finished 10:31:30 2023-03-29

Out[16]:

<AxesSubplot:xlabel='Department', ylabel='MonthlyIncome'>

# Scatter plot
Scatter plot shows the relationship between two numerical variables.
In [17]:

sns.scatterplot(data=emp,x="DailyRate",y="MonthlyIncome")
executed in 156ms, finished 10:44:58 2023-03-29

Out[17]:

<AxesSubplot:xlabel='DailyRate', ylabel='MonthlyIncome'>

In [19]:

sns.scatterplot(data=emp,x="DailyRate",y="MonthlyIncome",hue="MonthlyIncome",style="Dep
executed in 382ms, finished 10:52:03 2023-03-29

Out[19]:

<AxesSubplot:xlabel='DailyRate', ylabel='MonthlyIncome'>

# Heatmap
A heatmap is a two-dimensional graphical representation of data where the individual
values that are contained in a matrix are represented as colours
In [20]:

sns.heatmap(emp.corr())
executed in 330ms, finished 11:02:28 2023-03-29

Out[20]:

<AxesSubplot:>

In [22]:

ins=pd.read_csv("insurance.csv")
executed in 23ms, finished 11:08:24 2023-03-29

In [25]:

sns.heatmap(ins.corr(),annot=True)
executed in 198ms, finished 11:18:52 2023-03-29

Out[25]:

<AxesSubplot:>

You might also like