You are on page 1 of 12

In [3]:  import numpy as np

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [14]:  #import data#


# Load the CardioGoodFitness CSV (relative path, resolved against the working
# directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='CardioGoodFitness-1.csv')

In [15]:  #Display top 5 data#


# Preview the first five records.
data.head(n=5)

Out[15]: Product Age Gender Education MaritalStatus Usage Fitness Income Miles

0 TM195 18 Male 14 Single 3 4 29562 112

1 TM195 19 Male 15 Single 2 3 31836 75

2 TM195 19 Female 14 Partnered 4 3 30699 66

3 TM195 19 Male 12 Single 3 3 32973 85

4 TM195 20 Male 13 Partnered 4 2 35247 47

In [16]:  # analytical summary of the dataset


data.describe(include='all')

Out[16]: Product Age Gender Education MaritalStatus Usage Fitness

count 180 180.000000 180 180.000000 180 180.000000 180.000000 1

unique 3 NaN 2 NaN 2 NaN NaN

top TM195 NaN Male NaN Partnered NaN NaN

freq 80 NaN 104 NaN 107 NaN NaN

mean NaN 28.788889 NaN 15.572222 NaN 3.455556 3.311111 537

std NaN 6.943498 NaN 1.617055 NaN 1.084797 0.958869 165

min NaN 18.000000 NaN 12.000000 NaN 2.000000 1.000000 295

25% NaN 24.000000 NaN 14.000000 NaN 3.000000 3.000000 440

50% NaN 26.000000 NaN 16.000000 NaN 3.000000 3.000000 505

75% NaN 33.000000 NaN 16.000000 NaN 4.000000 4.000000 586

max NaN 50.000000 NaN 21.000000 NaN 7.000000 5.000000 1045


In [18]:  #to project number of records and variables
data.shape

Out[18]: (180, 9)

In [20]:  data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Product 180 non-null object
1 Age 180 non-null int64
2 Gender 180 non-null object
3 Education 180 non-null int64
4 MaritalStatus 180 non-null object
5 Usage 180 non-null int64
6 Fitness 180 non-null int64
7 Income 180 non-null int64
8 Miles 180 non-null int64
dtypes: int64(6), object(3)
memory usage: 12.8+ KB

In [22]:  # Percentage of missing values in columns


# NOTE(review): the original statement was cut off after "2)." (unbalanced
# parenthesis); the .to_frame('null') tail is reconstructed from the "null"
# column header shown in Out[22] — confirm against the source notebook.
# isnull().mean() is the fraction of missing entries per column, i.e. sum()/len().
data_missing_columns = (data.isnull().mean() * 100).round(2).to_frame('null')
data_missing_columns

Out[22]: null

Product 0.0

Age 0.0

Gender 0.0

Education 0.0

MaritalStatus 0.0

Usage 0.0

Fitness 0.0

Income 0.0

Miles 0.0
In [23]:  #summary of distribution for relevant variables
data.hist(figsize=(20,30))

Out[23]: array([[<AxesSubplot:title={'center':'Age'}>,
<AxesSubplot:title={'center':'Education'}>],
[<AxesSubplot:title={'center':'Usage'}>,
<AxesSubplot:title={'center':'Fitness'}>],
[<AxesSubplot:title={'center':'Income'}>,
<AxesSubplot:title={'center':'Miles'}>]], dtype=object)
In [24]:  # Relationship between categorical and continuous variable
# Age distribution of gym goers with respect to gender
# Box plot of Age, drawn as one box per Gender category.
sns.boxplot(data=data, x="Gender", y="Age")

Out[24]: <AxesSubplot:xlabel='Gender', ylabel='Age'>

In [25]:  # Relationship between categorical and continuous variable


# Age distribution for each Product (grouped by product model, not by gender)
sns.boxplot(x="Product", y="Age", data = data)

Out[25]: <AxesSubplot:xlabel='Product', ylabel='Age'>


In [26]:  #Relationship between two categorical variables
# Contingency table: rows are Product values, columns are Gender values,
# cells hold the number of records in each combination.
pd.crosstab(index=data['Product'], columns=data['Gender'])

Out[26]: Gender Female Male

Product

TM195 40 40

TM498 29 31

TM798 7 33

In [27]:  #Relationship between two categorical variables


# Bar chart of record counts per Product, with bars split by Gender.
sns.countplot(data=data, x="Product", hue="Gender")

Out[27]: <AxesSubplot:xlabel='Product', ylabel='count'>

In [29]:  #pivot table


# NOTE(review): this line was truncated in the export (the call was never closed).
# aggfunc=len is reconstructed from Out[29], which shows identical per-group row
# counts under every value column — confirm against the source notebook.
pd.pivot_table(data, index=['Product', 'Gender'], columns=['MaritalStatus'], aggfunc=len)

Out[29]: Age Education Fitness Inco

MaritalStatus Partnered Single Partnered Single Partnered Single Partnered Sin

Product Gender

Female 27 13 27 13 27 13 27
TM195
Male 21 19 21 19 21 19 21

Female 15 14 15 14 15 14 15
TM498
Male 21 10 21 10 21 10 21

Female 4 3 4 3 4 3 4
TM798
Male 19 14 19 14 19 14 19
In [30]:  ## Checking the distribution of the gender ##

# Tally how many records fall under each Gender value.
Gender = data.loc[:, 'Gender'].value_counts()
Gender

Out[30]: Male 104


Female 76
Name: Gender, dtype: int64

In [34]:  sns.pairplot(data)

Out[34]: <seaborn.axisgrid.PairGrid at 0x2219ae0feb0>


In [35]:  sns.distplot(data['Age'])

Out[35]: <AxesSubplot:xlabel='Age', ylabel='Density'>

In [36]:  sns.distplot(data['Fitness'])

Out[36]: <AxesSubplot:xlabel='Fitness', ylabel='Density'>


In [37]:  data.hist(by='Gender', column = 'Income')

Out[37]: array([<AxesSubplot:title={'center':'Female'}>,
<AxesSubplot:title={'center':'Male'}>], dtype=object)

In [38]:  corr= data.corr()


sns.heatmap(corr, annot=True)

Out[38]: <AxesSubplot:>
In [43]:  # Identify missing values of dataframe
data.isnull()

Out[43]: Product Age Gender Education MaritalStatus Usage Fitness Income Miles

0 False False False False False False False False False

1 False False False False False False False False False

2 False False False False False False False False False

3 False False False False False False False False False

4 False False False False False False False False False

... ... ... ... ... ... ... ... ... ...

175 False False False False False False False False False

176 False False False False False False False False False

177 False False False False False False False False False

178 False False False False False False False False False

179 False False False False False False False False False

180 rows × 9 columns

In [45]:  import os
print(os.environ['PATH'])

C:\Users\HP\anaconda3;C:\Users\HP\anaconda3\Library\mingw-w64\bin;C:\Users
\HP\anaconda3\Library\usr\bin;C:\Users\HP\anaconda3\Library\bin;C:\Users\HP
\anaconda3\Scripts;C:\Users\HP\anaconda3\bin;C:\Users\HP\anaconda3\condabi
n;C:\Users\HP\anaconda3;C:\Users\HP\anaconda3\Library\mingw-w64\bin;C:\User
s\HP\anaconda3\Library\usr\bin;C:\Users\HP\anaconda3\Library\bin;C:\Users\H
P\anaconda3\Scripts;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbe
m;C:\Windows\System32\WindowsPowerShell\v1.0;C:\Windows\System32\OpenSSH;
C:\Users\HP\AppData\Local\Microsoft\WindowsApps;.
In [4]:  pip install nbconvert

Requirement already satisfied: nbconvert in c:\users\hp\anaconda3\lib\si


te-packages (6.4.4)
Requirement already satisfied: pandocfilters>=1.4.1 in c:\users\hp\anaco
nda3\lib\site-packages (from nbconvert) (1.5.0)
Requirement already satisfied: jupyter-core in c:\users\hp\anaconda3\lib
\site-packages (from nbconvert) (4.9.2)
Requirement already satisfied: pygments>=2.4.1 in c:\users\hp\anaconda3
\lib\site-packages (from nbconvert) (2.11.2)
Requirement already satisfied: traitlets>=5.0 in c:\users\hp\anaconda3\l
ib\site-packages (from nbconvert) (5.1.1)
Requirement already satisfied: nbformat>=4.4 in c:\users\hp\anaconda3\li
b\site-packages (from nbconvert) (5.3.0)
Requirement already satisfied: entrypoints>=0.2.2 in c:\users\hp\anacond
a3\lib\site-packages (from nbconvert) (0.4)
Requirement already satisfied: jinja2>=2.4 in c:\users\hp\anaconda3\lib
\site-packages (from nbconvert) (2.11.3)Note: you may need to restart th
e kernel to use updated packages.

Requirement already satisfied: testpath in c:\users\hp\anaconda3\lib\sit


e-packages (from nbconvert) (0.5.0)
Requirement already satisfied: defusedxml in c:\users\hp\anaconda3\lib\s
ite-packages (from nbconvert) (0.7.1)
Requirement already satisfied: bleach in c:\users\hp\anaconda3\lib\site-
packages (from nbconvert) (4.1.0)
Requirement already satisfied: beautifulsoup4 in c:\users\hp\anaconda3\l
ib\site-packages (from nbconvert) (4.11.1)
Requirement already satisfied: jupyterlab-pygments in c:\users\hp\anacon
da3\lib\site-packages (from nbconvert) (0.1.2)
Requirement already satisfied: mistune<2,>=0.8.1 in c:\users\hp\anaconda
3\lib\site-packages (from nbconvert) (0.8.4)
Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in c:\users\hp\ana
conda3\lib\site-packages (from nbconvert) (0.5.13)
Requirement already satisfied: MarkupSafe>=0.23 in c:\users\hp\anaconda3
\lib\site-packages (from jinja2>=2.4->nbconvert) (2.0.1)
Requirement already satisfied: nest-asyncio in c:\users\hp\anaconda3\lib
\site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert) (1.5.5)
Requirement already satisfied: jupyter-client>=6.1.5 in c:\users\hp\anac
onda3\lib\site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert) (6.1.1
2)
Requirement already satisfied: pyzmq>=13 in c:\users\hp\anaconda3\lib\si
te-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbconve
rt) (22.3.0)
Requirement already satisfied: tornado>=4.1 in c:\users\hp\anaconda3\lib
\site-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbco
nvert) (6.1)
Requirement already satisfied: python-dateutil>=2.1 in c:\users\hp\anaco
nda3\lib\site-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.
5.0->nbconvert) (2.8.2)
Requirement already satisfied: pywin32>=1.0 in c:\users\hp\anaconda3\lib
\site-packages (from jupyter-core->nbconvert) (302)
Requirement already satisfied: jsonschema>=2.6 in c:\users\hp\anaconda3
\lib\site-packages (from nbformat>=4.4->nbconvert) (4.4.0)
Requirement already satisfied: fastjsonschema in c:\users\hp\anaconda3\l
ib\site-packages (from nbformat>=4.4->nbconvert) (2.15.1)
Requirement already satisfied: attrs>=17.4.0 in c:\users\hp\anaconda3\li
b\site-packages (from jsonschema>=2.6->nbformat>=4.4->nbconvert) (21.4.
0)
Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.
14.0 in c:\users\hp\anaconda3\lib\site-packages (from jsonschema>=2.6->n
bformat>=4.4->nbconvert) (0.18.0)
Requirement already satisfied: six>=1.5 in c:\users\hp\anaconda3\lib\sit
e-packages (from python-dateutil>=2.1->jupyter-client>=6.1.5->nbclient<
0.6.0,>=0.5.0->nbconvert) (1.16.0)
Requirement already satisfied: soupsieve>1.2 in c:\users\hp\anaconda3\li
b\site-packages (from beautifulsoup4->nbconvert) (2.3.1)
Requirement already satisfied: webencodings in c:\users\hp\anaconda3\lib
\site-packages (from bleach->nbconvert) (0.5.1)
Requirement already satisfied: packaging in c:\users\hp\anaconda3\lib\si
te-packages (from bleach->nbconvert) (21.3)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in c:\users\hp\a
naconda3\lib\site-packages (from packaging->bleach->nbconvert) (3.0.4)

In [5]:  pip install pyppeteer

Collecting pyppeteer
Downloading pyppeteer-1.0.2-py3-none-any.whl (83 kB)
Requirement already satisfied: certifi>=2021 in c:\users\hp\anaconda3\lib\s
ite-packages (from pyppeteer) (2022.12.7)
Requirement already satisfied: appdirs<2.0.0,>=1.4.3 in c:\users\hp\anacond
a3\lib\site-packages (from pyppeteer) (1.4.4)
Collecting websockets<11.0,>=10.0
Downloading websockets-10.4-cp39-cp39-win_amd64.whl (101 kB)
Requirement already satisfied: tqdm<5.0.0,>=4.42.1 in c:\users\hp\anaconda3
\lib\site-packages (from pyppeteer) (4.64.0)
Collecting pyee<9.0.0,>=8.1.0
Downloading pyee-8.2.2-py2.py3-none-any.whl (12 kB)
Requirement already satisfied: urllib3<2.0.0,>=1.25.8 in c:\users\hp\anacon
da3\lib\site-packages (from pyppeteer) (1.26.9)
Requirement already satisfied: importlib-metadata>=1.4 in c:\users\hp\anaco
nda3\lib\site-packages (from pyppeteer) (4.11.3)
Requirement already satisfied: zipp>=0.5 in c:\users\hp\anaconda3\lib\site-
packages (from importlib-metadata>=1.4->pyppeteer) (3.7.0)
Requirement already satisfied: colorama in c:\users\hp\anaconda3\lib\site-p
ackages (from tqdm<5.0.0,>=4.42.1->pyppeteer) (0.4.4)
Installing collected packages: websockets, pyee, pyppeteer
Successfully installed pyee-8.2.2 pyppeteer-1.0.2 websockets-10.4
Note: you may need to restart the kernel to use updated packages.

You might also like