You are on page 1 of 10

Infant Mortality Rates OECD

September 15, 2020

0.0.1 Data Sources


https://data.oecd.org/healthstat/infant-mortality-rates.htm#indicator-chart
https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv

0.0.2 Imports

[1]: import pandas as pd


import matplotlib.pyplot as plt
%matplotlib inline

[2]: import plotly.graph_objs as go


from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf

0.1 Importing Data and EDA


[3]: data = pd.read_csv('InfantMortalityRates.csv')

[4]: data.head()

[4]: LOCATION INDICATOR SUBJECT MEASURE FREQUENCY TIME Value \


0 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1960 20.2
1 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1961 19.5
2 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1962 20.4
3 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1963 19.5
4 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1964 19.1

Flag Codes
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN

[5]: data.describe()

1
[5]: TIME Value
count 2365.000000 2365.000000
mean 1988.661311 20.369725
std 16.229813 24.748055
min 1960.000000 0.700000
25% 1975.000000 5.600000
50% 1989.000000 11.900000
75% 2003.000000 22.800000
max 2016.000000 165.100000

[6]: data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2365 entries, 0 to 2364
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 LOCATION 2365 non-null object
1 INDICATOR 2365 non-null object
2 SUBJECT 2365 non-null object
3 MEASURE 2365 non-null object
4 FREQUENCY 2365 non-null object
5 TIME 2365 non-null int64
6 Value 2365 non-null float64
7 Flag Codes 7 non-null object
dtypes: float64(1), int64(1), object(6)
memory usage: 147.9+ KB

[7]: data.groupby('LOCATION').count()

[7]: INDICATOR SUBJECT MEASURE FREQUENCY TIME Value Flag Codes


LOCATION
AUS 57 57 57 57 57 57 0
AUT 57 57 57 57 57 57 0
BEL 57 57 57 57 57 57 0
BRA 56 56 56 56 56 56 0
CAN 55 55 55 55 55 55 0
CHE 57 57 57 57 57 57 0
CHL 56 56 56 56 56 56 0
CHN 47 47 47 47 47 47 0
COL 56 56 56 56 56 56 0
CRI 51 51 51 51 51 51 0
CZE 57 57 57 57 57 57 1
DEU 57 57 57 57 57 57 0
DNK 57 57 57 57 57 57 0
ESP 57 57 57 57 57 57 0
EST 57 57 57 57 57 57 0

2
FIN 57 57 57 57 57 57 0
FRA 57 57 57 57 57 57 2
GBR 57 57 57 57 57 57 1
GRC 57 57 57 57 57 57 0
HUN 57 57 57 57 57 57 0
IDN 56 56 56 56 56 56 0
IND 56 56 56 56 56 56 0
IRL 57 57 57 57 57 57 0
ISL 57 57 57 57 57 57 0
ISR 47 47 47 47 47 47 0
ITA 57 57 57 57 57 57 0
JPN 57 57 57 57 57 57 0
KOR 23 23 23 23 23 23 0
LTU 57 57 57 57 57 57 0
LUX 57 57 57 57 57 57 0
LVA 57 57 57 57 57 57 0
MEX 55 55 55 55 55 55 0
NLD 57 57 57 57 57 57 0
NOR 57 57 57 57 57 57 0
NZL 55 55 55 55 55 55 0
POL 57 57 57 57 57 57 0
PRT 57 57 57 57 57 57 0
RUS 37 37 37 37 37 37 2
SVK 57 57 57 57 57 57 1
SVN 57 57 57 57 57 57 0
SWE 57 57 57 57 57 57 0
TUR 21 21 21 21 21 21 0
USA 56 56 56 56 56 56 0
ZAF 42 42 42 42 42 42 0

# In [8]:
# Show every row for KOR and TUR, the two locations with the fewest
# observations in the per-location counts (23 and 21 rows).
data[data['LOCATION'].isin(['KOR', 'TUR'])]

[8]: LOCATION INDICATOR SUBJECT MEASURE FREQUENCY TIME \


853 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 1970
854 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 1981
855 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 1984
856 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 1985
857 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 1987
858 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 1989
859 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 1991
860 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 1993
861 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 1996
862 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 1999
863 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2002
864 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2005
865 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2006
866 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2007

3
867 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2008
868 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2009
869 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2010
870 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2011
871 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2012
872 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2013
873 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2014
874 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2015
875 KOR INFANTMORTALITY TOT DEATH_1000BIRTH A 2016
1499 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 1996
1500 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 1997
1501 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 1998
1502 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 1999
1503 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2000
1504 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2001
1505 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2002
1506 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2003
1507 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2004
1508 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2005
1509 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2006
1510 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2007
1511 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2008
1512 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2009
1513 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2010
1514 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2011
1515 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2012
1516 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2013
1517 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2014
1518 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2015
1519 TUR INFANTMORTALITY TOT DEATH_1000BIRTH A 2016

Value Flag Codes


853 45.0 NaN
854 17.0 NaN
855 16.0 NaN
856 13.0 NaN
857 13.0 NaN
858 12.0 NaN
859 10.0 NaN
860 9.9 NaN
861 7.7 NaN
862 6.2 NaN
863 5.3 NaN
864 4.7 NaN
865 4.1 NaN
866 3.5 NaN
867 3.4 NaN

4
868 3.2 NaN
869 3.2 NaN
870 3.0 NaN
871 2.9 NaN
872 3.0 NaN
873 3.0 NaN
874 2.7 NaN
875 2.8 NaN
1499 40.9 NaN
1500 38.8 NaN
1501 36.5 NaN
1502 33.9 NaN
1503 28.4 NaN
1504 28.6 NaN
1505 29.6 NaN
1506 29.1 NaN
1507 27.4 NaN
1508 25.8 NaN
1509 24.5 NaN
1510 16.5 NaN
1511 15.7 NaN
1512 13.9 NaN
1513 12.0 NaN
1514 11.6 NaN
1515 11.6 NaN
1516 10.8 NaN
1517 11.1 NaN
1518 10.2 NaN
1519 10.0 NaN

# In [9]:
# Country lookup table (name, GDP, three-letter code) from the plotly
# datasets repository. The URL is written as two adjacent string literals,
# which Python joins into one, so the line stays short without breaking
# the string.
country_names = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/'
    'master/2014_world_gdp_with_codes.csv')

# In [10]:
# Rename the columns positionally so the code column is called 'LOCATION',
# matching the key column of the infant-mortality data.
country_names.columns = ['COUNTRY', 'GDP (BILLIONS)', 'LOCATION']

[11]: country_names.head()

[11]: COUNTRY GDP (BILLIONS) LOCATION


0 Afghanistan 21.71 AFG
1 Albania 13.40 ALB
2 Algeria 227.80 DZA
3 American Samoa 0.75 ASM
4 Andorra 4.80 AND

[12]: data_plus = pd.merge(data, country_names, on='LOCATION', how='left')

[13]: data_plus.head()

5
[13]: LOCATION INDICATOR SUBJECT MEASURE FREQUENCY TIME Value \
0 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1960 20.2
1 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1961 19.5
2 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1962 20.4
3 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1963 19.5
4 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1964 19.1

Flag Codes COUNTRY GDP (BILLIONS)


0 NaN Australia 1483.0
1 NaN Australia 1483.0
2 NaN Australia 1483.0
3 NaN Australia 1483.0
4 NaN Australia 1483.0

[14]: data_plus.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2365 entries, 0 to 2364
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 LOCATION 2365 non-null object
1 INDICATOR 2365 non-null object
2 SUBJECT 2365 non-null object
3 MEASURE 2365 non-null object
4 FREQUENCY 2365 non-null object
5 TIME 2365 non-null int64
6 Value 2365 non-null float64
7 Flag Codes 7 non-null object
8 COUNTRY 2365 non-null object
9 GDP (BILLIONS) 2365 non-null float64
dtypes: float64(2), int64(1), object(7)
memory usage: 203.2+ KB

0.2 Plotly
[15]: #Use Plotly in offline mode
init_notebook_mode(connected=True)
cf.go_offline()

# In [16]:
# Build an animated scatter plot as a plain figure dict (one trace per
# country, one frame per year) and write it to 'animation.html'.
firstyear = 1980
years = list(range(firstyear, 2016))  # range() excludes its stop value

# Unique country names in order of first appearance; each gets its own trace.
countries = list(dict.fromkeys(data_plus['COUNTRY']))

# Figure skeleton: 'data' holds the traces shown initially, 'frames' holds
# one entry per year.
figure = {
    'data': [],
    'layout': {},
    'frames': []
}

# Fixed canvas size and axes.
figure['layout'] = {'autosize': False,
                    'width': 1000,
                    'height': 1000}
figure['layout']['xaxis'] = {'title': 'Infant Mortality per 1000 Live Births',
                             'rangemode': 'tozero',
                             'range': [0, 100]}
# NOTE(review): Plotly's axis reference describes 'ticktext' as used together
# with 'tickvals' (tickmode 'array'); confirm this setting has any effect.
figure['layout']['yaxis'] = {'ticktext': countries}
figure['layout']['hovermode'] = 'closest'

# Play / Pause buttons. The Pause button animates to [None] with zero
# duration in 'immediate' mode.
figure['layout']['updatemenus'] = [{
    'buttons': [{'args': [None, {'frame': {'duration': 500,
                                           'redraw': False},
                                 'fromcurrent': True,
                                 'transition': {'duration': 300,
                                                'easing': 'quadratic-in-out'}}],
                 'label': 'Play',
                 'method': 'animate'},
                {'args': [[None], {'frame': {'duration': 0,
                                             'redraw': False},
                                   'mode': 'immediate',
                                   'transition': {'duration': 0}}],
                 'label': 'Pause',
                 'method': 'animate'}],
    'direction': 'left',
    'pad': {'r': 10, 't': 87},
    'showactive': False,
    'type': 'buttons',
    'x': 0.1,
    'xanchor': 'right',
    'y': 0,
    'yanchor': 'top'}]

# Year slider; its steps are appended in the loop below and the finished
# slider is attached to the layout afterwards.
sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'Year:',
        'visible': True,
        'xanchor': 'right'},
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []}

# One frame per year, one trace per country inside each frame.
for year in years:
    frame_name = str(year)
    frame = {'data': [], 'name': frame_name}

    # Select this year's rows once rather than once per country.
    rows_for_year = data_plus[data_plus['TIME'] == year]

    for country in countries:
        country_rows = rows_for_year[rows_for_year['COUNTRY'] == country]

        if country_rows.empty:
            # No observation for this country and year: emit a placeholder
            # marker at x=0 so every frame carries one trace per country.
            data_dict = {
                'x': [0],
                'y': [country],
                'mode': 'markers',
                'text': [0],
                'name': country}
        else:
            data_dict = {
                'x': list(country_rows['Value']),
                'y': list(country_rows['COUNTRY']),
                'mode': 'markers',
                'text': list(country_rows['Value']),
                'name': country}

        # The first year's traces are also the figure's initial traces.
        if year == firstyear:
            figure['data'].append(data_dict)
        frame['data'].append(data_dict)

    figure['frames'].append(frame)

    # The slider step refers to the frame by the same string used as the
    # frame's name above.
    slider_step = {'args': [[frame_name],
                            {'frame': {'duration': 300, 'redraw': False},
                             'mode': 'immediate',
                             'transition': {'duration': 300}}],
                   'label': frame_name,
                   'method': 'animate'}
    sliders_dict['steps'].append(slider_step)

figure['layout']['sliders'] = [sliders_dict]

plot(figure, filename='animation.html')

[16]: 'animation.html'

0.3 Plotly Express


[18]: import plotly.express as px

# In [19]-[22]:
# Subset used for the Plotly Express figures: years 1980 through 2015
# (TIME is an integer column, so this equals TIME > 1979 and TIME < 2016),
# leaving out KOR and TUR.
in_year_window = data_plus['TIME'].between(1980, 2015)
is_excluded = data_plus['LOCATION'].isin(['KOR', 'TUR'])
data_xp = data_plus[in_year_window & ~is_excluded].copy()

[23]: data_xp.head()

[23]: LOCATION INDICATOR SUBJECT MEASURE FREQUENCY TIME Value \


20 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1980 10.7
21 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1981 10.0
22 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1982 10.3
23 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1983 9.6
24 AUS INFANTMORTALITY TOT DEATH_1000BIRTH A 1984 9.2

Flag Codes COUNTRY GDP (BILLIONS)


20 NaN Australia 1483.0
21 NaN Australia 1483.0
22 NaN Australia 1483.0
23 NaN Australia 1483.0
24 NaN Australia 1483.0

# In [24]:
# Animated scatter with Plotly Express: one marker per country, one
# animation frame per TIME value, x axis fixed to 0-100.
fig = px.scatter(data_xp, x="Value", y="COUNTRY", color="COUNTRY",
                 hover_name="COUNTRY",
                 animation_frame="TIME", animation_group="COUNTRY",
                 range_x=[0, 100],
                 labels=dict(COUNTRY="Country", Value="Infant Mortality Rate"))

# Fixed canvas size, the same as the hand-built figure.
fig.update(layout={'autosize': False, 'width': 1000, 'height': 1000})

# In [26]:
# Animated world choropleth: locations are the three-letter LOCATION codes,
# colour is the infant mortality Value, one animation frame per TIME value.
chorfig = px.choropleth(data_xp, locations="LOCATION", color="Value",
                        hover_name="COUNTRY", animation_frame="TIME",
                        color_continuous_scale=px.colors.sequential.deep,
                        scope="world")

# Write the map to a standalone HTML file.
plot(chorfig, filename='chorfig.html')

[26]: 'chorfig.html'

[27]: plot(fig, filename = 'ani_express.html')

[27]: 'ani_express.html'

[ ]:

10

You might also like