You are on page 1 of 5

https://colab.research.google.com/drive/1LFT07yiCQfTnBpo4j6L4vb7GLydHWW8s?usp=sharing
Access this link for the code in Google Colab.

import pandas as pd

# Lift the display limits so every column and row is shown when a
# DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Load the raw ice cream data set.
ice_cream_data = pd.read_csv('ice.cream.data.csv')
ice_cream_data.head()

ice_cream_data.shape

# Keep only the rows whose Unit is "000 tonnes" or "USD million".
# Each unit label is a single string and must stay on one line.
ice_cream_data = ice_cream_data[
    ice_cream_data.Unit.isin(["000 tonnes", "USD million"])
]

ice_cream_data.head()

# Selecting the required columns from dataset: the identifying columns plus
# one column per year from 2000 through 2019.
ice_cream_year_columns = [str(year) for year in range(2000, 2020)]
# .copy() gives an independent frame, so the column assignments made later
# in the script do not write into a slice of the filtered data.
ice_cream_data = ice_cream_data[
    ['Region', 'Country', 'Data Type', 'Unit', 'Unit Multiplier']
    + ice_cream_year_columns
].copy()

# Converting data on the basis of the unit multiplier, i.e. converting volume
# data to KG and value data to US dollars: every yearly figure is multiplied
# by the "Unit Multiplier" value of its own row.
year_columns = [str(year) for year in range(2000, 2020)]
ice_cream_data[year_columns] = ice_cream_data[year_columns].multiply(
    ice_cream_data["Unit Multiplier"], axis='index'
)
# Dropping the Unit and Unit Multiplier columns; they are no longer needed
# once the yearly figures have been scaled.
ice_cream_data.drop(columns=['Unit', 'Unit Multiplier'], inplace=True)

# Converting data into an analysis-friendly shape using the melt function.
# Step 1: wide -> long, one row per (Region, Country, Data Type, Year).
reshaped_ice_cream_data = pd.melt(
    ice_cream_data,
    id_vars=['Region', 'Country', 'Data Type'],
    var_name='Year',
    value_name='Value',
)
# Step 2: long -> one column per Data Type, summing the values that share
# the same (Region, Country, Year) key.
final_df = reshaped_ice_cream_data.pivot_table(
    index=['Region', 'Country', 'Year'],
    columns='Data Type',
    values='Value',
    aggfunc='sum',
)
# Turn the index levels back into ordinary columns.
final_df.reset_index(inplace=True)

# Remove the 'Total Volume (Tonnes)' column and preview the result.
final_df = final_df.drop(columns=['Total Volume (Tonnes)'])


final_df.head(20)

# Renaming variables to Ice Cream Value and Ice Cream Volume.
ice_cream_column_names = {
    'Retail Value RSP': 'Ice_cream_value',
    'Retail Volume (Tonnes)': 'Ice_cream_volume',
}
final_df = final_df.rename(columns=ice_cream_column_names)
final_df.head()

# Calculate Ice Cream Price as value per unit of volume (value / volume).
# A zero volume yields inf (or NaN for 0 / 0) instead of raising.
final_df['Ice Cream Price'] = (
    final_df['Ice_cream_value'] / final_df['Ice_cream_volume']
)

# Calculating total volume by region, separately for each year.
result = final_df.groupby(['Year', 'Region'])['Ice_cream_volume'].sum()

# Calculating the second largest region by ice cream volume in 2018.
# `result` spans every year, so it is narrowed to 2018 before ranking.
# Year labels are strings because they come from the melted column names.
volume_by_region_2018 = result.xs('2018', level='Year')
# nlargest(2) sorts descending, so position 1 is the runner-up region.
second_largest_region_2018 = volume_by_region_2018.nlargest(2).index[1]
print(
    "The second largest region by ice cream volume in 2018 is "
    f"{second_largest_region_2018}"
)

macro_data = pd.read_csv('macro.data.csv')

# The original filter names carried a leading space (" Total GDP") while the
# later per-capita step reads the 'Total GDP' column without one. Stripping
# surrounding whitespace from the labels makes both spellings agree.
# NOTE(review): confirm against the raw CSV how Subcategory values are spelled.
macro_data['Subcategory'] = macro_data['Subcategory'].str.strip()

# Keep only the three subcategories used by the analysis.
macro_data_sub = macro_data[
    macro_data['Subcategory'].isin([
        "Total GDP",
        "Possession of Refrigerator",
        "Population: National Estimates at January 1st",
    ])
]

# Select the identifying columns plus one column per year, 1977 through 2030.
# The selection is taken from the filtered frame (previously computed but
# never used) and copied so later column assignments act on an independent
# frame.
macro_year_columns = [str(year) for year in range(1977, 2031)]
macro_data = macro_data_sub[
    ['Region', 'Country', 'Subcategory', 'Unit', 'Unit Multiplier']
    + macro_year_columns
].copy()

# Converting data on the basis of the unit multiplier: every yearly figure
# (1977 through 2030) is multiplied by the "Unit Multiplier" value of its
# own row.
macro_value_columns = [str(year) for year in range(1977, 2031)]
macro_data[macro_value_columns] = macro_data[macro_value_columns].multiply(
    macro_data["Unit Multiplier"], axis='index'
)

# Dropping the Unit and Unit Multiplier columns; they are no longer needed
# once the yearly figures have been scaled.
macro_data.drop(columns=['Unit', 'Unit Multiplier'], inplace=True)

# Converting data into an analysis-friendly shape using the melt function.
# Step 1: wide -> long, one row per (Region, Country, Subcategory, Year).
reshaped_macro_data = pd.melt(
    macro_data,
    id_vars=['Region', 'Country', 'Subcategory'],
    var_name='Year',
    value_name='Value',
)
# Step 2: long -> one column per Subcategory, summing the values that share
# the same (Region, Country, Year) key.
final_df2 = reshaped_macro_data.pivot_table(
    index=['Region', 'Country', 'Year'],
    columns='Subcategory',
    values='Value',
    aggfunc='sum',
)
# Turn the index levels back into ordinary columns.
final_df2.reset_index(inplace=True)

# Combine the data: join the ice cream table with the macro table on the
# keys they share.
merge_keys = ['Region', 'Country', 'Year']
combined_df = final_df.merge(final_df2, on=merge_keys)

# Subset data to years which are present in Ice Cream Data
ice_cream_years = final_df['Year'].unique()
combined_df = combined_df[combined_df['Year'].isin(ice_cream_years)]

def convert_gdp(row):
    """Return *row* unchanged.

    Placeholder: no GDP conversion is implemented yet, so the input is
    handed back as-is.
    """
    # Assuming a similar conversion logic is needed for GDP
    # ...
    return row

combined_df.head(20)

# Calculate per capita variables
# GDP per capita = total GDP divided by the population estimate; the result
# is stored as a new column on combined_df.
combined_df['GDP per Capita'] = (
    combined_df['Total GDP']
    / combined_df['Population: National Estimates at January 1st']
)

You might also like