You are on page 1 of 4

Pandas

1. Pandas Data Structures - Hands-on

import pandas as pd
import numpy as np

# Student labels shared by every Series in this exercise.
student_ids = ['s1', 's2', 's3', 's4', 's5']

# Class A: fixed measurements, labelled by student id at construction time.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4], index=student_ids)
print(heights_A.shape)

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9], index=student_ids)
print(weights_A.dtype)

df_A = pd.DataFrame({
    'Student_height': heights_A,
    'Student_weight': weights_A,
})
print(df_A.shape)

# Class B: the generator is reset to seed 100 before each draw, so heights
# and weights are built from the same underlying standard-normal sample.
np.random.seed(100)
x = np.random.normal(loc=170.0, scale=25.0, size=5)
heights_B = pd.Series(x, index=student_ids)

np.random.seed(100)
y = np.random.normal(loc=75.0, scale=12.0, size=5)
weights_B = pd.Series(y, index=student_ids)
print(heights_B.mean())

df_B = pd.DataFrame(
    {'Student_height': heights_B, 'Student_weight': weights_B},
    index=weights_B.index,
)
print(df_B.columns)

2. Working with CSVs

import pandas as pd
import numpy as np
import os

def _print_file(path):
    """Write the text stored in *path* to stdout exactly as it is on disk.

    Replaces shelling out to ``cat``: it works on every platform and goes
    through Python's own stdout, so the file text stays in order with the
    surrounding ``print`` output even when stdout is piped.
    """
    with open(path, encoding='utf-8') as handle:
        print(handle.read(), end='')


# Class A: fixed measurements labelled s1..s5.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']
print(heights_A.shape)

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']
print(weights_A.dtype)

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A

# Write class A with its index, then show the raw file.
df_A.to_csv('classA.csv')
_print_file('classA.csv')

# Default read: the saved index comes back as an ordinary first column.
df_A2 = pd.read_csv('classA.csv')
print(df_A2)

# index_col=0 turns that first column back into the row index.
df_A3 = pd.read_csv('classA.csv', index_col=0)
print(df_A3)

# Class B: the generator is reset to seed 100 before each draw.
np.random.seed(100)
x = np.random.normal(loc=170.0, scale=25.0, size=5)
heights_B = pd.Series(x, index=['s1', 's2', 's3', 's4', 's5'])

np.random.seed(100)
y = np.random.normal(loc=75.0, scale=12.0, size=5)
weights_B = pd.Series(y, index=['s1', 's2', 's3', 's4', 's5'])

df_B = pd.DataFrame(
    {'Student_height': heights_B, 'Student_weight': weights_B},
    index=weights_B.index,
)

# Write class B without its index, then show the raw file.
df_B.to_csv('classB.csv', index=False)
_print_file('classB.csv')

df_B2 = pd.read_csv('classB.csv')
print(df_B2)

# header=None: the header line is read as the first data row.
df_B3 = pd.read_csv('classB.csv', header=None)
print(df_B3)

# skiprows=2 drops the header line and the first data row before parsing.
df_B4 = pd.read_csv('classB.csv', header=None, skiprows=2)
print(df_B4)

3. Hands-on with Indexes

import pandas as pd

# Daily DatetimeIndex covering 1-15 September 2017 (both ends included).
dates = pd.date_range('2017/09/01', '2017/09/15')
print(dates[2])

# Parse two date strings and check which of them fall inside `dates`.
datelist = ['14-Sep-2017', '9-Sep-2017']
dates_to_be_searched = pd.to_datetime(datelist)
print(dates_to_be_searched)
print(dates_to_be_searched.isin(dates))

# Two-level index: class label on the outside, student id on the inside.
students = ['s1', 's2', 's3', 's4', 's5']
arraylist = [
    [label for label in ('classA', 'classB') for _ in students],
    students * 2,
]
mi_index = pd.MultiIndex.from_arrays(arraylist)
print(mi_index.levels)

4. Access Elements in Data Structures

#Write your code here


import pandas as pd
import numpy as np

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

# Positional access goes through .iloc: the index holds string labels, and
# integer keys passed to plain [] on such a Series are deprecated in pandas.
print(heights_A.iloc[1])
print(heights_A.iloc[[1, 2, 3]])

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A

# Selecting a single column yields a Series.
height = df_A['Student_height']
print(type(height))

# Rows by position: the first two students.
df_s1s2 = df_A.iloc[[0, 1]]
print(df_s1s2)

# Rows by position, in the requested order s2, s5, s1.
df_s2s5s1 = df_A.iloc[[1, 4, 0]]
print(df_s2s5s1)

# Rows whose label ends in '1' or '4', picked with a boolean mask.
df_s1s4 = df_A.loc[(df_A.index.str.endswith('1') | df_A.index.str.endswith('4'))]
print(df_s1s4)

5. Data Cleaning - Hands-on

#Write your code here


import pandas as pd
import numpy as np

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A

# Blank out the whole s3 row.
df_A.loc['s3'] = np.nan

# Blank out only the second column (Student_weight) of s5. A single
# .loc[row, column] assignment writes into df_A itself; the chained form
# df_A.loc['s5'][1] = ... can end up modifying a temporary copy instead.
df_A.loc['s5', 'Student_weight'] = np.nan

# Keep only rows with no missing value in any column.
df_A2 = df_A.dropna(how='any')
print(df_A2)

6. Data Aggregation - Hands-on

import pandas as pd
import numpy as np

student_ids = ['s1', 's2', 's3', 's4', 's5']

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4], index=student_ids)
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9], index=student_ids)

df_A = pd.DataFrame({
    'Student_height': heights_A,
    'Student_weight': weights_A,
})

# Students taller than 160.0 and lighter than 80.0.
is_tall = df_A['Student_height'] > 160.0
is_light = df_A['Student_weight'] < 80.0
df_A_filter1 = df_A.loc[is_tall & is_light]
print(df_A_filter1)

# Row(s) whose label is s5, selected through an index-membership mask.
label_is_s5 = df_A.index.isin(['s5'])
df_A_filter2 = df_A.loc[label_is_s5]
print(df_A_filter2)

# Add a gender column, then average the measurements per gender.
df_A['Gender'] = ['M', 'F', 'M', 'M', 'F']
df_groups = df_A.groupby('Gender')
print(df_groups.mean())

7. Merging DataFrames - Hands-on

import pandas as pd
import numpy as np

# Master table: ids 101..110 paired with names person1..person10.
nameid = pd.Series(range(101, 111))
name = pd.Series([f'person{i}' for i in range(1, 11)])
master = pd.DataFrame({'nameid': nameid, 'name': name})

# Transactions refer to people through the shared nameid key.
transaction = pd.DataFrame({
    'nameid': [108, 108, 108, 103],
    'product': ['iPhone', 'Nokia', 'Micromax', 'Vivo'],
})

# Default (inner) join: only ids present in both tables are kept.
mdf = master.merge(transaction, on='nameid')
print(mdf)

8. Concatenating DataFrames - Hands-on
#Write your code here
import pandas as pd
import numpy as np

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A
df_A['Gender'] = ['M', 'F', 'M', 'M', 'F']

# New student record; the Series name becomes the row label.
s = pd.Series(
    [165.4, 82.7, 'F'],
    index=['Student_height', 'Student_weight', 'Gender'],
    name='s6',
)

# DataFrame.append no longer exists in pandas 2.x, so the record is turned
# into a one-row frame and concatenated. infer_objects() restores numeric
# dtypes on that row so the height/weight columns stay float after concat.
s_row = s.to_frame().T.infer_objects()
df_AA = pd.concat([df_A, s_row])
print(df_AA)

# Class B: the generator is reset to seed 100 before each draw.
np.random.seed(100)
x = np.random.normal(loc=170.0, scale=25.0, size=5)
heights_B = pd.Series(x, index=['s1', 's2', 's3', 's4', 's5'])

np.random.seed(100)
y = np.random.normal(loc=75.0, scale=12.0, size=5)
weights_B = pd.Series(y, index=['s1', 's2', 's3', 's4', 's5'])

df_B = pd.DataFrame(
    {'Student_height': heights_B, 'Student_weight': weights_B},
    index=weights_B.index,
)
# Relabel class B so its rows do not collide with class A's s1..s6.
df_B.index = ['s7', 's8', 's9', 's10', 's11']
df_B['Gender'] = ['F', 'M', 'F', 'F', 'M']

# Stack both classes into one frame.
df = pd.concat([df_AA, df_B])
print(df)

You might also like