Python Pandas Handsons

Join our channel if you haven’t joined yet https://t.me/fresco_milestone ( @fresco_milestone )
Python Pandas HandsOns
1. Pandas Data Structures
import pandas as pd
import numpy as np

# --- Class A: fixed measurements, labelled s1..s5 ---------------------------
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']
print(heights_A.shape)

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']
print(weights_A.dtypes)

# Both series share the same labels, so they line up row by row.
df_A = pd.DataFrame({
    'Student_height': heights_A,
    'Student_weight': weights_A,
})
print(df_A.shape)

# --- Class B: normally distributed samples, seeded for reproducibility ------
my_mean = 170.0
my_std = 25.0
np.random.seed(100)
heights_B = pd.Series(np.random.normal(loc=my_mean, scale=my_std, size=5))
heights_B.index = ['s1', 's2', 's3', 's4', 's5']

my_mean1 = 75.0
my_std1 = 12.0
# Weights use their own parameters (my_mean1 / my_std1). The earlier version
# reused the height parameters here, which left my_mean1 / my_std1 unused and
# produced weights centred on 170.
weights_B = pd.Series(np.random.normal(loc=my_mean1, scale=my_std1, size=5))
weights_B.index = ['s1', 's2', 's3', 's4', 's5']
print(heights_B.mean())

df_B = pd.DataFrame({
    'Student_height': heights_B,
    'Student_weight': weights_B,
})
print(df_B.columns.values.tolist())
2. Accessing Pandas Data Structures
#Write your code here

import pandas as pd
import numpy as np
# Heights with the default integer index (labels 0..4).
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])

# One element by its integer label, then several labels at once.
print(heights_A.loc[1])
print(heights_A.loc[[1, 2, 3]])

Join our channel if you haven’t joined yet https://t.me/fresco_milestone ( @fresco_milestone )
# Selecting a single column; the printed type shows what kind of object it is.
height = df_A['Student_height']
print(type(height))

# Rows labelled s1 and s2, kept in the frame's own row order.
df_s1s2 = df_A.loc[df_A.index.isin(['s1', 's2'])]
print(df_s1s2)

# Rows s1, s2 and s5, then rearranged into the order s2, s5, s1.
df_s2s5s1 = df_A.loc[df_A.index.isin(['s1', 's2', 's5'])].reindex(['s2', 's5', 's1'])
print(df_s2s5s1)

# Rows labelled s1 and s4.
df_s1s4 = df_A.loc[df_A.index.isin(['s1', 's4'])]
print(df_s1s4)
3. Working with CSV files

import pandas as pd
import numpy as np


# Write df_A to disk; to_csv includes the row index as an unnamed first column.
df_A.to_csv('classA.csv')
# Plain read-back: the saved index comes back as an ordinary data column.
df_A2 = pd.read_csv('classA.csv')
print(df_A2)
# Read again, this time using that column (named 'Unnamed: 0') as the index.
df_A3 = pd.read_csv('classA.csv',index_col='Unnamed: 0')

print(df_A3)
# Regenerate the class-B samples; the seed is reset before each draw, so both
# series are built from the same underlying random sequence.
# NOTE(review): these new series are not assigned into df_B anywhere in this
# section, so the df_B written below is whatever was built earlier - confirm
# whether df_B was meant to be rebuilt from them.
my_mean = 170.0
my_std = 25.0
np.random.seed(100)
heights_B = pd.Series(np.random.normal(loc=my_mean, scale=my_std, size=5))
my_mean1 = 75.0
my_std1 = 12.0
np.random.seed(100)
weights_B = pd.Series(np.random.normal(loc=my_mean1, scale=my_std1, size=5))
# Write df_B without its row index.
df_B.to_csv('classB.csv', index=False)
# Default read: the first line of the file supplies the column names.
df_B2 = pd.read_csv('classB.csv')
print(df_B2)
# header=None: the first line is treated as data and columns are numbered.
df_B3 = pd.read_csv('classB.csv',header=None)
print(df_B3)
# Same, but the first two lines of the file are skipped before parsing.
df_B4 = pd.read_csv('classB.csv',header=None,skiprows=2)
print(df_B4)
4. Indexing Dataframes

import pandas as pd
import numpy as np
# Daily timestamps from 1 September 2017 through 15 September 2017.
DatetimeIndex = pd.date_range(start='09/1/2017', end='09/15/2017')

print(DatetimeIndex[2])
datelist = ['14-Sep-2017', '9-Sep-2017']

# Parse the strings into timestamps, then test each one for membership in
# the date range built above.
dates_to_be_searched = pd.to_datetime(datelist)
print(dates_to_be_searched)
print(dates_to_be_searched.isin(DatetimeIndex))
arraylist = [['classA']*5 + ['classB']*5, ['s1', 's2', 's3','s4', 's5']*2]

# arraylist holds two parallel label arrays of equal length (one entry per
# row), so they are paired element-wise with from_arrays. from_product would
# take the Cartesian product of the two 10-element lists and yield 100
# entries full of duplicates. The printed levels are the same either way.
mi_index = pd.MultiIndex.from_arrays(arraylist, names=['First Level', 'Second Level'])
print(mi_index.levels)
5. Data Cleaning

import pandas as pd
import numpy as np


# Blank out every column of row s3.
df_A.loc['s3'] = np.nan
# Blank out the weight of s5 with a single .loc[row, column] assignment.
# The previous chained form, df_A.loc['s5'][1] = np.nan, assigned into an
# intermediate Series and was not guaranteed to modify df_A itself.
# Position 1 in that form is the second column, which is 'Student_weight'
# in df_A as built earlier in this file.
df_A.loc['s5', 'Student_weight'] = np.nan
# Keep only the rows that have no missing value in any column.
df_A2 = df_A.dropna(how='any')

print(df_A2)
6. Data Aggregation

import pandas as pd
import numpy as np


# Rows with height above 160.0 and weight below 80.0.
df_A_filter1 = df_A.loc[(df_A['Student_height'] > 160.0) & (df_A['Student_weight'] < 80.0)]
print(df_A_filter1)

# Rows whose index label is s5.
df_A_filter2 = df_A.loc[df_A.index.isin(['s5'])]
print(df_A_filter2)

# Attach one gender label per row, then print the per-gender mean of the
# remaining columns.
df_A['Gender'] = ['M', 'F', 'M', 'M', 'F']
df_groups = df_A.groupby('Gender')
print(df_groups.mean())
7. Data Merge 1

import pandas as pd
import numpy as np


df_A['Gender'] = ['M', 'F', 'M', 'M', 'F']
# One extra student, expressed as a Series whose name becomes the row label.
s = pd.Series([165.4, 82.7, 'F'],index=['Student_height', 'Student_weight', 'Gender'],name='s6')
# DataFrame.append was removed in pandas 2.0. Turn the Series into a one-row
# frame and concatenate instead; infer_objects restores numeric dtypes on the
# new row so the numeric columns of the result are not degraded to object.
df_AA = pd.concat([df_A, s.to_frame().T.infer_objects()])
print(df_AA)
# NOTE(review): heights_B / weights_B are regenerated here but are not
# assigned into df_B in this section; df_B below is the frame built earlier.
my_mean = 170.0
my_std = 25.0
np.random.seed(100)
heights_B = pd.Series(np.random.normal(loc=my_mean, scale=my_std, size=5))
my_mean1 = 75.0
my_std1 = 12.0
np.random.seed(100)
weights_B = pd.Series(np.random.normal(loc=my_mean1, scale=my_std1, size=5))
# Relabel class B so its row labels do not collide with s1..s6 of class A.
df_B.index = [ 's7', 's8', 's9', 's10', 's11']

df_B['Gender'] = ['F', 'M', 'F', 'F', 'M']
# Stack both classes into a single frame, one row per student.
df = pd.concat([df_AA,df_B])
print(df)
8. Data Merge – 2

import pandas as pd
import numpy as np
# Master table: ids 101..110 paired with names person1..person10.
nameid = pd.Series(range(101, 111))

name = pd.Series(['person' + str(i) for i in range(1, 11)])
master = pd.DataFrame()
master['nameid'] = nameid
master['name'] = name

# Purchases keyed by nameid. The last product name was previously split
# across lines ('Viv' / 'o'), which left the string literal unterminated.
transaction = pd.DataFrame({
    'nameid': [108, 108, 108, 103],
    'product': ['iPhone', 'Nokia', 'Micromax', 'Vivo'],
})

# Default (inner) merge: only ids present in both tables are kept.
mdf = pd.merge(master, transaction, on='nameid')
print(mdf)

Python Pandas Handsons

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Python Pandas Handsons

Uploaded by

Copyright:

Available Formats

Join our channel if you haven’t joined yet https://t.

Python Pandas HandsOns

1. Pandas Data Structures

heights_A= pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4 , 78.9])

2. Accessing Pandas Data Structures

#Write your code here

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4 , 78.9])

3. Working with CSV files

#Write your code here

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4 , 78.9])

df_A3 = pd.read_csv('classA.csv',index_col='Unnamed: 0')

heights_B.index = ['s1', 's2', 's3', 's4','s5']

#Write your code here

DatetimeIndex = pd.date_range(start='09/1/2017', end='09/15/2017')

datelist = ['14-Sep-2017', '9-Sep-2017']

arraylist = [['classA']*5 + ['classB']*5, ['s1', 's2', 's3','s4', 's5']*2]

#Write your code here

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4 , 78.9])

df_A2 = df_A.dropna(how ='any')

#Write your code here

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4 , 78.9])

df_A_filter1 = df_A[(df_A.Student_height > 160.0) & (df_A.Student_weight < 80.0)]

df_A['Gender'] = ['M', 'F', 'M', 'M', 'F']

#Write your code here

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4 , 78.9])

df_A['Gender'] = ['M', 'F', 'M', 'M', 'F']

s = pd.Series([165.4, 82.7, 'F'],index=['Student_height', 'Student_weight', 'Gender'],name='s6')

df_B.index = [ 's7', 's8', 's9', 's10', 's11']

#Write your code here

nameid = pd.Series(range(101, 111))

transaction = pd.DataFrame({'nameid':[108, 108, 108,103], 'product':['iPhone', 'Nokia', 'Micromax', 'Viv

You might also like

arraylist = [['classA']*5 + ['classB']*5, ['s1', 's2', 's3','s4', 's5']*2]