You are on page 1 of 7

ADVANCED OPERATIONS ON DATAFRAME

PIVOTING:
Pivoting rearranges the data in a DataFrame by rotating rows and columns,
or by aggregating data from multiple sources, so that it can be presented
in a summarised report form.
The arguments of the pivot() function are:
i. index – stores the column name about which the information is to
be summarised (will become rows in the result)
ii. columns – stores the column name whose data will become a
column each in the summary information (will become columns in
the result)
iii. values – stores the column name whose data will be displayed
for each index–column combination (will become cells in the result)

import pandas as pd

# Column-oriented data: one list per column, aligned by position.
d1 = {
    'students': ['Arun', 'Neha', 'Supreet', 'Usha', 'Mukesh'],
    'marks': [78.5, 83.4, 65.6, 92.2, 88.6],
    'sport': ['Cricket', 'Badminton', 'Football', 'Atheletics', 'Kabaddi'],
}

# Each dictionary key becomes a column; rows get the default 0..4 index.
df1 = pd.DataFrame(d1)
print(df1)
students marks sport
0 Arun 78.5 Cricket
1 Neha 83.4 Badminton
2 Supreet 65.6 Football
3 Usha 92.2 Atheletics
4 Mukesh 88.6 Kabaddi
Code for pivoting
import pandas as pd

# Same student data as before, laid out one column per line.
d1 = {
    'students': ['Arun', 'Neha', 'Supreet', 'Usha', 'Mukesh'],
    'marks': [78.5, 83.4, 65.6, 92.2, 88.6],
    'sport': ['Cricket', 'Badminton', 'Football', 'Atheletics', 'Kabaddi'],
}
df1 = pd.DataFrame(d1)
print(df1)

# Rows come from 'sport', columns from 'students', cells from 'marks'.
# Every (sport, student) pair that does not occur in df1 shows up as NaN.
pivoted = df1.pivot(index='sport', columns='students', values='marks')
print(pivoted)
After pivoting
OUTPUT

students Arun Mukesh Neha Supreet Usha


sport
Atheletics NaN NaN NaN NaN 92.2
Badminton NaN NaN 83.4 NaN NaN
Cricket 78.5 NaN NaN NaN NaN
Football NaN NaN NaN 65.6 NaN
Kabaddi NaN 88.6 NaN NaN NaN

Filling NA values
import pandas as pd

# Same student data as before, laid out one column per line.
d1 = {
    'students': ['Arun', 'Neha', 'Supreet', 'Usha', 'Mukesh'],
    'marks': [78.5, 83.4, 65.6, 92.2, 88.6],
    'sport': ['Cricket', 'Badminton', 'Football', 'Atheletics', 'Kabaddi'],
}
df1 = pd.DataFrame(d1)
print(df1)

# Pivot first, then replace every NaN cell of the pivoted table with 10.
print(
    df1.pivot(index='sport', columns='students', values='marks')
    .fillna(10)
)

OUTPUT:
students Arun Mukesh Neha Supreet Usha
sport
Atheletics 10.0 10.0 10.0 10.0 92.2
Badminton 10.0 10.0 83.4 10.0 10.0
Cricket 78.5 10.0 10.0 10.0 10.0
Football 10.0 10.0 10.0 65.6 10.0
Kabaddi 10.0 88.6 10.0 10.0 10.0

HANDLING MISSING DATA - DETECTING MISSING VALUES IN A DATAFRAME


import pandas as pd
import numpy as np

# Two marks are deliberately missing. Use np.nan: the np.NaN alias was
# removed in NumPy 2.0 and raises AttributeError there.
d1 = {
    'students': ['Arun', 'Neha', 'Supreet', 'Usha', 'Mukesh'],
    'marks': [78.5, np.nan, 65.6, 92.2, np.nan],
    'sport': ['Cricket', 'Badminton', 'Football', 'Atheletics', 'Kabaddi'],
}
df1 = pd.DataFrame(d1)
print(df1)

# isnull() returns a same-shaped DataFrame: True where a value is missing.
print(df1.isnull())

Output of print(df1):
students marks sport
0 Arun 78.5 Cricket
1 Neha NaN Badminton
2 Supreet 65.6 Football
3 Usha 92.2 Atheletics
4 Mukesh NaN Kabaddi

Output of print(df1.isnull()):
students marks sport
0 False False False
1 False True False
2 False False False
3 False False False
4 False True False

DROPPING MISSING VALUES


import pandas as pd
import numpy as np

# Two marks are deliberately missing. Use np.nan: the np.NaN alias was
# removed in NumPy 2.0 and raises AttributeError there.
d1 = {
    'students': ['Arun', 'Neha', 'Supreet', 'Usha', 'Mukesh'],
    'marks': [78.5, np.nan, 65.6, 92.2, np.nan],
    'sport': ['Cricket', 'Badminton', 'Football', 'Atheletics', 'Kabaddi'],
}
df1 = pd.DataFrame(d1)
print(df1)
print(df1.isnull())

# dropna() returns a new DataFrame without the rows that contain a
# missing value; df1 itself is left unchanged.
print(df1.dropna())

students marks sport


0 Arun 78.5 Cricket
1 Neha NaN Badminton
2 Supreet 65.6 Football
3 Usha 92.2 Atheletics
4 Mukesh NaN Kabaddi

Output of print(df1.isnull()):
students marks sport
0 False False False
1 False True False
2 False False False
3 False False False
4 False True False

Output of print(df1.dropna()):
students marks sport
0 Arun 78.5 Cricket
2 Supreet 65.6 Football
3 Usha 92.2 Atheletics

FILLING MISSING VALUES


import pandas as pd
import numpy as np

# Two marks are deliberately missing. Use np.nan: the np.NaN alias was
# removed in NumPy 2.0 and raises AttributeError there.
d1 = {
    'students': ['Arun', 'Neha', 'Supreet', 'Usha', 'Mukesh'],
    'marks': [78.5, np.nan, 65.6, 92.2, np.nan],
    'sport': ['Cricket', 'Badminton', 'Football', 'Atheletics', 'Kabaddi'],
}
df1 = pd.DataFrame(d1)
print(df1)
print(df1.isnull())

# fillna(555) returns a new DataFrame with every missing value replaced
# by 555; the 'marks' column is float, so it prints as 555.0.
print(df1.fillna(555))

students marks sport


0 Arun 78.5 Cricket
1 Neha NaN Badminton
2 Supreet 65.6 Football
3 Usha 92.2 Atheletics
4 Mukesh NaN Kabaddi
students marks sport
0 False False False
1 False True False
2 False False False
3 False False False
4 False True False

Output of print(df1.fillna(555)):
students marks sport
0 Arun 78.5 Cricket
1 Neha 555.0 Badminton
2 Supreet 65.6 Football
3 Usha 92.2 Atheletics
4 Mukesh 555.0 Kabaddi
MERGE()
import pandas as pd

# First table: the sport each student plays.
d1 = {
    'students': ['Arun', 'Neha', 'Supreet', 'Usha', 'Mukesh'],
    'sport': ['Cricket', 'Badminton', 'Football', 'Atheletics', 'Kabaddi'],
}
df1 = pd.DataFrame(d1)

# Second table: cultural activity, recorded for only three of the students.
d2 = {
    'students': ['Neha', 'Usha', 'Mukesh'],
    'cultural': ['Music', 'dance', 'drawing'],
}
df2 = pd.DataFrame(d2)

print(df1)
print(df2)

# With no extra arguments, merge() joins on the column the two frames
# share ('students') and keeps only the students present in both.
print(pd.merge(df1, df2))
students sport
0 Arun Cricket
1 Neha Badminton
2 Supreet Football
3 Usha Atheletics
4 Mukesh Kabaddi
students cultural
0 Neha Music
1 Usha dance
2 Mukesh drawing

Output of print(pd.merge(df1,df2)):
students sport cultural
0 Neha Badminton Music
1 Usha Atheletics dance
2 Mukesh Kabaddi drawing

You might also like