You are on page 1 of 4

Grouping

It is used to group the rows of a dataframe by one or more columns using
the groupby() method. Groupby mainly refers to a process involving one
or more of the following steps:
• Splitting: It is a process in which we split data into groups by applying
some conditions on datasets.
• Applying: It is a process in which we apply a function to each group
independently.
• Combining: It is a process in which we combine the different datasets after
applying groupby, resulting in a single data structure.
# pandas supplies the DataFrame type and its groupby() machinery
import pandas as pd

# Student details, one list per column; position i of every list
# belongs to the same student.
student_records = {
    'id': [7058, 4511, 7014, 7033],
    'name': ['sravan', 'manoj', 'aditya', 'bhanu'],
    'Maths_marks': [99, 97, 88, 90],
    'Chemistry_marks': [89, 99, 99, 90],
    'telugu_marks': [99, 97, 88, 80],
    'hindi_marks': [99, 97, 56, 67],
    'social_marks': [79, 97, 78, 90],
}
dataframe = pd.DataFrame(student_records)

# Split the rows by student name once; every report below reuses
# this same grouping.
by_name = dataframe.groupby('name')
separator = "---------------------------------"

# first row of each name group
print(by_name.first())
print(separator)

# sum of social_marks inside each name group
print(by_name['social_marks'].sum())
print(separator)

# number of Maths_marks entries inside each name group
print(by_name['Maths_marks'].count())
print(separator)

# Maths_marks grouped by name with no aggregation applied: this prints
# the group-by object itself rather than a table of values
print(by_name['Maths_marks'])
# pandas supplies the DataFrame type and its groupby() machinery
import pandas as pd

# Student details: each key is a column, each list holds that column's
# values in row order.
dataframe = pd.DataFrame(
    {
        'id': [7058, 4511, 7014, 7033],
        'name': ['sravan', 'manoj', 'aditya', 'bhanu'],
        'Maths_marks': [99, 97, 88, 90],
        'Chemistry_marks': [89, 99, 99, 90],
        'telugu_marks': [99, 97, 88, 80],
        'hindi_marks': [99, 97, 56, 67],
        'social_marks': [79, 97, 78, 90],
    }
)

# One grouping by student name feeds all three reports.
grouped_by_name = dataframe.groupby('name')
reports = (
    grouped_by_name.first(),                  # first row of each group
    grouped_by_name['social_marks'].sum(),    # social_marks total per group
    grouped_by_name['Maths_marks'].count(),   # Maths_marks entries per group
)

# Print the reports in order, with a divider line between consecutive
# reports (none before the first or after the last).
divider = "------------------------"
for position, report in enumerate(reports):
    if position:
        print(divider)
    print(report)

Example 3:

# import modules
import numpy as np
import pandas as pd

# Load the diamonds data; the relative path means the CSV file is looked
# up in the current working directory.
dataset = pd.read_csv("diamonds.csv")

# Show the first 5 rows.
preview = dataset.head(5)
print(preview)

# NOTE: the three aggregations below are bare expressions. Their results
# are echoed in an interactive session or notebook cell, but are simply
# discarded when this file is run as a script.

# Sum of every column within each 'cut' group.
by_cut = dataset.groupby('cut')
by_cut.sum()

# Minimum of every column within each ('cut', 'color') combination.
by_cut_and_color = dataset.groupby(['cut', 'color'])
by_cut_and_color.agg('min')

# Several statistics of the 'price' column within each 'color' group;
# the dict maps a column name to the list of aggregations to apply to it.
agg_functions = {
    'price': ['sum', 'mean', 'median', 'min', 'max', 'prod'],
}
by_color = dataset.groupby(['color'])
by_color.agg(agg_functions)
>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
...                               'Parrot', 'Parrot'],
...                    'Max Speed': [380., 370., 24., 26.]})
>>> df
   Animal  Max Speed
0  Falcon      380.0
1  Falcon      370.0
2  Parrot       24.0
3  Parrot       26.0
>>> df.groupby(['Animal']).mean()
        Max Speed
Animal
Falcon      375.0
Parrot       25.0

>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
...           ['Captive', 'Wild', 'Captive', 'Wild']]
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
...                   index=index)
>>> df
                Max Speed
Animal Type
Falcon Captive      390.0
       Wild         350.0
Parrot Captive       30.0
       Wild          20.0
>>> df.groupby(level=0).mean()
        Max Speed
Animal
Falcon      370.0
Parrot       25.0
>>> df.groupby(level="Type").mean()
         Max Speed
Type
Captive      210.0
Wild         185.0

>>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
>>> df = pd.DataFrame(l, columns=["a", "b", "c"])

>>> df.groupby(by=["b"]).sum()
    a   c
b
1.0 2   3
2.0 2   5
>>> df.groupby(by=["b"], dropna=False).sum()
    a   c
b
1.0 2   3
2.0 2   5
NaN 1   4

>>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
>>> df = pd.DataFrame(l, columns=["a", "b", "c"])

>>> df.groupby(by="a", dropna=False).sum()
    b     c
a
a   13.0   13.0
b   12.3  123.0
NaN 12.3   33.0

>>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
...                               'Parrot', 'Parrot'],
...                    'Max Speed': [380., 370., 24., 26.]})
>>> df.groupby("Animal", group_keys=True).apply(lambda x: x)
          Animal  Max Speed
Animal
Falcon 0  Falcon      380.0
       1  Falcon      370.0
Parrot 2  Parrot       24.0
       3  Parrot       26.0

>>> df.groupby("Animal", group_keys=False).apply(lambda x: x)
   Animal  Max Speed
0  Falcon      380.0
1  Falcon      370.0
2  Parrot       24.0
3  Parrot       26.0

You might also like