You are on page 1 of 40

Iteration in pandas

Method #1 : Using the index attribute of the DataFrame.


# import pandas package as pd
import pandas as pd

# Define a dictionary containing students data
# (each key becomes a column, each list holds that column's values)
data = {
    'Name': ['Ankit', 'Amit', 'Aishwarya', 'Priyanka'],
    'Age': [21, 19, 20, 18],
    'Stream': ['Math', 'Commerce', 'Arts', 'Biology'],
    'Percentage': [88, 92, 95, 70],
}

# Convert the dictionary into DataFrame
df = pd.DataFrame(data, columns=['Name', 'Age', 'Stream', 'Percentage'])
print("Given Dataframe :\n", df)
print("\nIterating over rows using index attribute :\n")

# Iterate over the row labels in df.index and select the
# 'Name' and 'Stream' values of each row.
for ind in df.index:
    print(df['Name'][ind], df['Stream'][ind])
Method #2 : Using loc[] function of the Dataframe.
# import pandas package as pd
import pandas as pd

# Define a dictionary containing students data
data = {
    'Name': ['Ankit', 'Amit', 'Aishwarya', 'Priyanka'],
    'Age': [21, 19, 20, 18],
    'Stream': ['Math', 'Commerce', 'Arts', 'Biology'],
    'Percentage': [88, 92, 95, 70],
}

# Convert the dictionary into DataFrame
df = pd.DataFrame(data, columns=['Name', 'Age', 'Stream', 'Percentage'])
print("Given Dataframe :\n", df)
print("\nIterating over rows using loc function :\n")

# Iterate through each row and select the
# 'Name' and 'Age' column respectively.
# loc[] is label-based, so walk the actual row labels (df.index)
# rather than positions from range(len(df)).
for i in df.index:
    print(df.loc[i, "Name"], df.loc[i, "Age"])
Method #3 : Using iloc[] function of the DataFrame.
# import pandas package as pd
import pandas as pd

# Define a dictionary containing students data
data = {
    'Name': ['Ankit', 'Amit', 'Aishwarya', 'Priyanka'],
    'Age': [21, 19, 20, 18],
    'Stream': ['Math', 'Commerce', 'Arts', 'Biology'],
    'Percentage': [88, 92, 95, 70],
}

# Convert the dictionary into DataFrame
df = pd.DataFrame(data, columns=['Name', 'Age', 'Stream', 'Percentage'])

print("Given Dataframe :\n", df)

print("\nIterating over rows using iloc function :\n")

# Iterate through each row position and select the
# 0th and 2nd index column respectively ('Name' and 'Stream').
# iloc[] is position-based, so integer row/column offsets are used.
for i in range(len(df)):
    print(df.iloc[i, 0], df.iloc[i, 2])
Method #4 : Using iterrows() method of the Dataframe.
# import pandas package as pd
import pandas as pd

# Define a dictionary containing students data
data = {
    'Name': ['Ankit', 'Amit', 'Aishwarya', 'Priyanka'],
    'Age': [21, 19, 20, 18],
    'Stream': ['Math', 'Commerce', 'Arts', 'Biology'],
    'Percentage': [88, 92, 95, 70],
}

# Convert the dictionary into DataFrame
df = pd.DataFrame(data, columns=['Name', 'Age', 'Stream', 'Percentage'])
print("Given Dataframe :\n", df)
print("\nIterating over rows using iterrows() method :\n")

# iterrows() yields (index label, row Series) pairs; the label is not
# needed here, so it is discarded. Select the 'Name' and 'Age' values.
for _, row in df.iterrows():
    print(row["Name"], row["Age"])
Method #5 : Using itertuples() method of the Dataframe.
# import pandas package as pd
import pandas as pd

# Define a dictionary containing students data
data = {
    'Name': ['Ankit', 'Amit', 'Aishwarya', 'Priyanka'],
    'Age': [21, 19, 20, 18],
    'Stream': ['Math', 'Commerce', 'Arts', 'Biology'],
    'Percentage': [88, 92, 95, 70],
}

# Convert the dictionary into DataFrame
df = pd.DataFrame(data, columns=['Name', 'Age', 'Stream', 'Percentage'])
print("Given Dataframe :\n", df)
print("\nIterating over rows using itertuples() method :\n")

# Iterate through each row as a namedtuple called 'Pandas' and select the
# 'Name' and 'Percentage' fields; the column names are valid identifiers,
# so plain attribute access replaces getattr().
for row in df.itertuples(index=True, name='Pandas'):
    print(row.Name, row.Percentage)
• Method #6 : Using apply() method of the Dataframe.
# import pandas package as pd
import pandas as pd


def _name_with_percentage(row):
    """Join a row's 'Name' and 'Percentage' values with a single space."""
    return f'{row["Name"]} {row["Percentage"]}'


# Students data: one key per column, one list entry per row
data = {
    'Name': ['Ankit', 'Amit', 'Aishwarya', 'Priyanka'],
    'Age': [21, 19, 20, 18],
    'Stream': ['Math', 'Commerce', 'Arts', 'Biology'],
    'Percentage': [88, 92, 95, 70],
}

# Build the DataFrame with an explicit column order
df = pd.DataFrame(data, columns=['Name', 'Age', 'Stream', 'Percentage'])
print("Given Dataframe :\n", df)
print("\nIterating over rows using apply function :\n")

# axis=1 makes apply() call the helper once per row; the result is a
# Series holding one concatenated string per row.
print(df.apply(_name_with_percentage, axis=1))
Iterating over rows and columns in Pandas DataFrame

• Iterating over rows :


• In order to iterate over rows, we can use the functions iterrows() and
itertuples(). The third function covered below, iteritems() (called items()
since pandas 2.0), iterates over columns rather than rows.
• Iteration over rows using iterrows()
• In order to iterate over rows, we apply the iterrows() function. This
function returns each index value along with a Series containing the
data in that row.
# importing pandas as pd
import pandas as pd

# dictionary of lists (named `data` so the builtin `dict` is not shadowed)
data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}

# creating a dataframe from a dictionary
df = pd.DataFrame(data)

print(df)
Now we apply the iterrows() function in order to
get each row of the dataframe.
# importing pandas as pd
import pandas as pd

# dictionary of lists (named `data` so the builtin `dict` is not shadowed)
data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}

# creating a dataframe from a dictionary
df = pd.DataFrame(data)

# iterating over rows using iterrows() function:
# each step yields the row's index label and the row as a Series
for index, row in df.iterrows():
    print(index, row)
    print()  # blank line between rows
# importing pandas module
import pandas as pd

# making data frame from csv file
# ("nba.csv" is a relative path, resolved against the working directory)
data = pd.read_csv("nba.csv")

# for data visualization we filter the first 3 rows
# (a bare expression: its value is shown only in a notebook or REPL)
data.head(3)
Now we apply iterrows() to get each row
of the dataframe.
# importing pandas module
import pandas as pd

# making data frame from csv file
# ("nba.csv" is a relative path, resolved against the working directory)
data = pd.read_csv("nba.csv")

# iterrows() yields (index label, row Series) pairs
for i, j in data.iterrows():
    print(i, j)
    print()  # blank line between rows
Iteration using iteritems()
The iteritems() function (called items() since pandas 2.0) does not iterate
over rows: it iterates over each column as a key, value pair, with the
column label as the key and the column values as a Series object.
# importing pandas as pd
import pandas as pd

# dictionary of lists (named `data` so the builtin `dict` is not shadowed)
data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}

# creating a dataframe from a dictionary
df = pd.DataFrame(data)

print(df)
Now we apply the iteritems() function in order
to retrieve each column of the dataframe.
# importing pandas as pd
import pandas as pd

# dictionary of lists (named `data` so the builtin `dict` is not shadowed)
data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}

# creating a dataframe from a dictionary
df = pd.DataFrame(data)

# DataFrame.iteritems() was removed in pandas 2.0; items() is the
# equivalent call. It yields (column label, column Series) pairs.
for key, value in df.items():
    print(key, value)
    print()  # blank line between columns
# importing pandas module
import pandas as pd

# making data frame from csv file
# ("nba.csv" is a relative path, resolved against the working directory)
data = pd.read_csv("nba.csv")

# for data visualization we filter the first 3 rows
# (a bare expression: its value is shown only in a notebook or REPL)
data.head(3)
Now we apply iteritems() in order to
retrieve each column of the dataframe.
# importing pandas module
import pandas as pd

# making data frame from csv file
# ("nba.csv" is a relative path, resolved against the working directory)
data = pd.read_csv("nba.csv")

# DataFrame.iteritems() was removed in pandas 2.0; items() is the
# equivalent call. It yields (column label, column Series) pairs.
for key, value in data.items():
    print(key, value)
    print()  # blank line between columns
Iteration over rows using itertuples()
In order to iterate over rows, we apply the function itertuples(). This
function returns a tuple for each row in the DataFrame. The first
element of the tuple is the row's corresponding index value,
while the remaining elements are the row values.
# importing pandas as pd
import pandas as pd

# dictionary of lists (named `data` so the builtin `dict` is not shadowed)
data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}

# creating a dataframe from a dictionary
df = pd.DataFrame(data)

print(df)
Now we apply the itertuples() function in order
to get a tuple for each row.
# importing pandas as pd
import pandas as pd

# dictionary of lists (named `data` so the builtin `dict` is not shadowed)
data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}

# creating a dataframe from dictionary
df = pd.DataFrame(data)

# using itertuples(): each row comes back as a namedtuple whose first
# field is the index label, followed by the column values
for row in df.itertuples():
    print(row)
# importing pandas module
import pandas as pd

# making data frame from csv file
# ("nba.csv" is a relative path, resolved against the working directory)
data = pd.read_csv("nba.csv")

# for data visualization we filter the first 3 rows
# (a bare expression: its value is shown only in a notebook or REPL)
data.head(3)
Now we apply itertuples() to get a tuple for
each row.
# importing pandas module
import pandas as pd

# making data frame from csv file
# ("nba.csv" is a relative path, resolved against the working directory)
data = pd.read_csv("nba.csv")

# itertuples() yields one namedtuple per row
for i in data.itertuples():
    print(i)
• Iterating over Columns :
• In order to iterate over columns, we create a list of the dataframe's
column names and then iterate through that list to pull out each
column.
# importing pandas as pd
import pandas as pd

# dictionary of lists (named `data` so the builtin `dict` is not shadowed)
data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}

# creating a dataframe from a dictionary
df = pd.DataFrame(data)

print(df)
Now we iterate through the columns. To do so, we first create a list of
the dataframe's column names and then
iterate through that list.
# creating a list of dataframe columns
# (list(df) yields the column labels of the dataframe `df` built earlier)
columns = list(df)

for i in columns:
    # printing the third element of the column;
    # .iloc[2] selects it by position, independent of the index labels
    print(df[i].iloc[2])
# importing pandas module
import pandas as pd

# making data frame from csv file
# ("nba.csv" is a relative path, resolved against the working directory)
data = pd.read_csv("nba.csv")

# for data visualization we filter the first 3 rows
col = data.head(3)

# (a bare expression: its value is shown only in a notebook or REPL)
col
Now we iterate over the columns of the CSV file. To do so, we create
a list of the dataframe's column names and iterate over that list.

# creating a list of dataframe columns
# (list(col) yields the column labels of the `col` frame built earlier)
clmn = list(col)

for i in clmn:
    # printing the third element of the column;
    # .iloc[2] selects it by position, independent of the index labels
    print(col[i].iloc[2])
create a Dataframe
# import pandas package
import pandas as pd

# Student records as (name, age, section) tuples
students = [
    ('Ankit', 22, 'A'),
    ('Swapnil', 22, 'B'),
    ('Priya', 22, 'B'),
    ('Shivangi', 22, 'B'),
]

# Build the DataFrame with named columns and string row labels '1'..'4'
stu_df = pd.DataFrame(
    students,
    index=['1', '2', '3', '4'],
    columns=['Name', 'Age', 'Section'],
)

# Bare expression: its value is shown only in a notebook or REPL
stu_df
• Method #1: Using DataFrame.iteritems():
The DataFrame class provides a member function iteritems() (called items()
since pandas 2.0) which gives an iterator that can be used to iterate over
all the columns of a data frame. For every column in the DataFrame it yields
a tuple containing the column name and its contents as a Series.
import pandas as pd

# List of Tuples
students = [
    ('Ankit', 22, 'A'),
    ('Swapnil', 22, 'B'),
    ('Priya', 22, 'B'),
    ('Shivangi', 22, 'B'),
]

# Create a DataFrame object
stu_df = pd.DataFrame(students,
                      columns=['Name', 'Age', 'Section'],
                      index=['1', '2', '3', '4'])

# items() gives a tuple of column name and Series for each column in the
# dataframe (DataFrame.iteritems() was removed in pandas 2.0; items() is
# the equivalent call).
for (columnName, columnData) in stu_df.items():
    print('Column Name : ', columnName)
    print('Column Contents : ', columnData.values)
Method #2: Using [ ] operator :
We can iterate over column names and select our desired column.
import pandas as pd

# List of Tuples
students = [
    ('Ankit', 22, 'A'),
    ('Swapnil', 22, 'B'),
    ('Priya', 22, 'B'),
    ('Shivangi', 22, 'B'),
]

# Create a DataFrame object
stu_df = pd.DataFrame(students,
                      columns=['Name', 'Age', 'Section'],
                      index=['1', '2', '3', '4'])

# Iterate over column names (iterating a DataFrame yields its labels)
for column in stu_df:
    # Select column contents by column
    # name using [] operator
    columnSeriesObj = stu_df[column]
    print('Column Name : ', column)
    print('Column Contents : ', columnSeriesObj.values)
Method #3: Iterate over more than one column :
Assume we need to iterate over more than one column. To do that, we can select more than one
column from the dataframe and iterate over the selection.
import pandas as pd

# List of Tuples
students = [
    ('Ankit', 22, 'A'),
    ('Swapnil', 22, 'B'),
    ('Priya', 22, 'B'),
    ('Shivangi', 22, 'B'),
]

# Create a DataFrame object
stu_df = pd.DataFrame(students,
                      columns=['Name', 'Age', 'Section'],
                      index=['1', '2', '3', '4'])

# Iterate over two given columns
# only from the dataframe
for column in stu_df[['Name', 'Section']]:
    # Select column contents by column
    # name using [] operator
    columnSeriesObj = stu_df[column]
    print('Column Name : ', column)
    print('Column Contents : ', columnSeriesObj.values)
Method #4: Iterating columns in reverse order :
We can iterate over columns in reverse order as well
import pandas as pd

# List of Tuples
students = [
    ('Ankit', 22, 'A'),
    ('Swapnil', 22, 'B'),
    ('Priya', 22, 'B'),
    ('Shivangi', 22, 'B'),
]

# Create a DataFrame object
stu_df = pd.DataFrame(students,
                      columns=['Name', 'Age', 'Section'],
                      index=['1', '2', '3', '4'])

# Iterate over the sequence of column names
# in reverse order
for column in reversed(stu_df.columns):
    # Select column contents by column
    # name using [] operator
    columnSeriesObj = stu_df[column]
    print('Column Name : ', column)
    print('Column Contents : ', columnSeriesObj.values)
Method #5: Using index (iloc) :
To iterate over the columns of a DataFrame by index, we can iterate over a range, i.e.
0 to the number of columns, and then for each index select the contents of the
column using iloc[].
import pandas as pd

# List of Tuples
students = [
    ('Ankit', 22, 'A'),
    ('Swapnil', 22, 'B'),
    ('Priya', 22, 'B'),
    ('Shivangi', 22, 'B'),
]

# Create a DataFrame object
stu_df = pd.DataFrame(students,
                      columns=['Name', 'Age', 'Section'],
                      index=['1', '2', '3', '4'])

# Iterate over the index range from
# 0 to max number of columns in dataframe
# (stu_df.shape[1] is the column count)
for index in range(stu_df.shape[1]):
    print('Column Number : ', index)

    # Select column by index position using iloc[]
    columnSeriesObj = stu_df.iloc[:, index]
    print('Column Contents : ', columnSeriesObj.values)

You might also like