You are on page 1of 16

Python Series – Operations and Methods

import pandas as pd
#Series: Creation of Series from a NumPy ndarray
import numpy as np # import NumPy with alias np
import pandas as pd
arr1 = np.array([10,20,30,40])
s1 = pd.Series(arr1)
print(s1)

Output:
0 10
1 20
2 30
3 40
dtype: int32

#Series: Creation of Series from scalar value


import pandas as pd #import Pandas with alias pd
#create a Series with default index
ser1 = pd.Series([10,20,30,40,50]) #create a Series
print(ser1) #Display the series

Output:
0 10
1 20
2 30
3 40
4 50
dtype: int64

#create a Series with positional index and labelled index


ser2 = pd.Series([5,4,3,2])
ser3 = pd.Series([15,25,40,50],index=['a','b','c','d'])
print(ser2)
print(ser3)

Output:
0 5
1 4
2 3
3 2
dtype: int64
a 15
b 25
c 40
d 50
dtype: int64

1 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


#Creating a series using Dictionary
D1 = {'Microsoft': 'Washington', 'IBM': 'New York','Oracle': 'Texas', 'SAP':'Walldorf'}
print(D1) #Display the dictionary
CompanySer= pd.Series(D1)
print(CompanySer) #Display the series

Output:

{'Microsoft': 'Washington', 'IBM': 'New York', 'Oracle': 'Texas', 'SAP': 'Walldorf'}


Microsoft Washington
IBM New York
Oracle Texas
SAP Walldorf
dtype: object

#Mathematical operations on Series


print(ser1+ser2)
print(ser1-ser2)
print(ser1*ser2)
print(ser1/ser2)
print(ser1//ser2)

Output:
0 15.0
1 24.0
2 33.0
3 42.0
4 NaN
dtype: float64
0 5.0
1 16.0
2 27.0
3 38.0
4 NaN
dtype: float64
0 50.0
1 80.0
2 90.0
3 80.0
4 NaN
dtype: float64
0 2.0
1 5.0
2 10.0
3 20.0
4 NaN

2 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


dtype: float64
0 2.0
1 5.0
2 10.0
3 20.0
4 NaN
dtype: float64

#Using Mathematical Methods in Series


print(ser1.add(ser2))
print(ser1.sub(ser2))
print(ser1.mul(ser2))
print(ser1.div(ser2))

Output:
0 15.0
1 24.0
2 33.0
3 42.0
4 NaN
dtype: float64
0 5.0
1 16.0
2 27.0
3 38.0
4 NaN
dtype: float64
0 50.0
1 80.0
2 90.0
3 80.0
4 NaN
dtype: float64
0 2.0
1 5.0
2 10.0
3 20.0
4 NaN
dtype: float64

#Using the fill_value parameter to handle NaN:

print(ser1.add(ser2, fill_value=0))
print(ser1.sub(ser2, fill_value=0))
print(ser1.mul(ser2, fill_value=0))
print(ser1.div(ser2, fill_value=1))

Output:

3 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


0 15.0
1 24.0
2 33.0
3 42.0
4 50.0
dtype: float64
0 5.0
1 16.0
2 27.0
3 38.0
4 50.0
dtype: float64
0 50.0
1 80.0
2 90.0
3 80.0
4 0.0
dtype: float64
0 2.0
1 5.0
2 10.0
3 20.0
4 50.0
dtype: float64

#Head and Tail functions

print(ser1.head(3))
print(ser1.tail(2))
print(ser1.head())
print(ser1.tail())

Output:
0 10
1 20
2 30
dtype: int64
3 40
4 50
dtype: int64
0 10
1 20
2 30
3 40
4 50
dtype: int64
0 10
1 20

4 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


2 30
3 40
4 50
dtype: int64

#Indexing

S5=pd.Series([2,3,5,10,12,15,18,20])
print(S5[4])
print(S5[[2,3,5]])

Output:
12

2 5
3 10
5 15
dtype: int64

#Slicing
print(S5[:3])
print(S5[2:6])
print(S5[1:4:2])
print(S5[::-1]) #slicing in reverse order

Output:
0 2
1 3
2 5
dtype: int64
2 5
3 10
4 12
5 15
dtype: int64
1 3
3 10
dtype: int64
7 20
6 18
5 15
4 12
3 10
2 5
1 3
0 2
dtype: int64

5 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


#Updating value using Indexing
S5[2]=4
print("Updated Series = ", S5)

Output:
Updated Series = 0 2

1 3
2 4
3 10
4 12
5 15
6 18
7 20
dtype: int64

S5[3:5]=50
print("Updated Series = ", S5)

Output:
Updated Series = 0 2
1 3
2 4
3 50
4 50
5 15
6 18
7 20
dtype: int64

#Using loc and iloc indexers

# Show the whole Series, then slice it by position with iloc
# (the end position 4 is excluded).
print(S5)
print("Using Positional Index...")
print(S5.iloc[1:4])
# Slice by label with loc; unlike a positional slice, the end
# label 'Oracle' is included in the result.
print(CompanySer)
print("Using Labelled Index...")
print(CompanySer.loc['IBM':'Oracle'])
# A plain integer slice still selects by position even though the
# Series carries string labels.
print("Using Positional Index in the series with Labelled Index...")
print(CompanySer[0:3])

Output:
0 2
1 3
2 4
3 50
4 50
5 15

6 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


6 18
7 20
dtype: int64

Using Positional Index...


1 3
2 4
3 50
dtype: int64

Microsoft Washington
IBM New York
Oracle Texas
SAP Walldorf
dtype: object

Using Labelled Index...


IBM New York
Oracle Texas
dtype: object

Using Positional Index in the series with Labelled Index...


Microsoft Washington
IBM New York
Oracle Texas
dtype: object

#Series attributes

S5.name="New Series"
S5.index.name="SER INDEX"
print(S5.name)
print(S5.index.name)
print(S5.values)
print(S5.empty)
print(S5.size)

Output:
New Series
SER INDEX
[ 2 3 4 50 50 15 18 20]
False
8

Python Data Frame – Operations & Methods


#Creating empty dataframe

7 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


import pandas as pd
dFrameEmt = pd.DataFrame()
print(dFrameEmt)

Output:
Empty DataFrame
Columns: []
Index: []

#Creating DataFrame using Dictionary of Series


ResultSheet={'Arnab': pd.Series([90, 91, 97], index=['Maths','Science','Hindi']),
'Ramit': pd.Series([92, 81, 96], index=['Maths','Science','Hindi']),
'Samridhi': pd.Series([89, 91, 88], index=['Maths','Science','Hindi']),
'Riya': pd.Series([81, 71, 67], index=['Maths','Science','Hindi']),
'Mallika': pd.Series([94, 95, 99], index=['Maths','Science','Hindi'])}
ResultDF = pd.DataFrame(ResultSheet)
print(ResultDF)

Output:

#Creating DataFrame from Dictionary of Series - Union of Series Indices


#When a DataFrame is created from a Dictionary of Series
#Resulting index or row labels are a union of all series indexes
Dict1 = { 'Ser1' : pd.Series([10,20,30,40,50],index = ['a', 'b', 'c', 'd', 'e']) ,
'Ser2' : pd.Series([100,15,25], index = ['z', 'a', 'c']),
'Ser3' :pd.Series([150,-50,-150],index = ['y', 'a', 'e']) }
DFUnionIndDemo = pd.DataFrame(Dict1)
print(DFUnionIndDemo)

Output:
Ser1 Ser2 Ser3
a 10.0 15.0 -50.0
b 20.0 NaN NaN
c 30.0 25.0 NaN
d 40.0 NaN NaN
e 50.0 NaN -150.0
y NaN NaN 150.0
z NaN 100.0 NaN

listDict = [{'a':10, 'b':20}, {'a':5, 'b':10, 'c':20}]


print(listDict)
dfListDict = pd.DataFrame(listDict)
print(dfListDict)

8 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


[{'a': 10, 'b': 20}, {'a': 5, 'b': 10, 'c': 20}]
a b c
0 10 20 NaN
1 5 10 20.0

#Creating dataframe using dictionary of Lists


student={'RNo':[1,2,3,4],
'Name':['vinay','James','Reena','Sam'], 'Subject':['Info','Info','Info','Info'],
'Marks':[72,85,98,90]}
sdf = pd.DataFrame(student)
print(sdf)
Output:

Operations of Rows and Columns in Dataframe:

#Adding a new column Grade

Method 1: creating a new column using a list:


Consider the following dataframe ResultDF created using Dictionary of Series:

ResultSheet={'Arnab': pd.Series([90, 91, 97], index=['Maths','Science','Hindi']),


'Ramit': pd.Series([92, 81, 96], index=['Maths','Science','Hindi']),
'Samridhi': pd.Series([89, 91, 88], index=['Maths','Science','Hindi']),
'Riya': pd.Series([81, 71, 67], index=['Maths','Science','Hindi']),
'Mallika': pd.Series([94, 95, 99], index=['Maths','Science','Hindi'])}
ResultDF=pd.DataFrame(ResultSheet)
print(ResultDF)
ResultDF['Preeti']=[89,78,76]
print(ResultDF)

Output

Method 2: creating a new column using a new series:


Consider sdf data frame which was created earlier:

sdf['Grade']=pd.Series(['C','B','A','A'])
print(sdf)

Output:

9 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


Assigning values to a new column label that does not exist will create a new column at the end. If the
column already exists in the DataFrame then the assignment statement will update the values of the
already existing column

sdf['Subject']=pd.Series(['CS','Maths','Info','English'])
print(sdf)

Changing entire column to a particular value in a DataFrame:

For example, the following statement sets marks=90 for all subjects for the column name 'Arnab':

ResultDF['Arnab']=90
print(ResultDF)

Output:

#Adding a new row:

Using the loc[] indexer:


Consider the following dataframe ResultDF

ResultDF.loc['English'] = [85, 86, 83, 80, 90, 89]


print(ResultDF)

10 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


#Duplicate Index will not be created if it already exists
#Only value will be updated
ResultDF.loc['English'] = [95, 86, 95, 80, 95,99]
print(ResultDF)

Output:

#using loc[] to change the data values of a row to a particular value.


#sets marks in 'Maths' for all columns to 0:

ResultDF.loc['Maths']=0
print(ResultDF)

Output:

Note: If we try to add a row with fewer values than the number of columns in the DataFrame, it
results in a ValueError, with the error message: ValueError: Cannot set a row with mismatched
columns.

We can also set all values of a DataFrame to a particular value:

ResultDF[: ] = 0 # Set all values in ResultDF to 0


print(ResultDF)

Output:

Using append() method:


# New student record, keyed by the column labels of sdf
newstud={'RNo':30,'Name':'Asha','Subject':'Info','Marks':54,'Grade':'B-'}
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat() with a one-row DataFrame adds the same row; ignore_index=True
# renumbers the row labels 0..n-1 exactly as append(..., ignore_index=True) did.
sdf=pd.concat([sdf,pd.DataFrame([newstud])],ignore_index=True)
print(sdf)

11 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


Output:

#Extract the top row using head() method and bottom 2 rows using tail() method
print(sdf.head(1))
print(sdf.tail(2))
Output:

#Removing a column
sdf=sdf.drop('Subject',axis=1)
print(sdf)
Output:

#Removing multiple columns


sdf=sdf.drop(['RNo','Marks'],axis=1)
print(sdf)

Output:

#Removing a row (update is required)


sdf=sdf.drop(2)
print(sdf)

Output:

#Removing multiple rows – default value of axis parameter = 0


# Drop the rows labelled 1 and 3. axis=0 (rows) is the default, so this is
# equivalent to: sdf=sdf.drop([1,3], axis=0)
sdf=sdf.drop([1,3])
print(sdf)

12 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


#Extract rows using loc[] (label-based slice, end label 2 is included) and iloc[] (position-based slice, end position 2 is excluded)
print(sdf.loc[0:2])
print(sdf.iloc[0:2])

Output:

Change the row and column labels in a DataFrame using the rename() method:

print(ResultDF)
ResultDF=ResultDF.rename({'Maths':'Sub1','Science':'Sub2',
'English':'Sub3','Hindi':'Sub4'},
axis='index')

ResultDF=ResultDF.rename({'Arnab':'Stud1','Ramit':'Stud2',
'Samridhi':'Stud3','Mallika':'Stud4'},
axis='columns')
print(ResultDF)

Output:

#Method 2: Renaming Name column to StudName


sdf.rename(columns={'Name':'StudName'},inplace=True)
print(sdf)

Output:

Label-based Indexing:

13 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


#A single row label returns the row as a Series.
print(ResultDF)
print(ResultDF.loc['Science'])

Output

#When a single column label is passed, it returns the column as a Series.


# Select every row of the 'Arnab' column; the result is a Series.
print(ResultDF.loc[:,'Arnab'])
# Equivalent shorthand: print(ResultDF['Arnab'])

Output:

#To read more than one row from a DF, a list of row labels is used as shown below.
#Note that using [[]] returns a DataFrame.
print(ResultDF.loc[['Science', 'Hindi']])

Output:

#Boolean Indexing
print(ResultDF)

#Display True / False for those who scored more than 90 in Maths


print(ResultDF.loc['Maths'] > 90)

Output:

14 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


#Display True / False for those subjects in which Arnab scored more than 90
print(ResultDF.loc[:,'Arnab']>90)

Output:

#String Condition - Extracting Name and Marks column values of the students with A Grade
print(sdf.iloc[(sdf['Grade']=='A').values,[1,3]])

Output:

#Numeric Condition - Extracting Name and Marks column values of the students who scored
#less than 60

print(sdf.iloc[(sdf['Marks']<60).values,[1,3]])

Output:

#Updating the Marks of Sam (row index 3) to 98.


sdf.at[3,'Marks']=98
print(sdf)

Output:

#Change the Name of Vinay to Vijay


sdf.at[0,'StudName']='Vijay'
print(sdf)
Output:

15 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023


#Displays the rows with label Maths and Science, and column with label Arnab:
print(ResultDF.loc['Maths': 'Science', 'Arnab'])
Output:

#Row slicing and column slicing:


print(ResultDF.loc['Maths': 'Science', 'Arnab':'Samridhi'])
Output:

#Alternative Method Row slicing and column listing


print(ResultDF.loc['Maths': 'Science',['Arnab','Samridhi']] )
Output:

#Filtering rows in a DataFrame with a Boolean list: rows marked True (1) are displayed, rows marked False (0) are omitted.
print(ResultDF.loc[[True, False, True, False]])

Output:

Attributes of Pandas Data Frame:

Attribute Name Purpose


DataFrame.index #To display row labels
DataFrame.columns #To display column labels
DataFrame.dtypes #To display data type of each column in the Data Frame
DataFrame.values #To display a NumPy ndarray
DataFrame.shape #To display a tuple representing the dimensionality of the Data Frame
DataFrame.size #To display the number of values in the DataFrame.
DataFrame.T #To transpose the DataFrame.
DataFrame.head(n) #To display the first n rows in the Data Frame
DataFrame.tail(n) #To display the last n rows in the Data Frame
DataFrame.empty #To return the value True if Data Frame is empty and False otherwise
df=pd.DataFrame() #Create an empty dataFrame

16 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

You might also like