Python Series DataFrames Commands Methods Properties Examples (2022-23) PDF

Python Series – Operations and Methods
import pandas as pd
#Series: Creation of Series from ndarray
import numpy as np # import NumPy with alias np
import pandas as pd
arr1 = np.array([10,20,30,40])
s1 = pd.Series(arr1)
print(s1)
Output:
0 10
1 20
2 30
3 40
dtype: int32
#Series: Creation of Series from scalar value

import pandas as pd #import Pandas with alias pd
#create a Series with default index
ser1 = pd.Series([10,20,30,40,50]) #create a Series
print(ser1) #Display the series
Output:
0 10
1 20
2 30
3 40
4 50
dtype: int64
#create a Series with positional index and labelled index

ser2 = pd.Series([5,4,3,2])
ser3 = pd.Series([15,25,40,50],index=['a','b','c','d'])
print(ser2)
print(ser3)
Output:
0 5
1 4
2 3
3 2
dtype: int64
a 15
b 25
c 40
d 50
dtype: int64
1 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#Creating a series using Dictionary
D1 = {'Microsoft': 'Washington', 'IBM': 'New York','Oracle': 'Texas', 'SAP':'Walldorf'}
print(D1) #Display the dictionary
CompanySer= pd.Series(D1)
print(CompanySer) #Display the series
Output:
{'Microsoft': 'Washington', 'IBM': 'New York', 'Oracle': 'Texas', 'SAP': 'Walldorf'}

Microsoft Washington
IBM New York
Oracle Texas
SAP Walldorf
dtype: object
#Mathematical operations on Series

print(ser1+ser2)
print(ser1-ser2)
print(ser1*ser2)
print(ser1/ser2)
print(ser1//ser2)
Output:
0 15.0
1 24.0
2 33.0
3 42.0
4 NaN
dtype: float64
0 5.0
1 16.0
2 27.0
3 38.0
4 NaN
dtype: float64
0 50.0
1 80.0
2 90.0
3 80.0
4 NaN
dtype: float64
0 2.0
1 5.0
2 10.0
3 20.0
4 NaN

dtype: float64
0 2.0
1 5.0
2 10.0
3 20.0
4 NaN
dtype: float64
#Using Mathematical Methods in Series

print(ser1.add(ser2))
print(ser1.sub(ser2))
print(ser1.mul(ser2))
print(ser1.div(ser2))
Output:
0 15.0
1 24.0
2 33.0
3 42.0
4 NaN
dtype: float64
0 5.0
1 16.0
2 27.0
3 38.0
4 NaN
dtype: float64
0 50.0
1 80.0
2 90.0
3 80.0
4 NaN
dtype: float64
0 2.0
1 5.0
2 10.0
3 20.0
4 NaN
dtype: float64
#Using the fill_value parameter to handle NaN:
print(ser1.add(ser2, fill_value=0))
print(ser1.sub(ser2, fill_value=0))
print(ser1.mul(ser2, fill_value=0))
print(ser1.div(ser2, fill_value=1))
Output:

0 15.0
1 24.0
2 33.0
3 42.0
4 50.0
dtype: float64
0 5.0
1 16.0
2 27.0
3 38.0
4 50.0
dtype: float64
0 50.0
1 80.0
2 90.0
3 80.0
4 0.0
dtype: float64
0 2.0
1 5.0
2 10.0
3 20.0
4 50.0
dtype: float64
#Head and Tail functions
print(ser1.head(3))
print(ser1.tail(2))
print(ser1.head())
print(ser1.tail())
Output:
0 10
1 20
2 30
dtype: int64
3 40
4 50
dtype: int64
0 10
1 20
2 30
3 40
4 50
dtype: int64
0 10
1 20

2 30
3 40
4 50
dtype: int64
#Indexing
S5=pd.Series([2,3,5,10,12,15,18,20])
print(S5[4])
print(S5[[2,3,5]])
Output:
12
2 5
3 10
5 15
dtype: int64
#Slicing
print(S5[:3])
print(S5[2:6])
print(S5[1:4:2])
print(S5[::-1]) #slicing in reverse order
Output:
0 2
1 3
2 5
dtype: int64
2 5
3 10
4 12
5 15
dtype: int64
1 3
3 10
dtype: int64
7 20
6 18
5 15
4 12
3 10
2 5
1 3
0 2
dtype: int64

#Updating value using Indexing
S5[2]=4
print("Updated Series = ", S5)
Output:
Updated Series = 0 2
1 3
2 4
3 10
4 12
5 15
6 18
7 20
dtype: int64
S5[3:5]=50
print("Updated Series = ", S5)
Output:
Updated Series = 0 2
1 3
2 4
3 50
4 50
5 15
6 18
7 20
dtype: int64
#Using loc[] and iloc[] indexers
print(S5)
print("Using Positional Index...")
print(S5.iloc[1:4])
print(CompanySer)
print("Using Labelled Index...")
print(CompanySer.loc['IBM':'Oracle'])
print("Using Positional Index in the series with Labelled Index...")
print(CompanySer[0:3])
Output:
0 2
1 3
2 4
3 50
4 50
5 15

6 18
7 20
dtype: int64
Using Positional Index...

1 3
2 4
3 50
dtype: int64
Microsoft Washington
IBM New York
Oracle Texas
SAP Walldorf
dtype: object
Using Labelled Index...

IBM New York
Oracle Texas
dtype: object
Using Positional Index in the series with Labelled Index...

Microsoft Washington
IBM New York
Oracle Texas
dtype: object
#Series attributes
S5.name="New Series"
S5.index.name="SER INDEX"
print(S5.name)
print(S5.index.name)
print(S5.values)
print(S5.empty)
print(S5.size)
Output:
New Series
SER INDEX
[ 2 3 4 50 50 15 18 20]
False
8
Python Data Frame – Operations & Methods

#Creating empty dataframe

import pandas as pd
dFrameEmt = pd.DataFrame()
print(dFrameEmt)
Output:
Empty DataFrame
Columns: []
Index: []
#Creating DataFrame using Dictionary of Series

ResultSheet={'Arnab': pd.Series([90, 91, 97], index=['Maths','Science','Hindi']),
'Ramit': pd.Series([92, 81, 96], index=['Maths','Science','Hindi']),
'Samridhi': pd.Series([89, 91, 88], index=['Maths','Science','Hindi']),
'Riya': pd.Series([81, 71, 67], index=['Maths','Science','Hindi']),
'Mallika': pd.Series([94, 95, 99], index=['Maths','Science','Hindi'])}
ResultDF = pd.DataFrame(ResultSheet)
print(ResultDF)
Output:
#Creating DataFrame from Dictionary of Series - Union of Series Indices

#When a DataFrame is created from a Dictionary of Series
#Resulting index or row labels are a union of all series indexes
Dict1 = { 'Ser1' : pd.Series([10,20,30,40,50],index = ['a', 'b', 'c', 'd', 'e']) ,
'Ser2' : pd.Series([100,15,25], index = ['z', 'a', 'c']),
'Ser3' :pd.Series([150,-50,-150],index = ['y', 'a', 'e']) }
DFUnionIndDemo = pd.DataFrame(Dict1)
print(DFUnionIndDemo)
Output:
Ser1 Ser2 Ser3
a 10.0 15.0 -50.0
b 20.0 NaN NaN
c 30.0 25.0 NaN
d 40.0 NaN NaN
e 50.0 NaN -150.0
y NaN NaN 150.0
z NaN 100.0 NaN
listDict = [{'a':10, 'b':20}, {'a':5, 'b':10, 'c':20}]

print(listDict)
dfListDict = pd.DataFrame(listDict)
print(dfListDict)

Output:
[{'a': 10, 'b': 20}, {'a': 5, 'b': 10, 'c': 20}]
a b c
0 10 20 NaN
1 5 10 20.0
#Creating dataframe using dictionary of Lists

student={'RNo':[1,2,3,4],
'Name':['vinay','James','Reena','Sam'], 'Subject':['Info','Info','Info','Info'],
'Marks':[72,85,98,90]}
sdf = pd.DataFrame(student)
print(sdf)
Output:
Operations on Rows and Columns in DataFrame:
#Adding a new column Grade
Method 1: creating a new column using a list:

Consider the following dataframe ResultDF created using Dictionary of Series:
ResultSheet={'Arnab': pd.Series([90, 91, 97], index=['Maths','Science','Hindi']),

'Ramit': pd.Series([92, 81, 96], index=['Maths','Science','Hindi']),
'Samridhi': pd.Series([89, 91, 88], index=['Maths','Science','Hindi']),
'Riya': pd.Series([81, 71, 67], index=['Maths','Science','Hindi']),
'Mallika': pd.Series([94, 95, 99], index=['Maths','Science','Hindi'])}
ResultDF=pd.DataFrame(ResultSheet)
print(ResultDF)
ResultDF['Preeti']=[89,78,76]
print(ResultDF)
Output
Method 2: creating a new column using a new series:

Consider sdf data frame which was created earlier:
sdf['Grade']=pd.Series(['C','B','A','A'])
print(sdf)
Output:

Assigning values to a new column label that does not exist will create a new column at the end. If the
column already exists in the DataFrame then the assignment statement will update the values of the
already existing column
sdf['Subject']=pd.Series(['CS','Maths','Info','English'])
print(sdf)
Changing entire column to a particular value in a DataFrame:
For example, the following statement sets marks=90 for all subjects for the column name 'Arnab':
ResultDF['Arnab']=90
print(ResultDF)
Output:
#Adding a new row:
Using loc[] indexer:

Consider the following dataframe ResultDF
ResultDF.loc['English'] = [85, 86, 83, 80, 90, 89]

print(ResultDF)

#Duplicate Index will not be created if it already exists
#Only value will be updated
ResultDF.loc['English'] = [95, 86, 95, 80, 95,99]
print(ResultDF)
Output:
#using loc[] to change the data values of a row to a particular value.

#sets marks in 'Maths' for all columns to 0:
ResultDF.loc['Maths']=0
print(ResultDF)
Output:
Note: If we try to add a row with fewer values than the number of columns in the DataFrame, it
results in a ValueError, with the error message: ValueError: Cannot set a row with mismatched
columns.
we can set all values of a DataFrame to a particular value.
ResultDF[: ] = 0 # Set all values in ResultDF to 0

print(ResultDF)
Output:
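Using append() method (note: DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0; on newer versions use pd.concat() instead):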

newstud={'RNo':30,'Name':'Asha','Subject':'Info','Marks':54,'Grade':'B-'}
sdf=sdf.append(newstud,ignore_index=True)
print(sdf)

Output:
#Extract the top row using head() method and bottom 2 rows using tail() method
print(sdf.head(1))
print(sdf.tail(2))
Output:
#Removing a column
sdf=sdf.drop('Subject',axis=1)
print(sdf)
Output:
#Removing multiple columns

sdf=sdf.drop(['RNo','Marks'],axis=1)
print(sdf)
Output:
#Removing a row (the result must be assigned back, because drop() returns a new DataFrame)

sdf=sdf.drop(2)
print(sdf)
Output:
#Removing multiple rows – default value of axis parameter = 0

sdf=sdf.drop([1,3]) # or equivalently: sdf=sdf.drop([1,3], axis=0)
print(sdf)

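#Extract rows using loc[] and iloc[]: loc[0:2] selects row labels 0 through 2 (end label included),
#while iloc[0:2] selects row positions 0 and 1 (end position excluded)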
print(sdf.loc[0:2])
print(sdf.iloc[0:2])
Output:
Change the row and column labels in a DataFrame using the rename() method:
print(ResultDF)
ResultDF=ResultDF.rename({'Maths':'Sub1','Science':'Sub2',
'English':'Sub3','Hindi':'Sub4'},
axis='index')
ResultDF=ResultDF.rename({'Arnab':'Stud1','Ramit':'Stud2',
'Samridhi':'Stud3','Mallika':'Stud4'},
axis='columns')
print(ResultDF)
Output:
#Method 2: Renaming Name column to StudName

sdf.rename(columns={'Name':'StudName'},inplace=True)
print(sdf)
Output:
Label-based Indexing:

#A single row label returns the row as a Series.
print(ResultDF)
print(ResultDF.loc['Science'])
Output
#When a single column label is passed, it returns the column as a Series.

print(ResultDF.loc[:,'Arnab']) # or equivalently: print(ResultDF['Arnab'])
Output:
#To read more than one row from a DF, a list of row labels is used as shown below.
#Note that using [[]] returns a DataFrame.
print(ResultDF.loc[['Science', 'Hindi']])
Output:
#Boolean Indexing
print(ResultDF)
#Display True / False for those who scored more than 90 in Maths

print(ResultDF.loc['Maths'] > 90)
Output:

#Display True / False for the subjects in which Arnab scored more than 90
print(ResultDF.loc[:,'Arnab']>90)
Output:
#String Condition - Extracting Name and Marks column values of the students with A Grade
print(sdf.iloc[(sdf['Grade']=='A').values,[1,3]])
Output:
#Numeric Condition - Extracting Name and Marks column values of the students who scored
#less than 60
print(sdf.iloc[(sdf['Marks']<60).values,[1,3]])
Output:
#Updating the Marks of Sam (row index 3) to 98.

sdf.at[3,'Marks']=98
print(sdf)
Output:
#Change the Name of Vinay to Vijay

sdf.at[0,'StudName']='Vijay'
print(sdf)
Output:

#Displays the rows with label Maths and Science, and column with label Arnab:
print(ResultDF.loc['Maths': 'Science', 'Arnab'])
Output:
#Row slicing and column slicing:

print(ResultDF.loc['Maths': 'Science', 'Arnab':'Samridhi'])
Output:
#Alternative Method Row slicing and column listing

print(ResultDF.loc['Maths': 'Science',['Arnab','Samridhi']] )
Output:
#Filtering rows of a DataFrame with a Boolean list: rows paired with True are displayed, rows paired with False are omitted.
print(ResultDF.loc[[True, False, True, False]])
Output:
Attributes of Pandas Data Frame:
Attribute Name Purpose

DataFrame.index #To display row labels
DataFrame.columns #To display column labels
DataFrame.dtypes #To display data type of each column in the Data Frame
DataFrame.values #To display a NumPy ndarray
DataFrame.shape #To display a tuple representing the dimensionality of the Data Frame
DataFrame.size #To display the number of values in the DataFrame.
DataFrame.T #To transpose the DataFrame.
DataFrame.head(n) #To display the first n rows in the Data Frame
DataFrame.tail(n) #To display the last n rows in the Data Frame
DataFrame.empty #To return the value True if Data Frame is empty and False otherwise
df=pd.DataFrame() #Create an empty dataFrame

Python Series DataFrames Commands Methods Properties Examples (2022-23) PDF

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Python Series DataFrames Commands Methods Properties Examples (2022-23) PDF

Uploaded by

Copyright:

Available Formats

Python Series – Operations and Methods

#Series: Creation of Series from scalar value

#create a Series with positional index and labelled index

1 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

{'Microsoft': 'Washington', 'IBM': 'New York', 'Oracle': 'Texas', 'SAP': 'Walldorf'}

#Mathematical operations on Series

2 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#Using Mathematical Methods in Series

#Using fill_value property to handle NaN:

3 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#Head and Tail functions

4 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

5 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#Using loc and iloc Methods

6 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

Using Positional Index...

Using Labelled Index...

Using Positional Index in the series with Labelled Index...

Python Data Frame – Operations & Methods

7 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#Creating Series using Dictionary of Series

#Creating Seris Dictionay of Series - Union of Series Indices

listDict = [{'a':10, 'b':20}, {'a':5, 'b':10, 'c':20}]

8 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#Creating dataframe using dictionary of Lists

Operations of Rows and Columns in Dataframe:

#Adding a new column Grade

Method 1: creating a new column using a list:

ResultSheet={'Arnab': pd.Series([90, 91, 97], index=['Maths','Science','Hindi']),

Method 2: creating a new column using a new series:

9 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

Changing entire column to a particular value in a DataFrame:

#Adding a new row:

Using loc() method:

ResultDF.loc['English'] = [85, 86, 83, 80, 90, 89]

10 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#using loc[] to change the data values of a row to a particular value.

we can set all values of a DataFrame to a particular value.

ResultDF[: ] = 0 # Set all values in ResultDF to 0

Using append() method:

11 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#Removing multiple columns

#Removing a row (update is required)

#Removing multiple rows – default value of axis parameter = 0

12 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

Change the row and column labels in a DF using the rename()

#Method 2: Renaming Name column to StudName

Labelled based Indexing:

13 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#When a single column label is passed, it returns the column as a Series.

#Display True / False of those those scored more than 90

14 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#Updating the Mark of a Sam (Index of Row 3) to 98.

#Change the Name of Vinay to Vijay

15 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

#Row slicing and column slicing:

#Alternative Method Row slicing and column listing

Attributes of Pandas Data Frame:

Attribute Name Purpose

16 Prepared By : Mr. A. Ranjith Kumar | Department of Computer Science 23/02/2023

You might also like