Professional Documents
Culture Documents
import pandas as pd
#Series: Creation of a Series from an ndarray
import numpy as np # import NumPy with alias np
import pandas as pd
arr1 = np.array([10,20,30,40])
s1 = pd.Series(arr1)
print(s1)
Output:
0 10
1 20
2 30
3 40
dtype: int32
Output:
0 10
1 20
2 30
3 40
4 50
dtype: int64
Output:
0 5
1 4
2 3
3 2
dtype: int64
a 15
b 25
c 40
d 50
dtype: int64
Output:
Output:
0 15.0
1 24.0
2 33.0
3 42.0
4 NaN
dtype: float64
0 5.0
1 16.0
2 27.0
3 38.0
4 NaN
dtype: float64
0 50.0
1 80.0
2 90.0
3 80.0
4 NaN
dtype: float64
0 2.0
1 5.0
2 10.0
3 20.0
4 NaN
Output:
0 15.0
1 24.0
2 33.0
3 42.0
4 NaN
dtype: float64
0 5.0
1 16.0
2 27.0
3 38.0
4 NaN
dtype: float64
0 50.0
1 80.0
2 90.0
3 80.0
4 NaN
dtype: float64
0 2.0
1 5.0
2 10.0
3 20.0
4 NaN
dtype: float64
print(ser1.add(ser2, fill_value=0))
print(ser1.sub(ser2, fill_value=0))
print(ser1.mul(ser2, fill_value=0))
print(ser1.div(ser2, fill_value=1))
Output:
print(ser1.head(3))
print(ser1.tail(2))
print(ser1.head())
print(ser1.tail())
Output:
0 10
1 20
2 30
dtype: int64
3 40
4 50
dtype: int64
0 10
1 20
2 30
3 40
4 50
dtype: int64
0 10
1 20
#Indexing
S5=pd.Series([2,3,5,10,12,15,18,20])
print(S5[4])
print(S5[[2,3,5]])
Output:
12
2 5
3 10
5 15
dtype: int64
#Slicing
print(S5[:3])
print(S5[2:6])
print(S5[1:4:2])
print(S5[::-1]) #slicing in reverse order
Output:
0 2
1 3
2 5
dtype: int64
2 5
3 10
4 12
5 15
dtype: int64
1 3
3 10
dtype: int64
7 20
6 18
5 15
4 12
3 10
2 5
1 3
0 2
dtype: int64
Output:
Updated Series = 0 2
1 3
2 4
3 10
4 12
5 15
6 18
7 20
dtype: int64
S5[3:5]=50
print("Updated Series = ", S5)
Output:
Updated Series = 0 2
1 3
2 4
3 50
4 50
5 15
6 18
7 20
dtype: int64
print(S5)
print("Using Positional Index...")
print(S5.iloc[1:4])
print(CompanySer)
print("Using Labelled Index...")
print(CompanySer.loc['IBM':'Oracle'])
print("Using Positional Index in the series with Lablled Index...")
print(CompanySer[0:3])
Output:
0 2
1 3
2 4
3 50
4 50
5 15
Microsoft Washington
IBM New York
Oracle Texas
SAP Walldorf
dtype: object
#Series attributes
S5.name="New Series"
S5.index.name="SER INDEX"
print(S5.name)
print(S5.index.name)
print(S5.values)
print(S5.empty)
print(S5.size)
Output:
New Series
SER INDEX
[ 2 3 4 50 50 15 18 20]
False
8
Output:
Empty DataFrame
Columns: []
Index: []
Output:
Output:
Ser1 Ser2 Ser3
a 10.0 15.0 -50.0
b 20.0 NaN NaN
c 30.0 25.0 NaN
d 40.0 NaN NaN
e 50.0 NaN -150.0
y NaN NaN 150.0
z NaN 100.0 NaN
Output:
sdf['Grade']=pd.Series(['C','B','A','A'])
print(sdf)
Output:
sdf['Subject']=pd.Series(['CS','Maths','Info','English'])
print(sdf)
For example, the following statement sets marks=90 for all subjects for the column name 'Arnab':
ResultDF['Arnab']=90
print(ResultDF)
Output:
Output:
ResultDF.loc['Maths']=0
print(ResultDF)
Output:
Note: If we try to add a row with fewer values than the number of columns in the DataFrame, it
results in a ValueError with the error message: ValueError: Cannot set a row with mismatched
columns.
Output:
#Extract the top row using head() method and bottom 2 rows using tail() method
print(sdf.head(1))
print(sdf.tail(2))
Output:
#Removing a column
sdf=sdf.drop('Subject',axis=1)
print(sdf)
Output:
Output:
Output:
Output:
print(ResultDF)
ResultDF=ResultDF.rename({'Maths':'Sub1','Science':'Sub2',
'English':'Sub3','Hindi':'Sub4'},
axis='index')
ResultDF=ResultDF.rename({'Arnab':'Stud1','Ramit':'Stud2',
'Samridhi':'Stud3','Mallika':'Stud4'},
axis='columns')
print(ResultDF)
Output:
Output:
Output:
Output:
#To read more than one row from a DF, a list of row labels is used as shown below.
#Note that using [[]] returns a DataFrame.
print(ResultDF.loc[['Science', 'Hindi']])
Output:
#Boolean Indexing
print(ResultDF)
Output:
Output:
#String Condition - Extracting Name and Marks column values of the students with A Grade
print(sdf.iloc[(sdf['Grade']=='A').values,[1,3]])
Output:
#Numeric Condition - Extracting Name, Subject and Marks column values of the students who scored
#less than 60
print(sdf.iloc[(sdf['Marks']<60).values,[1,3]])
Output:
Output:
#Filtering rows in a DataFrame with a Boolean list: True (1) selects the row at that position, False (0) skips it.
print(ResultDF.loc[[True, False, True, False]])
Output: