You are on page 1of 7

datascience Set A - Jupyter Notebook http://localhost:8888/notebooks/datascience%20Set%...

In [26]: import pandas as pd


data=[{'name':'Ajay','age':'21','Percentage':'76%'},
{'name':'Vijay','age':'20','Percentage':'80%'},
{'name':'Riya','age':'19','Percentage':'75%'},
{'name':'Priya','age':'20','Percentage':'88%'},
{'name':'Ram','age':'21','Percentage':'67%'},
{'name':'Ajay','age':'21','Percentage':'None'},
{'name':'Vijay','age':'None','Percentage':'None'},
{'name':'Riya','age':'None','Percentage':'75%'},
{'name':'Priya','age':'20','Percentage':'88%'},
{'name':'Ram','age':'21','Percentage':'NaN'}]
data
df=pd.DataFrame(data)
df

Out[26]:
name age Percentage

0 Ajay 21 76%

1 Vijay 20 80%

2 Riya 19 75%

3 Priya 20 88%

4 Ram 21 67%

5 Ajay 21 None

6 Vijay None None

7 Riya None 75%

8 Priya 20 88%

9 Ram 21 NaN

In [27]: df['Remark']=None

In [28]: df

Out[28]:
name age Percentage Remark

0 Ajay 21 76% None

1 Vijay 20 80% None

2 Riya 19 75% None

3 Priya 20 88% None

4 Ram 21 67% None

5 Ajay 21 None None

6 Vijay None None None

7 Riya None 75% None

8 Priya 20 88% None

9 Ram 21 NaN None

1 of 7 07/08/23, 14:02
datascience Set A - Jupyter Notebook http://localhost:8888/notebooks/datascience%20Set%...

In [29]: df.describe()

Out[29]:
name age Percentage Remark

count 10 10 10 0

unique 5 4 7 0

top Ajay 21 75% NaN

freq 2 4 2 NaN

In [25]: df.isnull()

Out[25]:
name age Percentage Remark

0 False False False True

1 False False False True

2 False False False True

3 False False False True

4 False False False True

5 False False False True

6 False False False True

7 False False False True

8 False False False True

9 False False False True

In [32]: df.duplicated()

Out[32]: 0 False
1 False
2 False
3 False
4 False
5 False
6 False
7 False
8 True
9 False
dtype: bool

In [33]: df.drop(columns='Remark',axis=1,inplace=True)

2 of 7 07/08/23, 14:02
datascience Set A - Jupyter Notebook http://localhost:8888/notebooks/datascience%20Set%...

In [34]: df

Out[34]:
name age Percentage

0 Ajay 21 76%

1 Vijay 20 80%

2 Riya 19 75%

3 Priya 20 88%

4 Ram 21 67%

5 Ajay 21 None

6 Vijay None None

7 Riya None 75%

8 Priya 20 88%

9 Ram 21 NaN

In [54]: import pandas as pd


df=pd.read_csv('/home/bcalab-5/Downloads/Iris.csv')
df

Out[54]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

0 1 5.1 3.5 1.4 0.2 Iris-setosa

1 2 4.9 3.0 1.4 0.2 Iris-setosa

2 3 4.7 3.2 1.3 0.2 Iris-setosa

3 4 4.6 3.1 1.5 0.2 Iris-setosa

4 5 5.0 3.6 1.4 0.2 Iris-setosa

... ... ... ... ... ... ...

145 146 6.7 3.0 5.2 2.3 Iris-virginica

146 147 6.3 2.5 5.0 1.9 Iris-virginica

147 148 6.5 3.0 5.2 2.0 Iris-virginica

148 149 6.2 3.4 5.4 2.3 Iris-virginica

149 150 5.9 3.0 5.1 1.8 Iris-virginica

150 rows × 6 columns

In [55]: df.shape

Out[55]: (150, 6)

3 of 7 07/08/23, 14:02
datascience Set A - Jupyter Notebook http://localhost:8888/notebooks/datascience%20Set%...

In [56]: df.describe()

Out[56]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm

count 150.000000 150.000000 150.000000 150.000000 150.000000

mean 75.500000 5.843333 3.054000 3.758667 1.198667

std 43.445368 0.828066 0.433594 1.764420 0.763161

min 1.000000 4.300000 2.000000 1.000000 0.100000

25% 38.250000 5.100000 2.800000 1.600000 0.300000

50% 75.500000 5.800000 3.000000 4.350000 1.300000

75% 112.750000 6.400000 3.300000 5.100000 1.800000

max 150.000000 7.900000 4.400000 6.900000 2.500000

In [57]: df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Id 150 non-null int64
1 SepalLengthCm 150 non-null float64
2 SepalWidthCm 150 non-null float64
3 PetalLengthCm 150 non-null float64
4 PetalWidthCm 150 non-null float64
5 Species 150 non-null object
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB

In [58]: df.dtypes

Out[58]: Id int64
SepalLengthCm float64
SepalWidthCm float64
PetalLengthCm float64
PetalWidthCm float64
Species object
dtype: object

4 of 7 07/08/23, 14:02
datascience Set A - Jupyter Notebook http://localhost:8888/notebooks/datascience%20Set%...

In [61]: df.head(20)

Out[61]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

0 1 5.1 3.5 1.4 0.2 Iris-setosa

1 2 4.9 3.0 1.4 0.2 Iris-setosa

2 3 4.7 3.2 1.3 0.2 Iris-setosa

3 4 4.6 3.1 1.5 0.2 Iris-setosa

4 5 5.0 3.6 1.4 0.2 Iris-setosa

5 6 5.4 3.9 1.7 0.4 Iris-setosa

6 7 4.6 3.4 1.4 0.3 Iris-setosa

7 8 5.0 3.4 1.5 0.2 Iris-setosa

8 9 4.4 2.9 1.4 0.2 Iris-setosa

9 10 4.9 3.1 1.5 0.1 Iris-setosa

10 11 5.4 3.7 1.5 0.2 Iris-setosa

11 12 4.8 3.4 1.6 0.2 Iris-setosa

12 13 4.8 3.0 1.4 0.1 Iris-setosa

13 14 4.3 3.0 1.1 0.1 Iris-setosa

14 15 5.8 4.0 1.2 0.2 Iris-setosa

15 16 5.7 4.4 1.5 0.4 Iris-setosa

16 17 5.4 3.9 1.3 0.4 Iris-setosa

17 18 5.1 3.5 1.4 0.3 Iris-setosa

18 19 5.7 3.8 1.7 0.3 Iris-setosa

19 20 5.1 3.8 1.5 0.3 Iris-setosa

In [60]: df.tail()

Out[60]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

145 146 6.7 3.0 5.2 2.3 Iris-virginica

146 147 6.3 2.5 5.0 1.9 Iris-virginica

147 148 6.5 3.0 5.2 2.0 Iris-virginica

148 149 6.2 3.4 5.4 2.3 Iris-virginica

149 150 5.9 3.0 5.1 1.8 Iris-virginica

5 of 7 07/08/23, 14:02
datascience Set A - Jupyter Notebook http://localhost:8888/notebooks/datascience%20Set%...

In [62]: df.sample(10)

Out[62]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

37 38 4.9 3.1 1.5 0.1 Iris-setosa

148 149 6.2 3.4 5.4 2.3 Iris-virginica

7 8 5.0 3.4 1.5 0.2 Iris-setosa

27 28 5.2 3.5 1.5 0.2 Iris-setosa

121 122 5.6 2.8 4.9 2.0 Iris-virginica

114 115 5.8 2.8 5.1 2.4 Iris-virginica

58 59 6.6 2.9 4.6 1.3 Iris-versicolor

147 148 6.5 3.0 5.2 2.0 Iris-virginica

50 51 7.0 3.2 4.7 1.4 Iris-versicolor

125 126 7.2 3.2 6.0 1.8 Iris-virginica

In [63]: df.size

Out[63]: 900

In [64]: df.columns

Out[64]: Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'Pet


alWidthCm',
'Species'],
dtype='object')

In [65]: df['Species'].value_counts()

Out[65]: Iris-setosa 50
Iris-versicolor 50
Iris-virginica 50
Name: Species, dtype: int64

6 of 7 07/08/23, 14:02
datascience Set A - Jupyter Notebook http://localhost:8888/notebooks/datascience%20Set%...

In [67]: sliced_data=df[10:20]
print(sliced_data)

Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm


Species
10 11 5.4 3.7 1.5 0.2 I
ris-setosa
11 12 4.8 3.4 1.6 0.2 I
ris-setosa
12 13 4.8 3.0 1.4 0.1 I
ris-setosa
13 14 4.3 3.0 1.1 0.1 I
ris-setosa
14 15 5.8 4.0 1.2 0.2 I
ris-setosa
15 16 5.7 4.4 1.5 0.4 I
ris-setosa
16 17 5.4 3.9 1.3 0.4 I
ris-setosa
17 18 5.1 3.5 1.4 0.3 I
ris-setosa
18 19 5.7 3.8 1.7 0.3 I
ris-setosa
19 20 5.1 3.8 1.5 0.3 I
ris-setosa

In [69]: specific_data=df[["Id","Species"]]
print(specific_data)

Id Species
0 1 Iris-setosa
1 2 Iris-setosa
2 3 Iris-setosa
3 4 Iris-setosa
4 5 Iris-setosa
.. ... ...
145 146 Iris-virginica
146 147 Iris-virginica
147 148 Iris-virginica
148 149 Iris-virginica
149 150 Iris-virginica

[150 rows x 2 columns]

In [ ]:

7 of 7 07/08/23, 14:02

You might also like