Professional Documents
Culture Documents
In [4]:
import pandas as pd

# Read the diamonds CSV from the seaborn-data repository and preview
# the first ten rows as the cell's rich output.
file = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
file.head(n=10)
Out[4]:
5 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48
6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47
7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53
9 0.23 Very Good H VS1 59.4 61.0 338 4.00 4.05 2.39
In [5]:
import pandas as pd

# Columns to keep in the preview below.
coloum = ['carat', 'cut', 'x', 'y', 'z']
diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("First 6 rows:")
# Restrict to the listed columns, then show the first six rows.
selected = diamonds.loc[:, coloum]
selected.head(6)
First 6 rows:
Out[5]:
carat cut x y z
localhost:8888/nbconvert/html/pandas(set2).ipynb?download=false 1/10
8/9/2020 pandas(set2)
In [6]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
# Pull out the 'carat' column as a Series and print it.
carat_series = diamonds.carat
print(carat_series)
0 0.23
1 0.21
2 0.23
3 0.29
4 0.31
...
53935 0.72
53936 0.72
53937 0.70
53938 0.86
53939 0.75
Name: carat, Length: 53940, dtype: float64
In [7]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
# Build a combined "<cut>, <color>" label per row and store it as a new column.
# NOTE(review): the column key contains an en-dash, not an ASCII hyphen.
combined_label = diamonds['cut'] + ', ' + diamonds['color']
diamonds['Quality–color'] = combined_label
diamonds.head(10)
Out[7]:
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 Ideal, E
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31 Premium, E
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31 Good, E
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63 Premium, I
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75 Good, J
5 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48 Very Good, J
6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47 Very Good, I
7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53 Very Good, H
8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49 Fair, E
9 0.23 Very Good H VS1 59.4 61.0 338 4.00 4.05 2.39 Very Good, H
localhost:8888/nbconvert/html/pandas(set2).ipynb?download=false 2/10
8/9/2020 pandas(set2)
In [8]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
# Report the frame's (rows, columns) tuple followed by the per-column dtypes.
dimensions = diamonds.shape
column_types = diamonds.dtypes
print("Number of rows and columns:")
print(dimensions)
print("Data type of each column:")
print(column_types)
In [9]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("Original Dataframe:")
print(diamonds.head())
print("\nAfter renaming two of the columns of the diamond dataframe:")
# Map old column names to new ones; 'price' becomes 'diamond_price'
# (correctly spelled). rename() returns a new frame, so rebinding the
# name avoids in-place mutation of the frame printed above.
new_names = {'color': 'diamond_color', 'price': 'diamond_price'}
diamonds = diamonds.rename(columns=new_names)
diamonds.head()
Original Dataframe:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
Out[9]:
localhost:8888/nbconvert/html/pandas(set2).ipynb?download=false 3/10
8/9/2020 pandas(set2)
In [10]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("Original Dataframe:")
print(diamonds.head())
print("\n After removing multiple rows:")
# Remove the rows whose index labels are 1, 4 and 5, then preview the result.
rows_to_remove = [1, 4, 5]
diamonds = diamonds.drop(index=rows_to_remove)
diamonds.head()
Original Dataframe:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
Out[10]:
6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47
7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53
In [11]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("Original Dataframe:")
print(diamonds.head())
print("\n After removing the second column of the Dataframe:")
# 'cut' is the second column in the header printed above; drop it by name.
diamonds = diamonds.drop(columns='cut')
print(diamonds.head())
Original Dataframe:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
localhost:8888/nbconvert/html/pandas(set2).ipynb?download=false 4/10
8/9/2020 pandas(set2)
In [12]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("Original Dataframe:")
print(diamonds.head())
print("\n cut Series in ascending order :")
# sort_values() sorts ascending by default; the result keeps the original
# index labels alongside the sorted 'cut' values.
x = diamonds['cut'].sort_values()
print(x)
Original Dataframe:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
localhost:8888/nbconvert/html/pandas(set2).ipynb?download=false 5/10
8/9/2020 pandas(set2)
In [13]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("Original Dataframe:")
print(diamonds.head())

# Whole-frame sort keyed on 'carat', smallest first.
print("\nSort the entire diamonds DataFrame by the 'carat' Series in ascending order")
result = diamonds.sort_values(by='carat', ascending=True)
print(result)

# Same sort key, largest first.
print("\nSort the entire diamonds DataFrame by the 'carat' Series in descending order")
result = diamonds.sort_values(by='carat', ascending=False)
print(result)
localhost:8888/nbconvert/html/pandas(set2).ipynb?download=false 6/10
8/9/2020 pandas(set2)
Original Dataframe:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
Sort the entire diamonds DataFrame by the 'carat' Series in ascending orde
r
carat cut color clarity depth table price x y
z
31593 0.20 Premium E VS2 61.1 59.0 367 3.81 3.78 2.
32
31597 0.20 Ideal D VS2 61.5 57.0 367 3.81 3.77 2.
33
31596 0.20 Premium F VS2 62.6 59.0 367 3.73 3.71 2.
33
31595 0.20 Ideal E VS2 59.7 55.0 367 3.86 3.84 2.
30
31594 0.20 Premium E VS2 59.7 62.0 367 3.84 3.80 2.
28
... ... ... ... ... ... ... ... ... ...
...
25999 4.01 Premium J I1 62.5 62.0 15223 10.02 9.94 6.
24
25998 4.01 Premium I I1 61.0 61.0 15223 10.14 10.10 6.
17
27130 4.13 Fair H I1 64.8 61.0 17329 10.00 9.85 6.
43
27630 4.50 Fair J I1 65.8 58.0 18531 10.23 10.16 6.
72
27415 5.01 Fair J I1 65.5 59.0 18018 10.74 10.54 6.
98
In [14]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("Original Dataframe:")
print(diamonds.head(20))
print("\nRows to only show carat weight at least 0.3:")
# One boolean per row: True where carat >= 0.3. The comparison is evaluated
# on the whole column at once instead of appending inside a Python loop;
# tolist() converts the result to a plain list of Python bools so the
# printed slice has the same form as a hand-built list.
booleans = (diamonds.carat >= .3).tolist()
print(booleans[0:20])
Original Dataframe:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
5 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 2.48
6 0.24 Very Good I VVS1 62.3 57.0 336 3.95 3.98 2.47
7 0.26 Very Good H SI1 61.9 55.0 337 4.07 4.11 2.53
8 0.22 Fair E VS2 65.1 61.0 337 3.87 3.78 2.49
9 0.23 Very Good H VS1 59.4 61.0 338 4.00 4.05 2.39
10 0.30 Good J SI1 64.0 55.0 339 4.25 4.28 2.73
11 0.23 Ideal J VS1 62.8 56.0 340 3.93 3.90 2.46
12 0.22 Premium F SI1 60.4 61.0 342 3.88 3.84 2.33
13 0.31 Ideal J SI2 62.2 54.0 344 4.35 4.37 2.71
14 0.20 Premium E SI2 60.2 62.0 345 3.79 3.75 2.27
15 0.32 Premium E I1 60.9 58.0 345 4.38 4.42 2.68
16 0.30 Ideal I SI2 62.0 54.0 348 4.31 4.34 2.68
17 0.30 Good J SI1 63.4 54.0 351 4.23 4.29 2.70
18 0.30 Good J SI1 63.8 56.0 351 4.23 4.26 2.71
19 0.30 Very Good J SI1 62.7 59.0 351 4.21 4.27 2.66
localhost:8888/nbconvert/html/pandas(set2).ipynb?download=false 8/10
8/9/2020 pandas(set2)
In [15]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("Original Dataframe:")
print(diamonds.head())
print("\nDiamonds where length>5, width>5 and depth>5:")
# Keep only rows where each of the x, y and z columns exceeds 5.
all_above_five = (diamonds['x'] > 5) & (diamonds['y'] > 5) & (diamonds['z'] > 5)
result = diamonds[all_above_five]
print(result.head())
Original Dataframe:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
In [16]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("Original Dataframe:")
print(diamonds.head())
print("\nDrop all non-numeric columns of diamonds DataFrame:")
# select_dtypes(include='number') keeps only the numeric columns, which
# drops the string-valued ones. The source frame is left untouched; the
# numeric-only view is bound to its own name.
diamonds_numeric = diamonds.select_dtypes(include='number')
print(diamonds_numeric.head())
Original Dataframe:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
localhost:8888/nbconvert/html/pandas(set2).ipynb?download=false 9/10
8/9/2020 pandas(set2)
In [17]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("Original Dataframe:")
print(diamonds.head())
print("\nMean of each numeric column of diamonds DataFrame:")
# The frame contains string columns (cut, color, clarity). numeric_only=True
# restricts the reduction to numeric columns explicitly; without it, recent
# pandas versions raise a TypeError instead of silently skipping them.
print(diamonds.mean(numeric_only=True))
Original Dataframe:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
In [18]:
import pandas as pd

diamonds = pd.read_csv(
    'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv'
)
print("Original Dataframe:")
print(diamonds.head())
print("\nCount, minimum, maximum price for each cut of diamonds DataFrame:")
# Group rows by 'cut' and summarise the 'price' column per group.
price_by_cut = diamonds.groupby('cut')['price']
price_summary = price_by_cut.agg(['count', 'min', 'max'])
print(price_summary)
Original Dataframe:
carat cut color clarity depth table price x y z
0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43
1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31
2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31
3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63
4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75
In [ ]:
localhost:8888/nbconvert/html/pandas(set2).ipynb?download=false 10/10