Professional Documents
Culture Documents
ipynb - Colaboratory
import pandas as pd
pd.read_csv('/content/Fortune_10.csv')
pd.read_csv('/content/Fortune_10.csv', header = 1)
IT 6,482,465
1 Lamtone 2009 $11,757,018 5274553 30%
Services Dollars
Financial 916,455
0 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
7,591,189
1 3 Canecorporation Health 2012 $10,597,009 3005820 7%
Dollars
7,429,377
2 4 Mattouch IT Services 2013 $14,026,934 6597557 26%
Dollars
7,435,363
3 5 Techdrill Health 2009 $10,573,990 3138627 8%
Dollars
5,470,303
4 6 Techline Health 2006 $13,898,119 8427816 23%
Dollars
6,249,498
5 7 Cityace Health 2010 $9,254,614 3005116 6%
Dollars
0 1 2 3 4 5 6 7
6,482,465
1 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
Financial 916,455
2 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
7,591,189
3 3 Canecorporation Health 2012 $10,597,009 3005820 7%
Dollars
7,429,377
4 4 Mattouch IT Services 2013 $14,026,934 6597557 26%
Dollars
7,435,363
5 5 Techdrill Health 2009 $10,573,990 3138627 8%
Dollars
5,470,303
6 6 Techline Health 2006 $13,898,119 8427816 23%
Dollars
6 249 498
https://colab.research.google.com/drive/1RbLXKZuMpKsU5vAeCx8PJcPoUr1zmA0l#scrollTo=wALaqraTGuUT&printMode=true 1/7
10/22/23, 10:44 PM Untitled5.ipynb - Colaboratory
6,482,465
1 1 Lamtone IT Services 2009 $11,757,018 5274553
Dollars
Financial 916,455
2 2 Stripfind 2010 $12,329,371 11412916
Services Dollars
7,591,189
3 3 Canecorporation Health 2012 $10,597,009 3005820
Dollars
pd.read_csv('/content/Fortune_10.csv', header = None, prefix = 'Data') 7,429,377
4 4 Mattouch IT Services 2013 $14,026,934 6597557
Dollars
<ipython-input-8-76d5d33cde48>:1: FutureWarning: The prefix argument has been depreca
7,435,363
5 5 Techdrill Health 2009 $10,573,990 3138627
Dollars
pd.read_csv('/content/Fortune_10.csv', header = None, prefix = 'Data')
5,470,303
6 Data0 6 Data1
Techline Data2
Health Data32006 $13,898,119
Data4 Data5 Data6 Data7
8427816
Dollars
0 ID Name Industry Inception Revenue Expenses Profit Growth
6,249,498
7 7 Cityace Health 2010 $9,254,614 3005116
D
6,482,465 ll
1 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
Financial 916,455
2 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
7,591,189
3 3 Canecorporation Health 2012 $10,597,009 3005820 7%
Dollars
7,429,377
4 4 Mattouch IT Services 2013 $14,026,934 6597557 26%
Dollars
7,435,363
5 5 Techdrill Health 2009 $10,573,990 3138627 8%
Dollars
5,470,303
6 6 Techline Health 2006 $13,898,119 8427816 23%
Dollars
6,249,498
7 7 Cityace Health 2010 $9,254,614 3005116 6%
D ll
pd.read_csv('/content/Fortune_10.csv', names = ['ID', 'Name', 'Industry', 'Inception', 'Revenue', 'Expenses', 'Profit', 'Growth',])
6,482,465
1 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
Financial 916,455
2 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
7,591,189
3 3 Canecorporation Health 2012 $10,597,009 3005820 7%
Dollars
7,429,377
4 4 Mattouch IT Services 2013 $14,026,934 6597557 26%
Dollars
7,435,363
5 5 Techdrill Health 2009 $10,573,990 3138627 8%
Dollars
5,470,303
6 6 Techline Health 2006 $13,898,119 8427816 23%
Dollars
6 249 498
https://colab.research.google.com/drive/1RbLXKZuMpKsU5vAeCx8PJcPoUr1zmA0l#scrollTo=wALaqraTGuUT&printMode=true 2/7
10/22/23, 10:44 PM Untitled5.ipynb - Colaboratory
6,482,465
0 1.0 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
Financial 916,455
1 2.0 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
df = pd.read_csv('/content/Fortune_10.csv', dtype = {'ID': 'float64', 'Profit': 'float64'})
df 7,591,189
2 3.0 Canecorporation Health 2012 $10,597,009 3005820 7%
Dollars
7,429,377
3 4.0 Mattouch IT Services 2013 $14,026,934 6597557.0 26%
Dollars
7,435,363
4 5.0 Techdrill Health 2009 $10,573,990 3138627.0 8%
Dollars
5,470,303
5 6.0 Techline Health 2006 $13,898,119 8427816.0 23%
Dollars
6 249 498
6,482,465
0 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
Financial 916,455
1 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
7,591,189
2 3 Canecorporation Health 2012 $10,597,009 3005820 7%
Dollars
7,429,377
3 4 Mattouch IT Services 2013 $14,026,934 6597557 26%
Dollars
7,435,363
4 5 Techdrill Health 2009 $10,573,990 3138627 8%
Dollars
5,470,303
5 6 Techline Health 2006 $13,898,119 8427816 23%
Dollars
6 249 498
'''
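# To have extra values read as NaN, pass na_values=<value or list of values> to read_csv.
# To stop pandas from converting its default missing-value markers (such as NA or N/A) into NaN, pass keep_default_na=False after the file name (keep_default_na=True is the default and keeps that conversion).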
'''
'\n# write make anyname in 'NaN' then you have to write 'na_values' = that name \n#
if you don't want to change the any name convert into 'NaN' then write 'keep_default
na = True' after file name \n'
df = pd.read_csv('/content/Fortune_10.csv')
df
https://colab.research.google.com/drive/1RbLXKZuMpKsU5vAeCx8PJcPoUr1zmA0l#scrollTo=wALaqraTGuUT&printMode=true 3/7
10/22/23, 10:44 PM Untitled5.ipynb - Colaboratory
df.isnull() 6,482,465
0 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
ID Name Industry Inception Revenue Expenses Profit Growth
Financial 916,455
1 2 Stripfind 2010 $12,329,371 11412916 20%
0 False False False Services
False False False DollarsFalse
False
4 False FalseTechdrill
False False True False 7,435,363 True
4 5 Health 2009 NaN False 3138627 NaN
Dollars
5 False False False False True False False False
5,470,303
5 6 Techline Health 2006 NaN 8427816 23%
6 False False False False False False False
DollarsFalse
df.isnull().sum()
ID 0
Name 0
Industry 0
Inception 0
Revenue 2
Expenses 1
Profit 0
Growth 3
dtype: int64
df.isnull().sum().sum()
df.notnull()
df.notnull().sum()
ID 10
Name 10
Industry 10
Inception 10
Revenue 8
Expenses 9
Profit 10
Growth 7
dtype: int64
df.notnull().sum().sum()
74
# Series
import numpy as np
https://colab.research.google.com/drive/1RbLXKZuMpKsU5vAeCx8PJcPoUr1zmA0l#scrollTo=wALaqraTGuUT&printMode=true 4/7
10/22/23, 10:44 PM Untitled5.ipynb - Colaboratory
sr = pd.Series([1,2,3,np.nan,4,np.NAN])
sr
0 1.0
1 2.0
2 3.0
3 NaN
4 4.0
5 NaN
dtype: float64
sr.isnull()
0 False
1 False
2 False
3 True
4 False
5 True
dtype: bool
sr.isnull().sum()
Dropna()
Dropna function: axis, how, thresh, subset, inplace
df.dropna()
6,482,465
0 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
Financial 916,455
1 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
6,249,498
6 7 Cityace Health 2010 $9,254,614 3005116 6%
Dollars
df.dropna(axis = 1)
df.dropna(how = 'any')
6,482,465
0 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
Financial 916,455
1 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
6,249,498
6 7 Cityace Health 2010 $9,254,614 3005116 6%
Dollars
https://colab.research.google.com/drive/1RbLXKZuMpKsU5vAeCx8PJcPoUr1zmA0l#scrollTo=wALaqraTGuUT&printMode=true 5/7
10/22/23, 10:44 PM Untitled5.ipynb - Colaboratory
df.dropna(axis = 1, how = 'any')
df.dropna(how = 'all')
# how='all' is useful when an entire row (or, with axis=1, an entire column) is NaN: only rows/columns in which every value is NaN are removed
6,482,465
0 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
Financial 916,455
1 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
7,591,189
2 3 Canecorporation Health 2012 $10,597,009 3005820 NaN
Dollars
7,429,377
3 4 Mattouch IT Services 2013 $14,026,934 6597557 NaN
Dollars
7,435,363
4 5 Techdrill Health 2009 NaN 3138627 NaN
Dollars
5,470,303
5 6 Techline Health 2006 NaN 8427816 23%
Dollars
6 249 498
df.dropna(thresh = 1)
# thresh=N keeps a row/column only if it has at least N non-null values; it is removed when its non-null count is less than N
6,482,465
0 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
Financial 916,455
1 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
7,591,189
2 3 Canecorporation Health 2012 $10,597,009 3005820 NaN
Dollars
7,429,377
3 4 Mattouch IT Services 2013 $14,026,934 6597557 NaN
Dollars
7,435,363
4 5 Techdrill Health 2009 NaN 3138627 NaN
Dollars
5,470,303
5 6 Techline Health 2006 NaN 8427816 23%
Dollars
6 249 498
df.dropna(subset = ['Revenue'])
# subset=['Revenue'] removes only the rows whose value in the Revenue column is NaN; NaNs in other columns are ignored
https://colab.research.google.com/drive/1RbLXKZuMpKsU5vAeCx8PJcPoUr1zmA0l#scrollTo=wALaqraTGuUT&printMode=true 6/7
10/22/23, 10:44 PM Untitled5.ipynb - Colaboratory
6,482,465
0 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
#df.dropna(inplace = True)
Financial 916,455
#df 1 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
7,591,189
2 3 Canecorporation Health 2012 $10,597,009 3005820 NaN
Fillna() Dollars
7,429,377
3 4 Mattouch IT Services 2013 $14,026,934 6597557 NaN
## Dollars
6,249,498
6 7 Cityace Health 2010 $9,254,614 3005116 6%
Dollars
df.fillna(0)
6,482,465
0 1 Lamtone IT Services 2009 $11,757,018 5274553 30%
Dollars
Financial 916,455
1 2 Stripfind 2010 $12,329,371 11412916 20%
Services Dollars
6,249,498
6 7 Cityace Health 2010 $9,254,614 3005116 6%
Dollars
https://colab.research.google.com/drive/1RbLXKZuMpKsU5vAeCx8PJcPoUr1zmA0l#scrollTo=wALaqraTGuUT&printMode=true 7/7