CLEANING DATA SET - Jupyter Notebook

30/10/2023, 10:31 CLEANING DATA SET - Jupyter Notebook
In [1]: import pandas as pd

import pymysql as sql
db=sql.connect(host='localhost',user='root',password='manish@sql0047',datab

# NOTE(review): the UserWarning captured in this cell's output says pandas only
# tests SQLAlchemy connectables, database URI strings and sqlite3 connections;
# `db` here is a raw pymysql connection, so consider passing a SQLAlchemy engine.
data=pd.read_sql_query('select * from emp',db) # no need to build the DataFrame by hand; one is created by default
data

C:\Users\Acer\AppData\Local\Temp\ipykernel_21408\2883241542.py:5: UserWarn
ing: pandas only supports SQLAlchemy connectable (engine/connection) or da
tabase string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are n
ot tested. Please consider using SQLAlchemy.
data=pd.read_sql_query('select * from emp',db) #isse dataframe
bnane ki jarurt nhi h by default bn jati hai
Out[1]: id name lastname age city salary
0 111 Rohit Verma 27 Meerut 2000
1 112 Monu Kasana 23 Ghaziabad 5000
2 113 Vinod Sharma 28 Noida 12000
3 114 Satish Bhati 25 Bulandsher 4000
4 115 Manish Dhama 23 Greater Noida 10000
5 116 Sachin Dedha 24 Mujaffarnagar 9000
6 117 Manoj Tyagi 22 New Delhi 14000
In [2]: # Dataset cleaning only applies where NaN (missing) values are present.

# In the dataset above there are no null values.
In [3]: data.isnull().sum() # this query check the null values of each column...
Out[3]: id 0
name 0
lastname 0
age 0
city 0
salary 0
dtype: int64
In [4]: # example--------
import numpy as np
# Small demo array with two missing entries (NaN); the NaNs make numpy store
# every element as a float.
sample_values = [5, 6, 7, 8, np.nan, 44, 55, np.nan]
s = np.array(sample_values)
s
Out[4]: array([ 5., 6., 7., 8., nan, 44., 55., nan])
In [5]: np.mean(s) # yha par koi bhi calculation possible nhi hai kyoki nan valu
Out[5]: nan
localhost:8888/notebooks/CLEANING DATA SET.ipynb 1/15

In [6]: # ANOTHER EXAMPLE OF CLEANING A DATA SET ------->> FIRST WE EXTRACT THE DATA
In [7]: import pandas as pd

# Load the second example dataset from an Excel workbook; this rebinds `data`,
# which previously held the result of the SQL query.
# NOTE(review): the path is absolute and machine-specific, so the cell only
# runs where the file exists at exactly this location.
data=pd.read_excel("C:/Users/Acer/Desktop/DATA12.xlsx")
data
Out[7]: ID Name Industry Inception Revenue Expenses Profit Growth Sa
IT 6,482,465
0 1.0 Lamtone 2009.0 $11,757,018 5274553.0 0.30 N
Services Dollars
Financial 916,455
1 2.0 Stripfind 2010.0 $12,329,371 11412916.0 NaN N
Services Dollars
2 3.0 Canecorporation Health 2012.0 $10,597,009 NaN 3005820.0 NaN N
IT 7,429,377
3 4.0 NaN 2013.0 NaN 6597557.0 NaN N
Services Dollars
7,435,363
4 5.0 NaN NaN NaN NaN 3138627.0 NaN N
Dollars
5,470,303
5 6.0 Techline Health 2006.0 NaN 8427816.0 0.23 N
Dollars
6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06 N
3,878,113
7 8.0 Kayelectronics NaN 2009.0 $9,451,943 5573830.0 0.04 N
Dollars
IT
8 9.0 Ganzlax 2011.0 $14,001,180 NaN 11901180.0 0.18 N
Services
9 NaN NaN NaN NaN NaN NaN NaN NaN N
In [8]: data.isnull().sum() # this function count the null values of the each col
Out[8]: ID 1
Name 3
Industry 4
Inception 2
Revenue 4
Expenses 4
Profit 1
Growth 5
Salary 10
dtype: int64
In [9]: data.count(axis=1) # this count the non null values of each row
Out[9]: 0 8
1 7
2 6
3 5
4 3
5 7
6 6
7 7
8 7
9 0
dtype: int64

In [10]: data.count(axis=0) # thiscount the non null values of the each column
Out[10]: ID 9
Name 7
Industry 6
Inception 8
Revenue 6
Expenses 6
Profit 9
Growth 5
Salary 0
dtype: int64
In [11]: data
Out[11]: ID Name Industry Inception Revenue Expenses Profit Growth Sa
IT 6,482,465
0 1.0 Lamtone 2009.0 $11,757,018 5274553.0 0.30 N
Services Dollars
Financial 916,455
1 2.0 Stripfind 2010.0 $12,329,371 11412916.0 NaN N
Services Dollars
2 3.0 Canecorporation Health 2012.0 $10,597,009 NaN 3005820.0 NaN N
IT 7,429,377
3 4.0 NaN 2013.0 NaN 6597557.0 NaN N
Services Dollars
7,435,363
4 5.0 NaN NaN NaN NaN 3138627.0 NaN N
Dollars
5,470,303
5 6.0 Techline Health 2006.0 NaN 8427816.0 0.23 N
Dollars
6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06 N
3,878,113
7 8.0 Kayelectronics NaN 2009.0 $9,451,943 5573830.0 0.04 N
Dollars
IT
8 9.0 Ganzlax 2011.0 $14,001,180 NaN 11901180.0 0.18 N
Services
9 NaN NaN NaN NaN NaN NaN NaN NaN N
In [12]: # HERE ALL THE VALUES OF THE SALARY COLUMN ARE NaN, SO WE WANT TO DROP THE WHOLE COLUMN
data.drop(columns=['Salary'], inplace=True)  # permanently removes the Salary column from data

In [13]: data
Out[13]: ID Name Industry Inception Revenue Expenses Profit Growth
IT 6,482,465
0 1.0 Lamtone 2009.0 $11,757,018 5274553.0 0.30
Services Dollars
Financial 916,455
1 2.0 Stripfind 2010.0 $12,329,371 11412916.0 NaN
Services Dollars
2 3.0 Canecorporation Health 2012.0 $10,597,009 NaN 3005820.0 NaN
IT 7,429,377
3 4.0 NaN 2013.0 NaN 6597557.0 NaN
Services Dollars
7,435,363
4 5.0 NaN NaN NaN NaN 3138627.0 NaN
Dollars
5,470,303
5 6.0 Techline Health 2006.0 NaN 8427816.0 0.23
Dollars
6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06
3,878,113
7 8.0 Kayelectronics NaN 2009.0 $9,451,943 5573830.0 0.04
Dollars
IT
8 9.0 Ganzlax 2011.0 $14,001,180 NaN 11901180.0 0.18
Services
9 NaN NaN NaN NaN NaN NaN NaN NaN
In [14]: # NOW WE CHECK THE ROWS -------->>>

data.count(axis=1)
Out[14]: 0 8
1 7
2 6
3 5
4 3
5 7
6 6
7 7
8 7
9 0
dtype: int64
In [15]: # The row with index 9 contains only NaN values, so now we remove that whole row
data.dropna(how='all',inplace=True) # removes, in place, every row whose values are all NaN

In [16]: data
6,482,465
0 1.0 Lamtone IT Services 2009.0 $11,757,018 5274553.0 0.30
Dollars
Financial 916,455
1 2.0 Stripfind 2010.0 $12,329,371 11412916.0 NaN
Services Dollars
7,429,377
3 4.0 NaN IT Services 2013.0 NaN 6597557.0 NaN
Dollars
7,435,363
Dollars
5,470,303
Dollars
6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06
3,878,113
Dollars
8 9.0 Ganzlax IT Services 2011.0 $14,001,180 NaN 11901180.0 0.18
In [17]: data.dropna(how='any') # isse ek bhi NAN value hogi row mein vo row delete
0 1.0 Lamtone IT Services 2009.0 $11,757,018 6,482,465 Dollars 5274553.0 0.3
In [18]: data
6,482,465
Dollars
Financial 916,455
1 2.0 Stripfind 2010.0 $12,329,371 11412916.0 NaN
Services Dollars
7,429,377
3 4.0 NaN IT Services 2013.0 NaN 6597557.0 NaN
Dollars
7,435,363
Dollars
5,470,303
Dollars
6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06
3,878,113
Dollars

In [19]: # NOW WE WORK ON THE REVENUE & EXPENSES COLUMNS (cleaning the NaN values from them)

# STEP 1 =>> fill in zero wherever the column has a NaN value

# The fill value is the string '0', not the number 0: the other Revenue entries
# are still text at this point (e.g. '$11,757,018 ').
data.fillna({'Revenue':'0'},inplace=True)
data
6,482,465
Dollars
Financial 916,455
1 2.0 Stripfind 2010.0 $12,329,371 11412916.0 NaN
Services Dollars
7,429,377
3 4.0 NaN IT Services 2013.0 0 6597557.0 NaN
Dollars
7,435,363
4 5.0 NaN NaN NaN 0 3138627.0 NaN
Dollars
5,470,303
5 6.0 Techline Health 2006.0 0 8427816.0 0.23
Dollars
6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06
3,878,113
Dollars
In [20]: # STEP 2 =>> make list

# Pull the Revenue column out as a plain Python list and display it.
n = data['Revenue']
n2 = n.tolist()
n2
Out[20]: ['$11,757,018 ',

'$12,329,371 ',
'$10,597,009 ',
'0',
'0',
'0',
'$9,254,614 ',
'$9,451,943 ',
'$14,001,180 ']
In [21]: # STEP 3=>> remove extra things like dollar($) and comma(,) from the list-
# Strip every '$' and ',' from each Revenue string; all other characters
# (including any trailing space) are kept exactly as they are.
u = [entry.replace("$", "").replace(",", "") for entry in n2]

In [22]: u
Out[22]: ['11757018 ',

'12329371 ',
'10597009 ',
'0',
'0',
'0',
'9254614 ',
'9451943 ',
'14001180 ']
In [23]:
# STEP 4=>> All the cleaned values are now in the variable {u}; move them back into the Revenue column

# Overwrite the Revenue column with the values held in u, then display the frame.
data['Revenue']=u
data
6,482,465
0 1.0 Lamtone IT Services 2009.0 11757018 5274553.0 0.30
Dollars
Financial 916,455
1 2.0 Stripfind 2010.0 12329371 11412916.0 NaN
Services Dollars
2 3.0 Canecorporation Health 2012.0 10597009 NaN 3005820.0 NaN
7,429,377
Dollars
7,435,363
4 5.0 NaN NaN NaN 0 3138627.0 NaN
Dollars
5,470,303
5 6.0 Techline Health 2006.0 0 8427816.0 0.23
Dollars
6 7.0 Cityace NaN 2010.0 9254614 NaN 3005116.0 0.06
3,878,113
7 8.0 Kayelectronics NaN 2009.0 9451943 5573830.0 0.04
Dollars
8 9.0 Ganzlax IT Services 2011.0 14001180 NaN 11901180.0 0.18
In [24]: data.info() # isse pyta chla Revenue abhi bhi object hai means String valu

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9 entries, 0 to 8
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 9 non-null float64
1 Name 7 non-null object
2 Industry 6 non-null object
3 Inception 8 non-null float64
4 Revenue 9 non-null object
5 Expenses 6 non-null object
6 Profit 9 non-null float64
7 Growth 5 non-null float64
dtypes: float64(4), object(4)
memory usage: 648.0+ bytes

In [25]: data['Revenue']=data['Revenue'].astype(int)
In [26]: data.info() # now Revenue has been changed into integer value------>>>>
--- ------ -------------- -----
4 Revenue 9 non-null int32
dtypes: float64(4), int32(1), object(3)
In [27]: # NOW WE PERFORM THE SAME STEPS FOR THE EXPENSE COLUMN----->>>>>>>>>

#STEP 1 =>> fill zero where NAN values in the column

# Missing Expenses entries get the string '0' because the existing values in
# this column are text (e.g. '6,482,465 Dollars').
data.fillna({'Expenses':'0'},inplace=True)
data
6,482,465
0 1.0 Lamtone IT Services 2009.0 11757018 5274553.0 0.30
Dollars
Financial 916,455
1 2.0 Stripfind 2010.0 12329371 11412916.0 NaN
Services Dollars
2 3.0 Canecorporation Health 2012.0 10597009 0 3005820.0 NaN
7,429,377
Dollars
7,435,363
4 5.0 NaN NaN NaN 0 3138627.0 NaN
Dollars
5,470,303
5 6.0 Techline Health 2006.0 0 8427816.0 0.23
Dollars
6 7.0 Cityace NaN 2010.0 9254614 0 3005116.0 0.06
3,878,113
7 8.0 Kayelectronics NaN 2009.0 9451943 5573830.0 0.04
Dollars
8 9.0 Ganzlax IT Services 2011.0 14001180 0 11901180.0 0.18

In [28]: # STEP 2 =>> make list

# Pull the Expenses column out as a plain Python list and display it.
l = data['Expenses']
l2 = l.tolist()
l2
Out[28]: ['6,482,465 Dollars',

'916,455 Dollars',
'0',
'7,429,377 Dollars',
'7,435,363 Dollars',
'5,470,303 Dollars',
'0',
'3,878,113 Dollars',
'0']
In [38]: # STEP 3=>> remove extra things like [Dollars] and comma(,) from the list-
# Keep only the digit characters of each Expenses string, which drops the
# commas, the space and the trailing "Dollars" text.
m = ["".join(ch for ch in entry if ch.isdigit()) for entry in l2]
In [39]: m
Out[39]: ['6482465',
'916455',
'0',
'7429377',
'7435363',
'5470303',
'0',
'3878113',
'0']
In [40]: data.info()
--- ------ -------------- -----

In [41]: data['Expenses']=m
data
0 1.0 Lamtone IT Services 2009.0 11757018 6482465 5274553.0 0.30
Financial
1 2.0 Stripfind 2010.0 12329371 916455 11412916.0 NaN
Services
3 4.0 NaN IT Services 2013.0 0 7429377 6597557.0 NaN
4 5.0 NaN NaN NaN 0 7435363 3138627.0 NaN
5 6.0 Techline Health 2006.0 0 5470303 8427816.0 0.23
6 7.0 Cityace NaN 2010.0 9254614 0 3005116.0 0.06
7 8.0 Kayelectronics NaN 2009.0 9451943 3878113 5573830.0 0.04
In [42]: data['Expenses']=data['Expenses'].astype(int)
In [43]: data.info()
--- ------ -------------- -----
5 Expenses 9 non-null int32
In [44]: gf=data.select_dtypes(['int','float']) # this will privide the inte

gf
Out[44]: ID Inception Revenue Expenses Profit Growth
0 1.0 2009.0 11757018 6482465 5274553.0 0.30
1 2.0 2010.0 12329371 916455 11412916.0 NaN
2 3.0 2012.0 10597009 0 3005820.0 NaN
3 4.0 2013.0 0 7429377 6597557.0 NaN
4 5.0 NaN 0 7435363 3138627.0 NaN
5 6.0 2006.0 0 5470303 8427816.0 0.23
6 7.0 2010.0 9254614 0 3005116.0 0.06
7 8.0 2009.0 9451943 3878113 5573830.0 0.04
8 9.0 2011.0 14001180 0 11901180.0 0.18

In [46]: gf.isnull().sum()/len(gf)*100 # this provide the NaN value in percentage
Out[46]: ID 0.000000
Inception 11.111111
Revenue 0.000000
Expenses 0.000000
Profit 0.000000
Growth 44.444444
dtype: float64
In [47]: gf
0 1.0 2009.0 11757018 6482465 5274553.0 0.30
1 2.0 2010.0 12329371 916455 11412916.0 NaN
2 3.0 2012.0 10597009 0 3005820.0 NaN
3 4.0 2013.0 0 7429377 6597557.0 NaN
4 5.0 NaN 0 7435363 3138627.0 NaN
5 6.0 2006.0 0 5470303 8427816.0 0.23
6 7.0 2010.0 9254614 0 3005116.0 0.06
7 8.0 2009.0 9451943 3878113 5573830.0 0.04
8 9.0 2011.0 14001180 0 11901180.0 0.18
In [48]: # CHECK FOR OUTLIERS IN THE GROWTH COLUMN------

# Take the Growth column of gf as a Series and display it.
t=gf['Growth']
t
Out[48]: 0 0.30
1 NaN
2 NaN
3 NaN
4 NaN
5 0.23
6 0.06
7 0.04
8 0.18
Name: Growth, dtype: float64
In [ ]:
In [ ]:

In [49]: t.plot.box()
Out[49]: <Axes: >

In [52]: # CHECK FOR OUTLIERS IN THE INCEPTION COLUMN----

# Take the Inception column of gf and draw a box plot of it.
t2 = gf['Inception']
t2.plot.box()
Out[52]: <Axes: >
In [50]: # FILL THE MEAN VALUE IN PLACE OF THE NaN VALUES------------>>

gf.fillna(gf.mean())
0 1.0 2009.0 11757018 6482465 5274553.0 0.300
1 2.0 2010.0 12329371 916455 11412916.0 0.162
2 3.0 2012.0 10597009 0 3005820.0 0.162
3 4.0 2013.0 0 7429377 6597557.0 0.162
4 5.0 2010.0 0 7435363 3138627.0 0.162
5 6.0 2006.0 0 5470303 8427816.0 0.230
6 7.0 2010.0 9254614 0 3005116.0 0.060
7 8.0 2009.0 9451943 3878113 5573830.0 0.040
8 9.0 2011.0 14001180 0 11901180.0 0.180
In [57]: gf['Growth']=gf['Growth'].fillna(gf['Growth'].median()) # fill meadian va

In [58]: gf #(here median is 0.18)
0 1.0 2009.0 11757018 6482465 5274553.0 0.30
1 2.0 2010.0 12329371 916455 11412916.0 0.18
2 3.0 2012.0 10597009 0 3005820.0 0.18
3 4.0 2013.0 0 7429377 6597557.0 0.18
4 5.0 NaN 0 7435363 3138627.0 0.18
5 6.0 2006.0 0 5470303 8427816.0 0.23
6 7.0 2010.0 9254614 0 3005116.0 0.06
7 8.0 2009.0 9451943 3878113 5573830.0 0.04
8 9.0 2011.0 14001180 0 11901180.0 0.18
In [59]: data
0 1.0 Lamtone IT Services 2009.0 11757018 6482465 5274553.0 0.30
Financial
1 2.0 Stripfind 2010.0 12329371 916455 11412916.0 NaN
Services
3 4.0 NaN IT Services 2013.0 0 7429377 6597557.0 NaN
4 5.0 NaN NaN NaN 0 7435363 3138627.0 NaN
5 6.0 Techline Health 2006.0 0 5470303 8427816.0 0.23
6 7.0 Cityace NaN 2010.0 9254614 0 3005116.0 0.06
7 8.0 Kayelectronics NaN 2009.0 9451943 3878113 5573830.0 0.04
In [60]: gf=data.select_dtypes(['int','float']) # this will privide the inte

gf
0 1.0 2009.0 11757018 6482465 5274553.0 0.30
1 2.0 2010.0 12329371 916455 11412916.0 NaN
2 3.0 2012.0 10597009 0 3005820.0 NaN
3 4.0 2013.0 0 7429377 6597557.0 NaN
4 5.0 NaN 0 7435363 3138627.0 NaN
5 6.0 2006.0 0 5470303 8427816.0 0.23
6 7.0 2010.0 9254614 0 3005116.0 0.06
7 8.0 2009.0 9451943 3878113 5573830.0 0.04
8 9.0 2011.0 14001180 0 11901180.0 0.18

In [62]: gf.fillna(0) # fill zero in place of NaN values

0 1.0 2009.0 11757018 6482465 5274553.0 0.30
1 2.0 2010.0 12329371 916455 11412916.0 0.00
2 3.0 2012.0 10597009 0 3005820.0 0.00
3 4.0 2013.0 0 7429377 6597557.0 0.00
4 5.0 0.0 0 7435363 3138627.0 0.00
5 6.0 2006.0 0 5470303 8427816.0 0.23
6 7.0 2010.0 9254614 0 3005116.0 0.06
7 8.0 2009.0 9451943 3878113 5573830.0 0.04
8 9.0 2011.0 14001180 0 11901180.0 0.18
In [ ]: # FILL THE LINEAR VALUES INTO THE DATA--------->>>>>>>>>

CLEANING DATA SET - Jupyter Notebook

Uploaded by

Document Information

Original Description:

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

CLEANING DATA SET - Jupyter Notebook

Uploaded by

Copyright:

Available Formats

30/10/2023, 10:31 CLEANING DATA SET - Jupyter Notebook

In [1]: import pandas as pd

Out[1]: id name lastname age city salary

0 111 Rohit Verma 27 Meerut 2000

1 112 Monu Kasana 23 Ghaziabad 5000

2 113 Vinod Sharma 28 Noida 12000

3 114 Satish Bhati 25 Bulandsher 4000

4 115 Manish Dhama 23 Greater Noida 10000

5 116 Sachin Dedha 24 Mujaffarnagar 9000

6 117 Manoj Tyagi 22 New Delhi 14000

In [2]: # CLEANING DATASET WHI WORK KREGA JHA PAR [NAN]

localhost:8888/notebooks/CLEANING DATA SET.ipynb 1/15

In [7]: import pandas as pd

Out[7]: ID Name Industry Inception Revenue Expenses Profit Growth Sa

2 3.0 Canecorporation Health 2012.0 $10,597,009 NaN 3005820.0 NaN N

6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06 N

9 NaN NaN NaN NaN NaN NaN NaN NaN N

localhost:8888/notebooks/CLEANING DATA SET.ipynb 2/15

Out[11]: ID Name Industry Inception Revenue Expenses Profit Growth Sa

2 3.0 Canecorporation Health 2012.0 $10,597,009 NaN 3005820.0 NaN N

6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06 N

9 NaN NaN NaN NaN NaN NaN NaN NaN N

localhost:8888/notebooks/CLEANING DATA SET.ipynb 3/15

Out[13]: ID Name Industry Inception Revenue Expenses Profit Growth

2 3.0 Canecorporation Health 2012.0 $10,597,009 NaN 3005820.0 NaN

6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06

9 NaN NaN NaN NaN NaN NaN NaN NaN

In [14]: # AB ROW KE LIYE CHECK KRENGE -------->>>

localhost:8888/notebooks/CLEANING DATA SET.ipynb 4/15

Out[16]: ID Name Industry Inception Revenue Expenses Profit Growth

2 3.0 Canecorporation Health 2012.0 $10,597,009 NaN 3005820.0 NaN

6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06

8 9.0 Ganzlax IT Services 2011.0 $14,001,180 NaN 11901180.0 0.18

Out[17]: ID Name Industry Inception Revenue Expenses Profit Growth

0 1.0 Lamtone IT Services 2009.0 $11,757,018 6,482,465 Dollars 5274553.0 0.3

Out[18]: ID Name Industry Inception Revenue Expenses Profit Growth

2 3.0 Canecorporation Health 2012.0 $10,597,009 NaN 3005820.0 NaN

6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06

8 9.0 Ganzlax IT Services 2011.0 $14,001,180 NaN 11901180.0 0.18

localhost:8888/notebooks/CLEANING DATA SET.ipynb 5/15

Out[19]: ID Name Industry Inception Revenue Expenses Profit Growth

2 3.0 Canecorporation Health 2012.0 $10,597,009 NaN 3005820.0 NaN

6 7.0 Cityace NaN 2010.0 $9,254,614 NaN 3005116.0 0.06

8 9.0 Ganzlax IT Services 2011.0 $14,001,180 NaN 11901180.0 0.18

In [20]: # STEP 2 =>> make list

Out[20]: ['$11,757,018 ',

localhost:8888/notebooks/CLEANING DATA SET.ipynb 6/15

Out[22]: ['11757018 ',

Out[23]: ID Name Industry Inception Revenue Expenses Profit Growth

2 3.0 Canecorporation Health 2012.0 10597009 NaN 3005820.0 NaN

6 7.0 Cityace NaN 2010.0 9254614 NaN 3005116.0 0.06

8 9.0 Ganzlax IT Services 2011.0 14001180 NaN 11901180.0 0.18

localhost:8888/notebooks/CLEANING DATA SET.ipynb 7/15

Out[27]: ID Name Industry Inception Revenue Expenses Profit Growth

2 3.0 Canecorporation Health 2012.0 10597009 0 3005820.0 NaN

6 7.0 Cityace NaN 2010.0 9254614 0 3005116.0 0.06

8 9.0 Ganzlax IT Services 2011.0 14001180 0 11901180.0 0.18

localhost:8888/notebooks/CLEANING DATA SET.ipynb 8/15

In [28]: # STEP 2 =>> make list

Out[28]: ['6,482,465 Dollars',

localhost:8888/notebooks/CLEANING DATA SET.ipynb 9/15

Out[41]: ID Name Industry Inception Revenue Expenses Profit Growth

0 1.0 Lamtone IT Services 2009.0 11757018 6482465 5274553.0 0.30