Professional Documents
Culture Documents
import numpy as np
import pandas as pd
Reading Data
# Read the startup-funding CSV into `data_1`, then work on `data`, an
# independent copy of it. `data.head()` displays the first rows for inspection.
In [2]: data_1 = pd.read_csv('./Datasets/startup_funding.csv')
data = data_1.copy()
data.head()
Tiger Global
0 1 09/01/2020 BYJU’S E-Tech E-learning Bengaluru
Management
Retailer of
baby and Sequoia
2 3 09/01/2020 Mamaearth E-commerce Bengaluru
toddler Capital India
products
Embroiled Sprout
Fashion and
4 5 02/01/2020 Fashor Clothes For Mumbai Venture
Apparel
Women Partners
In [3]: data.shape
Cleaning Data
In [4]: data.isnull().sum()
Out[4]: Sr No 0
Date dd/mm/yyyy 0
Startup Name 0
SubVertical 936
Investors Name 24
InvestmentnType 4
Remarks 2625
dtype: int64
data.Date.dtype
Out[5]: dtype('O')
def clean_string(x):
# Distinct raw date strings, kept for manual inspection of malformed entries.
unique_dates = data.Date.unique().tolist()
# unique_dates
# Normalise the separators so every date uses a single '/'.
# regex=False is required: as a regular expression '.' matches ANY character,
# which would turn the whole string into slashes on pandas versions where
# `regex` defaults to True.
data.Date = data.Date.str.replace('.', '/', regex=False)
data.Date = data.Date.str.replace('//', '/', regex=False)
# Replace `year` with the number of occurrences of each distinct value,
# ordered by the value itself (sort_index) rather than by frequency.
# presumably `year` holds the funding year of each row; it is defined outside
# this excerpt - confirm.
year = year.value_counts().sort_index()
x = year.index   # distinct values, ascending
y = year.values  # how many rows carry each value
# Line chart of the counts against the values.
plt.plot(x,y)
plt.title('Trend of investments')
plt.xlabel("Year")
plt.ylabel("Number of Fundings")
plt.show()
for i in range(3):
def clean_amount(x):
    """Reduce a raw funding amount to a string of ASCII digits.

    The value is converted with ``str`` and every character other than
    ``0``-``9`` is dropped, so thousands separators, ``+`` suffixes and
    placeholders such as ``undisclosed``, ``N/A`` or ``nan`` all disappear.
    A decimal point is dropped as well (``"1.5"`` becomes ``"15"``).

    Args:
        x: Raw cell value of any type.

    Returns:
        The digits of ``x`` as a string, or the sentinel ``'-999'`` when no
        digit is left.
    """
    # NOTE(review): the original character list was cut off after '7' in this
    # copy of the notebook; it is assumed to have ended with '8', '9' - confirm.
    digits = ''.join(c for c in str(x) if c in '0123456789')
    # The former replace(",") / replace("+") / "undisclosed" / "n/a" steps could
    # never match once only digits remain, so they are omitted.
    return digits if digits else '-999'
plt.show()
# Working frame restricted to rows that have a City value.
# The filter already removes every null city, so the former
# `data_temp.City.dropna(inplace=True)` had nothing left to drop (and acted on
# a temporary column object rather than on the frame); it is omitted.
# Copying after the filter gives an independent frame that the later column
# assignment can modify safely.
data_temp = data[data['City'].notnull()].copy()
def separateCity(city):
    """Return the first city from a '/'-separated city string.

    Args:
        city: City text such as ``"Bangalore / SFO"``. Non-string values
            (for example a NaN left in the column) are returned unchanged
            instead of raising ``AttributeError``.

    Returns:
        The text before the first ``'/'`` with surrounding whitespace
        removed, or ``city`` itself when it is not a string.
    """
    if not isinstance(city, str):
        return city
    return city.split('/')[0].strip()
# Keep only the first '/'-separated part of each City value in the working copy.
data_temp.City = data_temp.City.apply(separateCity)
# Ten most frequent cities (value_counts orders by descending frequency).
# NOTE(review): the counts are taken from `data`, not from the cleaned
# `data_temp` above, so the '/' splitting does not affect them - confirm this
# is intended.
city_num = data.City.value_counts()[0:10]
city = city_num.index        # city names
num_city = city_num.values   # number of rows per city
## plotting a pie chart showing percentage share of each city in no. of startups the
plt.rcParams['figure.figsize'] = (15,9)
for i in range(len(city)):
city_amount = data_temp.groupby('City')['AmountInUSD'].sum().sort_values(ascending =
city = city_amount.index
amountCity = city_amount.values
for i in range(len(city)):
Bangalore 31.10 %
Bengaluru 23.45 %
Mumbai 13.51 %
Gurgaon 9.52 %
Noida 3.50 %
nan 3.46 %
Gurugram 2.36 %
Chennai 1.96 %
Pune 1.95 %