Professional Documents
Culture Documents
Deepthi Miniproject-1
Deepthi Miniproject-1
import numpy as np
In [113… dataset.head()
Out[113]: approx_cost(for
url address name online_order book_table rate votes phone location rest_type dish_liked cuisines reviews_list menu_item listed_in(type) listed_in(city)
two people)
+91
https://www.zomato.com/bangalore/grand- 10, 3rd Floor, Lakshmi Grand Casual North Indian, [('Rated 4.0', 'RATED\n
4 No No 3.8/5 166 8026612447\r\n+91 Basavanagudi Panipuri, Gol Gappe 600 [] Buffet Banashankari
village... Associates, Gandhi Baza... Village Dining Rajasthani Very good restaurant ...
9901210005
In [114… dataset.shape
(51717, 17)
Out[114]:
In [115… dataset.columns
Out[115]:
'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
'listed_in(type)', 'listed_in(city)'],
dtype='object')
In [116… dataset.info()
<class 'pandas.core.frame.DataFrame'>
dataset.head()
Out[117]: name online_order book_table rate votes location rest_type cuisines approx_cost(for two people) listed_in(type) listed_in(city)
0 Jalsa Yes Yes 4.1/5 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800 Buffet Banashankari
1 Spice Elephant Yes No 4.1/5 787 Banashankari Casual Dining Chinese, North Indian, Thai 800 Buffet Banashankari
2 San Churro Cafe Yes No 3.8/5 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800 Buffet Banashankari
3 Addhuri Udupi Bhojana No No 3.7/5 88 Banashankari Quick Bites South Indian, North Indian 300 Buffet Banashankari
4 Grand Village No No 3.8/5 166 Basavanagudi Casual Dining North Indian, Rajasthani 600 Buffet Banashankari
dataset.shape
(51609, 11)
Out[118]:
In [119… dataset['rate'].unique()
Out[119]:
'3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
'3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
'4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
'3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
'4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
'4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
if(value=='NEW' or value=='-'):
return np.nan
else:
value = str(value).split('/')
value = value[0]
return float(value)
dataset['rate'] = dataset['rate'].apply(handlerate)
dataset['rate'].head()
0 4.1
Out[120]:
1 4.1
2 3.8
3 3.7
4 3.8
dataset['rate'].isnull().sum()
0
Out[121]:
dataset.head()
Out[122]: name online_order book_table rate votes location rest_type cuisines approx_cost(for two people) listed_in(type) listed_in(city)
0 Jalsa Yes Yes 4.1 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800 Buffet Banashankari
1 Spice Elephant Yes No 4.1 787 Banashankari Casual Dining Chinese, North Indian, Thai 800 Buffet Banashankari
2 San Churro Cafe Yes No 3.8 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800 Buffet Banashankari
3 Addhuri Udupi Bhojana No No 3.7 88 Banashankari Quick Bites South Indian, North Indian 300 Buffet Banashankari
4 Grand Village No No 3.8 166 Basavanagudi Casual Dining North Indian, Rajasthani 600 Buffet Banashankari
dataset.head()
Out[123]: name online_order book_table rate votes location rest_type cuisines approx_cost(for two people) listed_in(type) listed_in(city)
0 Jalsa Yes Yes 4.1 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800 Buffet Banashankari
1 Spice Elephant Yes No 4.1 787 Banashankari Casual Dining Chinese, North Indian, Thai 800 Buffet Banashankari
2 San Churro Cafe Yes No 3.8 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800 Buffet Banashankari
3 Addhuri Udupi Bhojana No No 3.7 88 Banashankari Quick Bites South Indian, North Indian 300 Buffet Banashankari
4 Grand Village No No 3.8 166 Basavanagudi Casual Dining North Indian, Rajasthani 600 Buffet Banashankari
dataset.head()
Out[124]: name online_order book_table rate votes location rest_type cuisines Cost2plates Type listed_in(city)
0 Jalsa Yes Yes 4.1 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800 Buffet Banashankari
1 Spice Elephant Yes No 4.1 787 Banashankari Casual Dining Chinese, North Indian, Thai 800 Buffet Banashankari
2 San Churro Cafe Yes No 3.8 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800 Buffet Banashankari
3 Addhuri Udupi Bhojana No No 3.7 88 Banashankari Quick Bites South Indian, North Indian 300 Buffet Banashankari
4 Grand Village No No 3.8 166 Basavanagudi Casual Dining North Indian, Rajasthani 600 Buffet Banashankari
In [125… dataset['location'].unique()
Out[125]:
'Kumaraswamy Layout', 'Rajarajeshwari Nagar', 'Vijay Nagar',
In [126… dataset['listed_in(city)'].unique()
Out[126]:
'Brigade Road', 'Brookefield', 'BTM', 'Church Street',
In [128… dataset['Cost2plates'].unique()
Out[128]:
'900', '200', '750', '150', '850', '100', '1,200', '350', '250',
value = str(value)
if ',' in value:
return float(value)
else:
return float(value)
dataset['Cost2plates'] = dataset['Cost2plates'].apply(handlecomma)
dataset['Cost2plates'].unique()
array([ 800., 300., 600., 700., 550., 500., 450., 650., 400.,
Out[129]:
900., 200., 750., 150., 850., 100., 1200., 350., 250.,
In [130… dataset.head()
Out[130]: name online_order book_table rate votes location rest_type cuisines Cost2plates Type
0 Jalsa Yes Yes 4.1 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800.0 Buffet
1 Spice Elephant Yes No 4.1 787 Banashankari Casual Dining Chinese, North Indian, Thai 800.0 Buffet
2 San Churro Cafe Yes No 3.8 918 Banashankari Cafe, Casual Dining Cafe, Mexican, Italian 800.0 Buffet
3 Addhuri Udupi Bhojana No No 3.7 88 Banashankari Quick Bites South Indian, North Indian 300.0 Buffet
4 Grand Village No No 3.8 166 Basavanagudi Casual Dining North Indian, Rajasthani 600.0 Buffet
rest_types
Out[131]:
Casual Dining 10253
Cafe 3682
Delivery 2574
...
rest_types_lessthan1000
Out[132]:
Bar 686
...
if(value in rest_types_lessthan1000):
return 'others'
else:
return value
dataset['rest_type'] = dataset['rest_type'].apply(handle_rest_type)
dataset['rest_type'].value_counts()
Out[133]:
Casual Dining 10253
others 9003
Cafe 3682
Delivery 2574
Bakery 1140
location_lessthan300 = location[location<300]
def handle_location(value):
if(value in location_lessthan300):
return 'others'
else:
return value
dataset['location'] = dataset['location'].apply(handle_location)
dataset['location'].value_counts()
BTM 5056
Out[134]:
others 4954
HSR 2494
JP Nagar 2218
Whitefield 2105
Indiranagar 2026
Jayanagar 1916
Marathahalli 1805
Bellandur 1268
Ulsoor 1011
Banashankari 902
MG Road 893
Malleshwaram 721
Basavanagudi 684
Brookefield 656
Banaswadi 640
Kammanahalli 639
Rajajinagar 591
Shivajinagar 498
Domlur 482
Ejipura 433
cuisines_lessthan100 = cuisines[cuisines<100]
def handle_cuisines(value):
if(value in cuisines_lessthan100):
return 'others'
else:
return value
dataset['cuisines'] = dataset['cuisines'].apply(handle_cuisines)
dataset['cuisines'].value_counts()
others 26159
Out[135]:
North Indian 2852
Biryani 903
...
In [136… dataset.head()
Out[136]: name online_order book_table rate votes location rest_type cuisines Cost2plates Type
0 Jalsa Yes Yes 4.1 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800.0 Buffet
1 Spice Elephant Yes No 4.1 787 Banashankari Casual Dining others 800.0 Buffet
2 San Churro Cafe Yes No 3.8 918 Banashankari others others 800.0 Buffet
3 Addhuri Udupi Bhojana No No 3.7 88 Banashankari Quick Bites South Indian, North Indian 300.0 Buffet
4 Grand Village No No 3.8 166 Basavanagudi Casual Dining others 600.0 Buffet
ax = sns.countplot(dataset['location'])
plt.xticks(rotation=90)
C:\Users\roopa\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other argume
nts without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
Out[137]:
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
[Text(0, 0, 'Banashankari'),
Text(1, 0, 'Basavanagudi'),
Text(2, 0, 'others'),
Text(3, 0, 'Jayanagar'),
Text(6, 0, 'BTM'),
Text(8, 0, 'HSR'),
Text(9, 0, 'Marathahalli'),
Text(15, 0, 'Bellandur'),
Text(17, 0, 'Whitefield'),
Text(19, 0, 'Indiranagar'),
Text(26, 0, 'Ulsoor'),
Text(28, 0, 'Shivajinagar'),
Text(32, 0, 'Domlur'),
Text(33, 0, 'Ejipura'),
Text(34, 0, 'Malleshwaram'),
Text(35, 0, 'Kammanahalli'),
Text(37, 0, 'Brookefield'),
Text(38, 0, 'Rajajinagar'),
Text(39, 0, 'Banaswadi'),
C:\Users\aud\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other argume
nts without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
<AxesSubplot:xlabel='online_order', ylabel='count'>
Out[138]:
In [139… plt.figure(figsize=(6,6))
C:\Users\audwin\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other argume
nts without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
<AxesSubplot:xlabel='book_table', ylabel='count'>
Out[139]:
<AxesSubplot:xlabel='online_order', ylabel='rate'>
Out[140]:
<AxesSubplot:xlabel='book_table', ylabel='rate'>
Out[141]:
dataset1.to_csv('location_online.csv')
dataset1 = pd.read_csv('location_online.csv')
dataset1
Out[142]: name
online_order No Yes
location
<AxesSubplot:xlabel='location'>
Out[143]:
dataset2.to_csv('location_booktable.csv')
dataset2 = pd.read_csv('location_booktable.csv')
dataset2
Out[144]: name
book_table No Yes
location
Banashankari 839 63
Banaswadi 632 8
Basavanagudi 668 16
Bellandur 1170 98
Brookefield 582 74
Domlur 427 55
Ejipura 433 0
Kammanahalli 590 49
Malleshwaram 632 89
Rajajinagar 550 41
Shivajinagar 475 23
<AxesSubplot:xlabel='location'>
Out[145]:
<AxesSubplot:xlabel='Type', ylabel='rate'>
Out[146]:
dataset3.to_csv('location_Type.csv')
dataset3 = pd.read_csv('location_Type.csv')
dataset3
Out[147]: name
Type Buffet Cafes Delivery Desserts Dine-out Drinks & nightlife Pubs and bars
location
<AxesSubplot:xlabel='location'>
Out[148]:
dataset4.drop_duplicates()
dataset5 = dataset4.groupby(['location'])['votes'].sum()
dataset5 = dataset5.to_frame()
dataset5.head()
Out[149]: votes
location
Indiranagar 1165909
JP Nagar 586522
sns.barplot(dataset5.index , dataset5['votes'])
plt.xticks(rotation = 90)
C:\Users\audwin\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arg
uments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
Out[150]:
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
Text(1, 0, 'Indiranagar'),
Text(5, 0, 'BTM'),
Text(6, 0, 'others'),
Text(7, 0, 'HSR'),
Text(10, 0, 'Jayanagar'),
Text(11, 0, 'Whitefield'),
Text(13, 0, 'Marathahalli'),
Text(21, 0, 'Malleshwaram'),
Text(23, 0, 'Bellandur'),
Text(24, 0, 'Ulsoor'),
Text(27, 0, 'Banashankari'),
Text(29, 0, 'Brookefield'),
Text(32, 0, 'Kammanahalli'),
Text(34, 0, 'Domlur'),
Text(35, 0, 'Basavanagudi'),
Text(36, 0, 'Rajajinagar'),
Text(38, 0, 'Banaswadi'),
Text(40, 0, 'Ejipura'),
Text(41, 0, 'Shivajinagar')])
In [151… dataset.head()
Out[151]: name online_order book_table rate votes location rest_type cuisines Cost2plates Type
0 Jalsa Yes Yes 4.1 775 Banashankari Casual Dining North Indian, Mughlai, Chinese 800.0 Buffet
1 Spice Elephant Yes No 4.1 787 Banashankari Casual Dining others 800.0 Buffet
2 San Churro Cafe Yes No 3.8 918 Banashankari others others 800.0 Buffet
3 Addhuri Udupi Bhojana No No 3.7 88 Banashankari Quick Bites South Indian, North Indian 300.0 Buffet
4 Grand Village No No 3.8 166 Basavanagudi Casual Dining others 600.0 Buffet
dataset6.drop_duplicates()
dataset7 = dataset6.groupby(['cuisines'])['votes'].sum()
dataset7 = dataset7.to_frame()
dataset7.head()
Out[152]: votes
cuisines
others 11542182
dataset7.head()
Out[153]: votes
cuisines
Chinese 101728
sns.barplot(dataset7.index , dataset7['votes'])
plt.xticks(rotation = 90)
C:\Users\audwin\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arg
uments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
Out[155]:
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68]),
Text(4, 0, 'Chinese'),
Text(5, 0, 'Cafe'),
Text(7, 0, 'Desserts'),
Text(15, 0, 'Biryani'),
Text(19, 0, 'Continental'),
Text(41, 0, 'Arabian'),
Text(43, 0, 'Pizza'),
Text(49, 0, 'Bakery'),
Text(50, 0, 'Andhra'),
Text(52, 0, 'Kerala'),
Text(63, 0, 'Beverages'),
Text(68, 0, 'Mithai')])