Professional Documents
Culture Documents
In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
In [3]:
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3049:
DtypeWarning: Columns (48,49) have mixed types. Specify dtype option on import or set
low_memory=False.
In [4]:
# Preview the first rows of the loaded table (head() defaults to five rows).
service311.head()
Out[4]:
Unique Key Created Date Closed Date Agency Agency Name Complaint Type Descriptor
Location Type Incident Zip Incident Address ... Bridge Highway Name
Bridge Highway Direction Road Ramp Bridge Highway Segment Garage Lot
Name Ferry Direction Ferry Terminal Name Latitude Longitude Location
5 rows × 53 columns
In [5]:
# (row count, column count) of service311.
service311.shape
Out[5]:
(300698, 53)
In [6]:
# List every column label in service311.
service311.columns
Out[6]:
'Incident Address', 'Street Name', 'Cross Street 1', 'Cross Street 2',
dtype='object')
In [8]:
# Distinct values found in the 'Complaint Type' column.
service311['Complaint Type'].unique()
Out[8]:
dtype=object)
In [10]:
# Distinct values found in the 'Descriptor' column.
service311['Descriptor'].unique()
Out[10]:
In [11]:
# Count the rows for every (Complaint Type, City) pair, then flatten the
# grouped result into a plain table whose tally lives in a 'count' column.
complaintTypecity = (
    service311
    .groupby(['Complaint Type', 'City'])
    .size()
    .to_frame('count')
    .reset_index()
)
complaintTypecity
Out[11]:
In [12]:
# Row count for each (Borough, Complaint Type, Descriptor) combination.
service311.groupby(['Borough','Complaint Type','Descriptor']).size()
Out[12]:
In Car 36
Neglected 673
No Shelter 71
Tortured 192
In Public 142
Detached Trailer 73
Car/Truck Horn 28
Car/Truck Music 90
...
Homeless Issue 1
Detached Trailer 3
Car/Truck Horn 14
Car/Truck Music 17
Loud Talking 1
Loud Talking 5
Car/Truck Music 38
Engine Idling 11
Unlicensed 5
In [ ]:
In [9]:
import datetime
In [13]:
# Load the 311 service-request export again, this time asking pandas to parse
# the two timestamp columns as datetimes instead of leaving them as strings.
# The column name "Created Date" must stay on one line: a string literal split
# across lines is a syntax error.
df = pd.read_csv(
    "311_Service_Requests_from_2010_to_Present.csv",
    parse_dates=["Created Date", "Closed Date"],
)
In [14]:
In [22]:
# Bar chart of how many rows carry each value of the 'Status' column.
status_counts = df['Status'].value_counts()
status_counts.plot(kind='bar', alpha=0.6, figsize=(7, 7))
plt.show()
In [20]:
# Complaint type breakdown: horizontal bar plot of the ten most frequent
# complaint types, to show which complaint types make up the majority.
# (The trailing ';' suppresses the Axes repr in the cell output.)
service311['Complaint Type'].value_counts().head(10).plot(kind='barh',figsize=(5,5));
In [21]:
# Row count for each (Borough, Complaint Type, Descriptor) combination
# (same grouping as the earlier In [12] cell).
service311.groupby(["Borough","Complaint Type","Descriptor"]).size()
Out[21]:
In Car 36
Neglected 673
No Shelter 71
Tortured 192
In Public 142
Detached Trailer 73
Car/Truck Horn 28
Car/Truck Music 90
...
Homeless Issue 1
Detached Trailer 3
Car/Truck Horn 14
Car/Truck Music 17
Loud Talking 53
Loud Talking 1
Loud Talking 5
Car/Truck Music 38
Engine Idling 11
Unlicensed 5
In [23]:
# Group the requests by complaint type. Rows with no 'Complaint Type' are
# dropped first, and the grouping is applied to that filtered frame (the
# original overwrote the filtered frame by grouping the raw one instead).
majorcomplints = (
    service311
    .dropna(subset=["Complaint Type"])
    .groupby("Complaint Type")
)

# Build the per-type tally as a two-column table ('Complaint Type', 'count').
# The original read sortedComplaintType before ever assigning it, which fails
# on a fresh kernel. NOTE(review): the descending sort is reconstructed from
# the variable name and the displayed output - confirm it matches the lost line.
sortedComplaintType = (
    majorcomplints
    .size()
    .sort_values(ascending=False)
    .to_frame('count')
    .reset_index()
)

# Show the ten most frequent complaint types.
sortedComplaintType.head(10)
Out[23]:
7 Traffic 4498
In [25]:
# Narrow the complaint-type table to its first five rows, then draw a pie
# chart of their counts. The percentages are relative to these five rows only.
sortedComplaintType = sortedComplaintType.head(5)
pie_values = sortedComplaintType['count']
pie_labels = sortedComplaintType["Complaint Type"]

plt.figure(figsize=(5, 5))
plt.pie(pie_values, labels=pie_labels, autopct="%1.1f%%")
plt.show()
In [26]:
In [26]:
# (row count, column count) of grp_data.
# NOTE(review): grp_data is not defined in any visible cell - confirm where it is built.
grp_data.shape
Out[26]:
(77044, 54)
In [27]:
# Number of missing values in each column of df.
df.isnull().sum()
Out[27]:
Unique Key 0
Created Date 0
Agency 0
Agency Name 0
Complaint Type 0
Descriptor 5914
City 2614
Landmark 300349
Status 0
Due Date 3
Resolution Description 0
Community Board 0
Borough 0
Park Borough 0
School Name 0
School Number 0
School Region 1
School Code 1
School Address 0
School City 0
School State 0
School Zip 1
Latitude 3540
Longitude 3540
Location 3540
Request_Closing_Time 2164
dtype: int64
In [29]:
# Remove the rows that have no City. Calling dropna(inplace=True) on
# df['City'] only touches a Series pulled out of the frame: the frame's rows
# are not removed, and under pandas copy-on-write the call has no effect at
# all. Dropping on the frame with subset= removes the rows themselves.
df = df.dropna(subset=['City'])
In [31]:
# Length of the 'City' column, to check the effect of the null-drop in the previous cell.
df['City'].shape
Out[31]:
(298084,)
In [32]:
# Number of rows in grp_data whose City is missing.
grp_data['City'].isnull().sum()
Out[32]:
283
In [33]:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py:6130:
SettingWithCopyWarning:
self._update_inplace(new_data)
In [35]:
# Scatter plot of City against Complaint Type for the rows in grp_data,
# showing which cities raised complaints of type 'Blocked Driveway'.
fig, ax = plt.subplots(figsize=(20, 15))
ax.scatter(grp_data['Complaint Type'], grp_data['City'])
ax.set_title('Plot showing list of cities that raised complaint of type Blocked Driveway')
plt.show()
In [ ]: