Professional Documents
Culture Documents
karth
2024-03-30
library(tidyverse)
library(ggplot2)
# Load the NYC dog licensing extract into `data` and preview its first rows.
data <- read.csv(
  file = "C://Users//karth//OneDrive//Desktop//R-CODES//fol//NYC_Dog_Licensing_Dataset.csv"
)
head(x = data)
1
## RowNumber AnimalName AnimalGender AnimalBirthMonth BreedName Borough
## 1 1753 SHADOW M 01/01/2000 12:00:00 AM Beagle Brooklyn
## 2 2415 ROCCO M 10/01/2011 12:00:00 AM Boxer Brooklyn
## 3 3328 LUIGI M 09/01/2005 12:00:00 AM Maltese Bronx
## 4 7537 PETUNIA F 08/01/2013 12:00:00 AM Pug Brooklyn
## 5 8487 ROMEO M 10/01/2008 12:00:00 AM Maltese Bronx
## 6 10503 BRANDY M 01/01/2004 12:00:00 AM Unknown Brooklyn
## ZipCode CommunityDistrict CensusTract2010 NTA CityCouncilDistrict
## 1 11236 318 1014 BK50 46
## 2 11210 314 756 BK43 45
## 3 10464 210 516 BX10 13
## 4 11221 304 419 BK78 34
## 5 10451 201 65 BX34 17
## 6 11225 309 800 BK60 40
## CongressionalDistrict StateSenatorialDistrict LicenseIssuedDate
## 1 8 19 12/29/2014
## 2 9 17 01/07/2015
## 3 14 34 01/17/2015
## 4 7 18 03/01/2015
## 5 15 32 03/09/2015
## 6 9 20 03/27/2015
## LicenseExpiredDate
## 1 01/30/2016
## 2 01/30/2016
## 3 02/02/2016
## 4 03/28/2016
## 5 03/09/2016
## 6 03/29/2016
# Per-column summary (quantiles for numeric columns, length/class for text).
summary(object = data)
2
## NA’s :3337
## CongressionalDistrict StateSenatorialDistrict LicenseIssuedDate
## Min. : 3.00 Min. :10.00 Length:121949
## 1st Qu.: 8.00 1st Qu.:18.00 Class :character
## Median :11.00 Median :25.00 Mode :character
## Mean :10.27 Mean :23.54
## 3rd Qu.:12.00 3rd Qu.:28.00
## Max. :16.00 Max. :36.00
## NA’s :3337 NA’s :3337
## LicenseExpiredDate
## Length:121949
## Class :character
## Mode :character
##
##
##
##
# Count how many distinct gender codes are present before cleaning.
n_distinct(data[["AnimalGender"]])
## [1] 4
## NULL
# Drop rows whose gender code is empty or a single blank, then recount the
# distinct gender codes that remain.
data <- data[!(data[["AnimalGender"]] %in% c("", " ")), ]
n_distinct(data[["AnimalGender"]])
## [1] 2
# Count the distinct Borough spellings present in the data.
n_distinct(data[["Borough"]])
## [1] 79
# List every distinct Borough spelling so inconsistent entries can be spotted.
unique(data[["Borough"]])
3
## [34] "Quens" "Jersey City" "San Francisco"
## [37] "Richmond Hill" "East Elmhurst" "Ridgewood"
## [40] "bronxville" "Hoboken" "NYC"
## [43] "Floral Park" "Elmhurst " "NY"
## [46] "Middle Village" "Potomac" "Ozone Park"
## [49] "Glendale" "Long Island City" "oakland gardens"
## [52] "ozone park" "Staten Island, NY" "Wappingers Falls, NY"
## [55] "Woodside" "B" "Glen Oaks"
## [58] "Woodside NY." "Kew Gardens" "staten island"
## [61] "BELLE HARBOR" "Jackson heights " "Lynbrook"
## [64] "ARVERNE" "Forest Hills" "cambria heights"
## [67] "Middletown" "MASPETH" "West Palm Beach"
## [70] "South Richmond Hil" "Briarwood " "SO RICHMOND"
## [73] "JACKSON HGTS" "Bayside" "kissimmee florida"
## [76] "queens" "WOODSIDE" "Santa Monica"
## [79] "Albany"
# STEP 2:
# Remove records whose Borough lies outside the NYC area, then harmonise two
# borough spellings.
#
# Borough is free text with mixed case and stray whitespace (the unique()
# listing shows e.g. "Albany", "Long Island City", "Elmhurst "), so the
# exclusion is matched on a trimmed, lower-cased key. The earlier exact
# comparisons used lower-case literals ("long island city", "albany") that do
# not match the spellings shown in that listing.
# NOTE(review): confirm that "Long Island City" should be dropped rather than
# recoded to a borough.
outside_nyc <- c("long island city", "albany", "jersey city")
# %in% never returns NA, so rows with a missing Borough are kept as-is instead
# of being turned into all-NA rows by logical indexing.
data <- data[!(tolower(trimws(data$Borough)) %in% outside_nyc), ]
# Recode the two spellings exactly as before (exact, case-sensitive matches).
data$Borough[data$Borough %in% "staten is"] <- "staten island"
data$Borough[data$Borough %in% "QUEENS"] <- "queens"