You are on page 1 of 2

#missing values

column missing_values
1 type 0
2 title 0
3 director 1969
4 cast 570
5 country 476
6 date_added 11
7 release_year 0
8 rating 10
9 duration 0
10 listed_in 0
11 description 0

# Load dataset ----
# Read the Netflix titles CSV. Both the literal string 'NA' and empty fields
# are treated as missing; text columns stay character vectors (no factors).
# FALSE is spelled out because the alias `F` is an ordinary, reassignable
# variable.
net <- read.csv(
  'netflix_titles.csv',
  header = TRUE,
  na.strings = c('NA', ''),
  stringsAsFactors = FALSE
)

head(net)

# Data cleaning: drop show_id, then count missing values per column ----
net <- subset(net, select = -c(show_id))

# One row per remaining column with its NA count. vapply() pins the result
# to one integer per column, so the output type cannot silently change the
# way sapply()'s can (e.g. for a data frame with no columns).
data.frame(
  column = colnames(net),
  missing_values = vapply(net, function(x) sum(is.na(x)), integer(1)),
  row.names = NULL
)

# Calculate mode of ratings ----
# Working copy of the rating column, used by the mode calculation below.
r <- net$rating

summary(r)

# Return the most frequent value of `r` (the statistical mode).
#
# NOTE: this definition masks base::mode() (which reports an object's
# storage mode) for the rest of the script; the name is kept so existing
# calls keep working.
#
# Args:
#   r:     Vector of values to tally.
#   na.rm: If TRUE, NA values are dropped before counting. The default,
#          FALSE, keeps the original behaviour in which NA is counted like
#          any other value and can therefore be returned as the mode.
#
# Returns:
#   A length-one vector of the same type as `r`. Ties go to the value that
#   appears first in `r`. An empty input yields NA.
mode <- function(r, na.rm = FALSE) {
  if (na.rm) {
    r <- r[!is.na(r)]
  }

  # unique() keeps first-appearance order; match() maps every element to its
  # position in that list, so tabulate() gives one count per distinct value.
  unique_r <- unique(r)
  counts <- tabulate(match(r, unique_r))

  # which.max() returns the first maximum, which is what breaks ties.
  unique_r[which.max(counts)]
}

# Most frequent rating among the non-missing entries. NAs are dropped first
# so that NA itself can never be chosen as the fill value.
result <- mode(r[!is.na(r)])

print(result)

# Fill ratings ----
# Impute missing ratings with the computed mode (TV-MA for this dataset)
# instead of a hard-coded literal, so the fill value tracks the data.
net$rating[is.na(net$rating)] <- result

# Sanity check: prints 0 once every missing rating has been filled
# (provided at least one rating was present to begin with).
sum(is.na(net$rating))
#time series format change

You might also like