You are on page 1 of 6

ID NO:160031327

SEC:23
NAME:Syed.Umar
LAB EXPT -1

PREPROCESSING OF THE DATASETS:

a.) Impute missing values:

Source code:

# Part (a): impute the missing values of the `nhanes` data set with mice,
# visualise the missingness, and fit a linear model pooled across the
# imputed data sets.

# Install a package only when it is not already available, so re-running the
# script does not download and reinstall everything each time.
for (pkg in c("mice", "VIM", "lattice")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(mice)
library(VIM)
library(lattice)

data(nhanes)
str(nhanes)

# Convert age to a factor so it is handled as a categorical variable.
nhanes$age <- as.factor(nhanes$age)

# Tabulate the missing-data patterns.
md.pattern(nhanes)

# Plot the proportion of missing values per variable and the combinations of
# missingness. (The original call was split in the middle of `names(nhanes)`
# by a line wrap and did not parse.)
nhanes_miss <- aggr(
  nhanes,
  col = mdc(1:2),
  numbers = TRUE,
  sortVars = TRUE,
  labels = names(nhanes),
  cex.axis = 0.7,
  gap = 3,
  ylab = c("Proportion of missingness", "Missingness Pattern")
)

# Scatterplot of chl against bmi with missingness shown in the margins.
marginplot(
  nhanes[, c("chl", "bmi")],
  col = mdc(1:2),
  cex.numbers = 1.2,
  pch = 19
)

# Create 5 imputed data sets, running 40 iterations each.
# NOTE(review): no `seed` is passed, so the imputed values differ from run
# to run; pass `seed = <int>` to mice() if reproducibility is needed.
mice_imputes <- mice(nhanes, m = 5, maxit = 40)

# Imputation method chosen for each variable.
mice_imputes$method

# Imputed values for chl across the imputations.
mice_imputes$imp$chl

# Extract the 5th completed data set.
Imputed_data <- complete(mice_imputes, 5)

# Compare observed and imputed values, one panel per imputation.
xyplot(mice_imputes, bmi ~ chl | .imp, pch = 20, cex = 1.4)

densityplot(mice_imputes)

# Fit the same linear model on every imputed data set, then pool the fits.
lm_5_model <- with(mice_imputes, lm(chl ~ age + bmi + hyp))

combo_5_model <- pool(lm_5_model)

# Display the pooled estimates; the original computed them but never showed
# the result.
summary(combo_5_model)
ID NO:160031327
SEC:23
NAME:Syed.Umar
Output:
ID NO:160031327
SEC:23
NAME:Syed.Umar

b.) Initial data exploration and visualization in R.

Source code:

# Part (b): first look at the `nhanes` data set: its last and first rows,
# its size, and its column names.

# Install a package only when it is not already available, so re-running the
# script does not download and reinstall everything each time.
for (pkg in c("mice", "VIM", "lattice")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(mice)
library(VIM)
library(lattice)

data(nhanes)

# Last and first six rows.
tail(nhanes)
head(nhanes)

# For a data frame, length() is the number of columns.
length(nhanes)

names(nhanes)

nrow(nhanes)

# View() opens a spreadsheet-style viewer, which is only available in an
# interactive session; skip it when the script is run in batch mode.
if (interactive()) {
  View(nhanes)
}
ID NO:160031327
SEC:23
NAME:Syed.Umar
# Default plot of the whole nhanes object; for a data frame with several
# columns this draws a matrix of pairwise scatterplots.
plot(nhanes)

# NOTE(review): barchart() is called here on the whole data frame with no
# formula. Whether lattice accepts this, and what it draws, presumably
# depends on the installed lattice version. TODO confirm, and consider an
# explicit formula instead.
barchart(nhanes)

Output:
ID NO:160031327
SEC:23
NAME:Syed.Umar
c.) Split data into training set and testing set:

Source code:

# Part (c): read the salary data and split it into a training set (2/3 of
# the rows) and a test set (the remaining 1/3).

# Directory that holds the input CSV. The file is read through an explicit
# path instead of calling setwd(), so the script does not change the
# session's working directory as a side effect.
data_dir <- "C:\\Users\\USER\\Documents\\3-2\\ml"

datasets <- read.csv(file.path(data_dir, "position_salaries.csv"))

# The original also ran install.packages("datasets") and library(datasets).
# `datasets` is a base R package: it cannot be installed from CRAN and is
# already attached in a default session, so both calls were dropped.

# Keep only columns 2 and 3.
# NOTE(review): `dataset` is not used below; the split is made on the full
# `datasets` data frame. Confirm which of the two was intended.
dataset <- datasets[2:3]

# Install caTools only when it is missing.
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}

library(caTools)

# Fix the random seed so the split is reproducible.
set.seed(123)

# Logical vector: TRUE marks rows assigned to the training set.
split <- sample.split(datasets$Salary, SplitRatio = 2 / 3)

training_set <- subset(datasets, split == TRUE)

test_set <- subset(datasets, split == FALSE)

Output:
ID NO:160031327
SEC:23
NAME:Syed.Umar

You might also like