# Pima Indians Diabetes — comparison of classification models in R
# (logistic regression, decision tree, random forest, SVM, naive Bayes, neural net)
library(caret)

# Load the Pima Indians Diabetes data set with descriptive column names.
# NOTE: the original had the filename and the "Serum_Insulin" name split
# across physical lines, embedding newlines inside the string literals,
# so the file could never be found and the column was misnamed.
pima <- read.csv(
  "pima-indians-diabetes.csv",
  col.names = c("Pregnant", "Plasma_Glucose", "Dias_BP", "Triceps_Skin",
                "Serum_Insulin", "BMI", "DPF", "Age", "Diabetes")
)

View(pima)   # interactive spreadsheet view of the data
str(pima)    # column types and a preview of values

# Count missing values per column. (This replaces a bare `is.na(pima)`,
# which printed the entire 9-column logical matrix to no useful end.)
sapply(pima, function(x) sum(is.na(x)))
# Exploratory data analysis ----
library(ggplot2)
library(gridExtra)

# Boxplot: number of pregnancies split by diabetes outcome.
p1 <- ggplot(pima, aes(x = Diabetes, y = Pregnant, fill = Diabetes)) +
  geom_boxplot() +
  theme(legend.position = "bottom") +
  ggtitle("Number of pregnancies Vs Diabetes")

# Scatter: serum insulin vs plasma glucose, colored by outcome.
p2 <- ggplot(pima, aes(x = Serum_Insulin, y = Plasma_Glucose)) +
  geom_point(aes(color = Diabetes)) +
  theme(legend.position = "bottom") +
  ggtitle("Relationship of Insulin with Glucose Vs Diabetes")

# Scatter: BMI vs diastolic blood pressure.
# NOTE: the original reassigned p1/p2 here, silently discarding the two
# plots above; p3/p4 keep all four panels alive.
p3 <- ggplot(pima, aes(x = BMI, y = Dias_BP)) +
  geom_point(aes(color = Diabetes)) +
  theme(legend.position = "bottom") +
  ggtitle("Relationship of BMI with BP Vs Diabetes")

# Scatter: BMI vs triceps skin-fold thickness.
p4 <- ggplot(pima, aes(x = BMI, y = Triceps_Skin)) +
  geom_point(aes(color = Diabetes)) +
  theme(legend.position = "bottom") +
  ggtitle("Relationship of BMI with Skin Thickness Vs Diabetes")

# Show all four panels in a 2x2 grid — this is why gridExtra is loaded;
# the original never called it, so no plot was ever arranged.
grid.arrange(p1, p2, p3, p4, ncol = 2)
# Model 1: evaluate logistic-regression predictions on the test split.
# NOTE(review): the glm fit and the train/test split that define
# `glm_pred` and `pima_testing` are missing from this chunk — restore
# them before running. The three calls below are equivalent
# alternatives; run only one.
confusionMatrix(glm_pred, pima_testing$Diabetes )
# Explicit data=/reference= form, coercing both sides to factor so the
# levels line up even if the prediction vector is character.
confusionMatrix(data = factor(glm_pred),reference = factor(pima_testing$Diabetes))
# Same coercion with positional arguments.
confusionMatrix(factor(glm_pred),factor(pima_testing$Diabetes))
# Model 2: Decision Tree ----
# Packages for tree-based models (tree, party) and for caret's
# confusionMatrix internals (e1071).
library(tree)
library(e1071)
library(party)
# NOTE(review): the tree-fitting and prediction steps that should define
# `tree_pred` are missing from this chunk — restore them before
# evaluating against the test labels.
confusionMatrix(tree_pred, pima_testing$Diabetes)
# Model 3: Random Forest ----
set.seed(123)  # make the forest reproducible
library(randomForest)

# Model 3: random forest on the training split. mtry = 8 uses all eight
# predictors at every split (i.e. bagging); importance = TRUE records
# variable-importance measures for importance() below.
rf_pima <- randomForest(Diabetes ~ ., data = pima_training,
                        mtry = 8, ntree = 50, importance = TRUE)

# Predicted class labels for the held-out test set.
rf_probs <- predict(rf_pima, newdata = pima_testing)

# Test-set misclassification rate, as a percentage.
mean(rf_probs != pima_testing$Diabetes) * 100

# Confusion matrix; factor() on both sides guards against level
# mismatches. (The original listed two equivalent calls separated by a
# bare `or` token, which errors at runtime — kept the explicit form.)
confusionMatrix(data = factor(rf_probs),
                reference = factor(pima_testing$Diabetes))

# Variable-importance scores recorded during fitting.
importance(rf_pima)
# Model 4: Support Vector Machine ----
library(e1071)

# Model 4: SVM. Grid-search gamma and cost by cross-validation.
tuned <- tune.svm(Diabetes ~ ., data = pima_training,
                  gamma = seq(0.01, 0.1, by = 0.01),
                  cost  = seq(0.1, 1, by = 0.1))
summary(tuned)  # show the tuning results

# Fit the radial-kernel SVM using the best parameters found above.
# NOTE: the original ran the grid search and then ignored it, refitting
# with hard-coded gamma = 0.01, cost = 0.1.
svm_model <- svm(Diabetes ~ ., data = pima_training, kernel = "radial",
                 gamma = tuned$best.parameters$gamma,
                 cost  = tuned$best.parameters$cost)
summary(svm_model)

# Evaluate on the held-out test set.
svm_pred <- predict(svm_model, pima_testing)
mean(svm_pred != pima_testing$Diabetes) * 100  # misclassification %
confusionMatrix(svm_pred, pima_testing$Diabetes)
# Model 5: Naive Bayes ----
library(e1071)

# Model 5: naive Bayes. Train on the training split only — the original
# fit on the full `pima` data set, leaking the test rows into training
# and making its confusion matrix incomparable with the other models.
nv_model <- naiveBayes(as.factor(Diabetes) ~ ., data = pima_training)
summary(nv_model)

# Evaluate on the held-out test set.
nv_pred <- predict(nv_model, pima_testing)
mean(nv_pred != pima_testing$Diabetes) * 100  # misclassification %
confusionMatrix(nv_pred, pima_testing$Diabetes)
# Model 6: Neural Network ----
library(neuralnet)

# Model 6: neural network on the training split (default single hidden
# unit). The original fit the same network twice in two fragments; one
# fit is enough.
NN <- neuralnet(Diabetes ~ ., data = pima_training)
plot(NN, rep = "best")  # draw the best repetition of the fitted net

# Score the test set; $net.result holds the network's output values.
Predict <- compute(NN, pima_testing)
Predict$net.result

# Confusion table of actual vs predicted class, rounding the network
# output to 0/1. NOTE(review): the original referenced an undefined
# `prediction` object here — it must be the `Predict` result above.
cm <- table(pima_testing$Diabetes, round(Predict$net.result[, 1]))

# Accuracy = trace(cm) / sum(cm); tr() comes from the psych package.
library(psych)
a1 <- tr(cm) / sum(cm)
a1
# Model comparison ----
library(ggplot2)

# Compare test-set accuracy across the five models.
# NOTE: the original placed the raw prediction vectors in `Accuracy`
# (one long vector per model — lengths mismatch and the plot is
# meaningless); each entry must be a single accuracy number computed
# against the test labels. Its model-name strings were also split by
# line-wrapping ("Random\nForest") — restored here.
actual <- pima_testing$Diabetes
accuracy <- data.frame(
  Model = c("Logistic Regression", "Decision Tree", "Random Forest",
            "Support Vector Machine (SVM)", "NB"),
  Accuracy = c(mean(glm_pred == actual),
               mean(test_predict == actual),
               mean(rf_probs == actual),
               mean(svm_pred == actual),
               mean(nv_pred == actual))
)

ggplot(accuracy, aes(x = Model, y = Accuracy)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  ggtitle("Comparison of Model Accuracy")
# End of script ----