Professional Documents
Culture Documents
Sampling
library(ISLR)
library(caret)
set.seed(300)
# Splitting the data into training and test sets, using the createDataPartition() function from caret
prop.table(table(training$Direction)) * 100
##
## Down Up
## 48.19 51.81
prop.table(table(testing$Direction)) * 100
##
## Down Up
## 48.08 51.92
prop.table(table(Smarket$Direction)) * 100
https://rpubs.com/njvijay/16444 1/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
##
## Down Up
## 48.16 51.84
The createDataPartition function creates the samples effortlessly. We don’t need to write a complex function like in the previous
example.
Preprocessing
kNN requires variables to be normalized or scaled. caret provides facilities to preprocess data. I am going to choose
centering and scaling.
preProcValues
##
## Call:
##
# Fit a k-nearest-neighbours classifier for Direction on all other columns of
# `training`. Predictors are centered and scaled as part of the fit, and
# tuneLength = 20 asks caret for 20 candidate tuning values.
# `training` and `ctrl` are defined outside this excerpt.
knnFit <- train(
  Direction ~ .,
  data = training,
  method = "knn",
  trControl = ctrl,
  preProcess = c("center", "scale"),
  tuneLength = 20
)
knnFit
https://rpubs.com/njvijay/16444 2/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
## k-Nearest Neighbors
##
## 938 samples
## 8 predictors
## 2 classes: 'Down', 'Up'
##
##
## Summary of sample sizes: 844, 844, 844, 845, 844, 845, ...
##
##
##
## Accuracy was used to select the optimal model using the largest value.
plot(knnFit)
https://rpubs.com/njvijay/16444 3/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
#Get the confusion matrix to see accuracy value and other parameter values
confusionMatrix(knnPredict, testing$Direction )
https://rpubs.com/njvijay/16444 4/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
##
## Reference
## Prediction Down Up
## Down 123 8
## Up 27 154
##
## Accuracy : 0.888
##
## Kappa : 0.774
##
## Sensitivity : 0.820
## Specificity : 0.951
## Prevalence : 0.481
##
##
mean(knnPredict == testing$Direction)
## [1] 0.8878
# Refit the kNN model with the same train() call as before.
# NOTE(review): the output that follows reports ROC as the selection metric,
# so `ctrl` has presumably been redefined (outside this excerpt) before this
# call -- confirm.
knnFit <- train(
  Direction ~ .,
  data = training,
  method = "knn",
  trControl = ctrl,
  preProcess = c("center", "scale"),
  tuneLength = 20
)
##
##
##
## Warning: The metric "Accuracy" was not in the result set. ROC will be used
## instead.
https://rpubs.com/njvijay/16444 5/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
knnFit
## k-Nearest Neighbors
##
## 938 samples
## 8 predictors
## 2 classes: 'Down', 'Up'
##
##
## Summary of sample sizes: 844, 844, 845, 843, 844, 844, ...
##
##
##
## ROC was used to select the optimal model using the largest value.
https://rpubs.com/njvijay/16444 6/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
#Get the confusion matrix to see accuracy value and other parameter values
confusionMatrix(knnPredict, testing$Direction )
https://rpubs.com/njvijay/16444 7/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
##
## Reference
## Prediction Down Up
## Down 123 9
## Up 27 153
##
## Accuracy : 0.885
##
## Kappa : 0.768
##
## Sensitivity : 0.820
## Specificity : 0.944
## Prevalence : 0.481
##
##
mean(knnPredict == testing$Direction)
## [1] 0.8846
library(pROC)
knnROC
##
## Call:
## Data: knnPredict[, "Down"] in 162 controls (testing$Direction 2) < 162 cases (testing$Direction
2).
https://rpubs.com/njvijay/16444 8/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
##
## Call:
## Data: knnPredict[, "Down"] in 162 controls (testing$Direction 2) < 162 cases (testing$Direction
2).
# Random forest
# Fit a random forest for Direction on all other columns of `training` via
# caret's train(), with predictors centered and scaled and tuneLength = 20
# candidate tuning values requested. `training` and `ctrl` are defined outside
# this excerpt.
# The preProcess method names are kept as single-line strings: the earlier
# text wrapped the literal mid-word ("ce" / "nter"), which R reads as a string
# containing a newline rather than "center".
# NOTE(review): the results reported for this model show accuracy 1 on the
# test set; `Direction ~ .` may be pulling in a column that encodes the
# response (in ISLR's Smarket, presumably `Today`) -- confirm.
rfFit <- train(
  Direction ~ .,
  data = training,
  method = "rf",
  trControl = ctrl,
  preProcess = c("center", "scale"),
  tuneLength = 20
)
https://rpubs.com/njvijay/16444 9/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
## randomForest 4.6-7
## note: only 7 unique complexity parameters in default grid. Truncating the grid to 7 .
rfFit
## Random Forest
##
## 938 samples
## 8 predictors
## 2 classes: 'Down', 'Up'
##
##
## Summary of sample sizes: 844, 844, 844, 845, 844, 845, ...
##
##
## 2 1 1 0.004 0.009
## 3 1 1 0.004 0.009
## 4 1 1 0.004 0.009
## 5 1 1 0.004 0.009
## 6 1 1 0.004 0.009
## 7 1 1 0.004 0.009
## 8 1 1 0.004 0.009
##
## Accuracy was used to select the optimal model using the largest value.
plot(rfFit)
https://rpubs.com/njvijay/16444 10/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
confusionMatrix(rfPredict, testing$Direction )
https://rpubs.com/njvijay/16444 11/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
##
## Reference
## Prediction Down Up
## Down 150 0
## Up 0 162
##
## Accuracy : 1
## 95% CI : (0.988, 1)
##
## Kappa : 1
##
## Sensitivity : 1.000
## Specificity : 1.000
## Prevalence : 0.481
##
##
mean(rfPredict == testing$Direction)
## [1] 1
# With twoClassSummary
# Random forest
# Refit the random forest with the same train() call as before.
# NOTE(review): `ctrl` is defined outside this excerpt; the heading above and
# the ROC warning that follows suggest it now uses twoClassSummary -- confirm.
# The preProcess method names are kept as single-line strings: the earlier
# text wrapped the literal mid-word ("ce" / "nter"), which R reads as a string
# containing a newline rather than "center".
rfFit <- train(
  Direction ~ .,
  data = training,
  method = "rf",
  trControl = ctrl,
  preProcess = c("center", "scale"),
  tuneLength = 20
)
## note: only 7 unique complexity parameters in default grid. Truncating the grid to 7 .
## Warning: The metric "Accuracy" was not in the result set. ROC will be used
## instead.
rfFit
https://rpubs.com/njvijay/16444 12/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
## Random Forest
##
## 938 samples
## 8 predictors
## 2 classes: 'Down', 'Up'
##
##
## Summary of sample sizes: 844, 844, 845, 845, 843, 845, ...
##
##
## 4 1 1 1 0 0.007 0.005
## 5 1 1 1 0 0.007 0.005
## 7 1 1 1 0 0.007 0.005
## 8 1 1 1 0 0.007 0.005
##
## ROC was used to select the optimal model using the largest value.
plot(rfFit)
https://rpubs.com/njvijay/16444 13/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
https://rpubs.com/njvijay/16444 14/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
confusionMatrix(rfPredict, testing$Direction )
https://rpubs.com/njvijay/16444 15/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
##
## Reference
## Prediction Down Up
## Down 150 0
## Up 0 162
##
## Accuracy : 1
## 95% CI : (0.988, 1)
##
## Kappa : 1
##
## Sensitivity : 1.000
## Specificity : 1.000
## Prevalence : 0.481
##
##
mean(rfPredict == testing$Direction)
## [1] 1
library(pROC)
rfROC
##
## Call:
##
## Data: rfPredict[, "Down"] in 162 controls (testing$Direction 2) < 162 cases (testing$Direction
2).
https://rpubs.com/njvijay/16444 16/17
10/15/21, 1:30 PM RPubs - kNN using R caret package
##
## Call:
##
## Data: rfPredict[, "Down"] in 162 controls (testing$Direction 2) < 162 cases (testing$Direction
2).
https://rpubs.com/njvijay/16444 17/17