You are on page 1 of 9

10/14/21, 6:07 PM RPubs - Grid Search applied in R

Grid Search applied in R


Grid Search
Basic explanations:
Lecture 286 https://www.udemy.com/machinelearning/learn/lecture/6453728
https://medium.com/datadriveninvestor/an-introduction-to-grid-search-ff57adcc0998
https://towardsdatascience.com/grid-search-for-model-tuning-3319b259367e
A well-written description of the process, using a different data set:
https://towardsdatascience.com/grid-search-for-model-tuning-3319b259367e
Pieces of code for above https://gist.github.com/rohanjoseph93

Importing the dataset


# Read the raw CSV and keep only columns 3 to 5. Later code treats the first
# two kept columns as predictors and the third (`Purchased`) as the target.
dataset <- read.csv("Social_Network_Ads.csv")[, 3:5]

Encoding the target feature as factor


# Turn the 0/1 target into a two-level factor so the models below treat the
# problem as classification.
dataset[["Purchased"]] <- factor(dataset[["Purchased"]], levels = c(0, 1))

Splitting the dataset into the Training set and Test set
# install.packages('caTools')

library(caTools)

# Fix the RNG seed so the split below is reproducible.
set.seed(123)

# Logical mask from caTools::sample.split(): TRUE marks a training row.
# SplitRatio = 0.75 requests 75% of the rows for training.
split <- sample.split(dataset$Purchased, SplitRatio = 0.75)

# Row-index by the mask; which() drops any NA in the mask, as subset() does.
training_set <- dataset[which(split), ]
test_set <- dataset[which(!split), ]

Feature Scaling
# Standardise the two predictor columns (everything except column 3, the
# target). The centring and scaling constants are estimated on the training
# set ONLY and then re-used for the test set. Scaling the test set with its
# own mean/sd would put the two sets on different scales and leak test-set
# statistics into the evaluation.
scaled_training <- scale(training_set[-3])

test_set[-3] <- scale(
  test_set[-3],
  center = attr(scaled_training, "scaled:center"),
  scale = attr(scaled_training, "scaled:scale")
)

training_set[-3] <- scaled_training

Applying Grid Search to find the best parameters
Lecture 286 https://www.udemy.com/machinelearning/learn/lecture/6453728

https://rpubs.com/markloessi/506999 1/9
10/14/21, 6:07 PM RPubs - Grid Search applied in R

# install.packages('caret')

library(caret)

## Loading required package: lattice

## Loading required package: ggplot2

# Here is where we set up the grid search mechanism: caret::train() fits a
# radial-kernel SVM for several candidate tuning values and keeps the best.
# No tuneGrid/trControl is supplied, so caret chooses the candidates and the
# resampling scheme itself (see the printed summary below).
classifier <- train(
  form = Purchased ~ .,
  data = training_set,
  method = "svmRadial"
)

# Print the resampling results and the selected tuning parameters.
classifier

## Support Vector Machines with Radial Basis Function Kernel

##

## 300 samples

## 2 predictor

## 2 classes: '0', '1'

##

## No pre-processing

## Resampling: Bootstrapped (25 reps)

## Summary of sample sizes: 300, 300, 300, 300, 300, 300, ...

## Resampling results across tuning parameters:

##

## C Accuracy Kappa

## 0.25 0.9170686 0.8177473

## 0.50 0.9185892 0.8211741

## 1.00 0.9190782 0.8219777

##

## Tuning parameter 'sigma' was held constant at a value of 1.560428

## Accuracy was used to select the optimal model using the largest value.

## The final values used for the model were sigma = 1.560428 and C = 1.

Let's strip that down to only the best values for sigma and C.

# Show only the winning tuning-parameter combination (sigma and C).
classifier[["bestTune"]]

## sigma C

## 3 1.560428 1

Predicting the Test set results


# Predict the class of every test row from its predictor columns only
# (column 3, the target, is dropped).
y_pred <- predict(classifier, newdata = test_set[, -3])

Making the Confusion Matrix

https://rpubs.com/markloessi/506999 2/9
10/14/21, 6:07 PM RPubs - Grid Search applied in R

# Cross-tabulate actual classes (rows) against predicted classes (columns).
cm <- table(test_set[[3]], y_pred)
cm

## y_pred

## 0 1

## 0 58 6

## 1 6 30

Visualising the Training set results


library(ElemStatLearn)

# Draw the decision regions of the caret-tuned SVM with the training
# observations on top.
set <- training_set

# Dense lattice over both predictors, padded by one unit on each side.
X1 <- seq(from = min(set[[1]]) - 1, to = max(set[[1]]) + 1, by = 0.01)
X2 <- seq(from = min(set[[2]]) - 1, to = max(set[[2]]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
# The model was fitted with these column names, so predict() needs them too.
colnames(grid_set) <- c("Age", "EstimatedSalary")

# Predicted class at every lattice point.
y_grid <- predict(classifier, newdata = grid_set)

# Empty frame with the observations, sized to the lattice.
plot(
  set[, -3],
  main = "Kernel SVM (Training set)",
  xlab = "Age",
  ylab = "Estimated Salary",
  xlim = range(X1),
  ylim = range(X2)
)

# Decision boundary: contour of the predicted class reshaped onto the lattice.
contour(
  X1, X2,
  matrix(as.numeric(y_grid), nrow = length(X1), ncol = length(X2)),
  add = TRUE
)

# Shade each lattice point by predicted class, then overlay the observations
# coloured by their actual class.
points(grid_set, pch = ".", col = ifelse(y_grid == 1, "springgreen3", "tomato"))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, "green4", "red3"))

https://rpubs.com/markloessi/506999 3/9
10/14/21, 6:07 PM RPubs - Grid Search applied in R

Visualising the Test set results


library(ElemStatLearn)

# Draw the decision regions of the caret-tuned SVM with the test observations
# on top.
set <- test_set

# Dense lattice over both predictors, padded by one unit on each side.
X1 <- seq(from = min(set[[1]]) - 1, to = max(set[[1]]) + 1, by = 0.01)
X2 <- seq(from = min(set[[2]]) - 1, to = max(set[[2]]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
# The model was fitted with these column names, so predict() needs them too.
colnames(grid_set) <- c("Age", "EstimatedSalary")

# Predicted class at every lattice point.
y_grid <- predict(classifier, newdata = grid_set)

# Empty frame with the observations, sized to the lattice.
plot(
  set[, -3],
  main = "Kernel SVM (Test set)",
  xlab = "Age",
  ylab = "Estimated Salary",
  xlim = range(X1),
  ylim = range(X2)
)

# Decision boundary: contour of the predicted class reshaped onto the lattice.
contour(
  X1, X2,
  matrix(as.numeric(y_grid), nrow = length(X1), ncol = length(X2)),
  add = TRUE
)

# Shade each lattice point by predicted class, then overlay the observations
# coloured by their actual class.
points(grid_set, pch = ".", col = ifelse(y_grid == 1, "springgreen3", "tomato"))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, "green4", "red3"))

https://rpubs.com/markloessi/506999 4/9
10/14/21, 6:07 PM RPubs - Grid Search applied in R

===============================================

Now, what you can do.


We could take the sigma and C values found above and plug them into the old way of building the model (classifierO),
or we could simply use the classifier that caret trained above.

Fitting Kernel SVM to the Training set - the old way
We’ll use SVM, but we could use whichever algorithm suits the data best; more on that in Grid Search,
which is next. When selecting an algorithm, it is also important to have some sense of both the algorithm and your
data, and of whether they complement one another properly.

# install.packages('e1071')

library(e1071)

# "Old way": fit a radial-kernel C-classification SVM directly with e1071,
# leaving every tuning parameter at the package default.
classifierO <- svm(
  formula = Purchased ~ .,
  data = training_set,
  type = "C-classification",
  kernel = "radial"
)

Predicting the Test set results

https://rpubs.com/markloessi/506999 5/9
10/14/21, 6:07 PM RPubs - Grid Search applied in R

# Predict the test rows with the e1071 model, using the predictor columns only.
y_predO <- predict(classifierO, newdata = test_set[, -3])

Making the Confusion Matrix


# Actual classes (rows) against the e1071 model's predictions (columns).
cmO <- table(test_set[[3]], y_predO)
cmO

## y_predO

## 0 1

## 0 58 6

## 1 4 32

Applying k-Fold Cross Validation


What is K-Fold
The name comes from the idea that we are creating K # of folds; each iteration is called a fold. 10 is the most
common # of folds. Once the process is complete we’ll be able to see elements of the 10 iterations such as Mean
and Standard Deviation.

# Embed the k-fold cross-validation illustration in the rendered document.
knitr::include_graphics(path = "k-fold_crossValidation.png")

What are we doing with K-fold?

https://rpubs.com/markloessi/506999 6/9
10/14/21, 6:07 PM RPubs - Grid Search applied in R

# install.packages('caret')

library(caret)

# Partition the training rows into 10 folds; each element of `folds` holds
# the row indices of one held-out fold.
folds <- createFolds(training_set$Purchased, k = 10)

# For every fold: fit the e1071 SVM on the remaining rows, predict the
# held-out rows, and return the share of correct predictions.
cv <- lapply(folds, function(held_out) {
  fold_model <- svm(
    formula = Purchased ~ .,
    data = training_set[-held_out, ],
    type = "C-classification",
    kernel = "radial"
  )
  fold_pred <- predict(fold_model, newdata = training_set[held_out, -3])
  confusion <- table(training_set[held_out, 3], fold_pred)
  # Correct predictions sit on the diagonal of the 2 x 2 confusion table.
  sum(diag(confusion)) / sum(confusion)
})

# Average accuracy over the 10 folds.
accuracy <- mean(unlist(cv, use.names = FALSE))
accuracy

## [1] 0.9132814

Visualising the Training set results


library(ElemStatLearn)

# Draw the decision regions of the "old way" e1071 model (classifierO) with
# the training observations on top.
set <- training_set

# Dense lattice over both predictors, padded by one unit on each side.
X1 <- seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 <- seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
# The model was fitted with these column names, so predict() needs them too.
colnames(grid_set) <- c("Age", "EstimatedSalary")

# Predict with classifierO, the model this plot is titled after. Using the
# caret-tuned `classifier` here would just redraw the earlier plot.
y_gridO <- predict(classifierO, newdata = grid_set)

plot(
  set[, -3],
  main = "Kernel SVM old way (Training set)",
  xlab = "Age",
  ylab = "Estimated Salary",
  xlim = range(X1),
  ylim = range(X2)
)

# Decision boundary: contour of the predicted class reshaped onto the lattice.
contour(
  X1, X2,
  matrix(as.numeric(y_gridO), length(X1), length(X2)),
  add = TRUE
)

# Shade each lattice point by predicted class, then overlay the observations
# coloured by their actual class.
points(grid_set, pch = ".", col = ifelse(y_gridO == 1, "springgreen3", "tomato"))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, "green4", "red3"))

https://rpubs.com/markloessi/506999 7/9
10/14/21, 6:07 PM RPubs - Grid Search applied in R

Visualising the Test set results


library(ElemStatLearn)

# Draw the decision regions of the "old way" e1071 model (classifierO) with
# the test observations on top.
set <- test_set

# Dense lattice over both predictors, padded by one unit on each side.
X1 <- seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 <- seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
# The model was fitted with these column names, so predict() needs them too.
colnames(grid_set) <- c("Age", "EstimatedSalary")

# Predict with classifierO, the model this plot is titled after. Using the
# caret-tuned `classifier` here would just redraw the earlier plot.
y_gridO <- predict(classifierO, newdata = grid_set)

plot(
  set[, -3],
  main = "Kernel SVM old way (Test set)",
  xlab = "Age",
  ylab = "Estimated Salary",
  xlim = range(X1),
  ylim = range(X2)
)

# Decision boundary: contour of the predicted class reshaped onto the lattice.
contour(
  X1, X2,
  matrix(as.numeric(y_gridO), length(X1), length(X2)),
  add = TRUE
)

# Shade each lattice point by predicted class, then overlay the observations
# coloured by their actual class.
points(grid_set, pch = ".", col = ifelse(y_gridO == 1, "springgreen3", "tomato"))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, "green4", "red3"))

https://rpubs.com/markloessi/506999 8/9
10/14/21, 6:07 PM RPubs - Grid Search applied in R

https://rpubs.com/markloessi/506999 9/9

You might also like