Grid Search For SVM

10/14/21, 6:07 PM RPubs - Grid Search applied in R
Grid Search applied in R

Grid Search
Basic explanations:
Lecture 286 https://www.udemy.com/machinelearning/learn/lecture/6453728
https://medium.com/datadriveninvestor/an-introduction-to-grid-search-ff57adcc0998
https://towardsdatascience.com/grid-search-for-model-tuning-3319b259367e
A well-done description of the process, using a different dataset.
https://towardsdatascience.com/grid-search-for-model-tuning-3319b259367e
Pieces of code for above https://gist.github.com/rohanjoseph93
Importing the dataset

# Read the ads CSV, then keep only columns 3 through 5.
dataset <- read.csv(file = 'Social_Network_Ads.csv')
dataset <- dataset[, 3:5]
Encoding the target feature as factor

# Turn the 0/1 target column into a two-level factor.
dataset[["Purchased"]] <- factor(dataset[["Purchased"]], levels = c(0, 1))
Splitting the dataset into the Training set and

Test set
# install.packages('caTools')
library(caTools)
# Seed the RNG so the same rows land in each partition on every run.
set.seed(123)
# Logical mask over the rows: TRUE marks a training row (ratio 0.75).
split <- sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set <- dataset[split, ]
test_set <- dataset[!split, ]
Feature Scaling
# Standardise the two predictor columns (everything except column 3, the target).
# The centre and spread are estimated on the training set only and then reused
# for the test set, so both sets live on the same scale the model is fitted on.
# Scaling the test set with its own mean/sd would shift its features relative
# to the training data.
# NOTE(review): printed results further down (confusion matrices, accuracy)
# were produced with the earlier per-set scaling and may differ slightly.
scaled_train <- scale(training_set[-3])
training_set[-3] <- scaled_train
test_set[-3] <- scale(test_set[-3],
                      center = attr(scaled_train, "scaled:center"),
                      scale = attr(scaled_train, "scaled:scale"))
Applying Grid Search to find the best

parameters
Lecture 286 https://www.udemy.com/machinelearning/learn/lecture/6453728
https://rpubs.com/markloessi/506999 1/9
# install.packages('caret')
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# This is where the grid search mechanism is set up: caret's train() fits a
# radial-kernel SVM on the training set, and the printed summary reports the
# resampled accuracy for each candidate value it tried.
classifier <- train(
  form = Purchased ~ .,
  data = training_set,
  method = 'svmRadial'
)
print(classifier)
## Support Vector Machines with Radial Basis Function Kernel
##
## 300 samples
## 2 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 300, 300, 300, 300, 300, 300, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.25 0.9170686 0.8177473
## 0.50 0.9185892 0.8211741
## 1.00 0.9190782 0.8219777
##
## Tuning parameter 'sigma' was held constant at a value of 1.560428
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.560428 and C = 1.
Let's strip that down to only the best values for sigma and C.
# Show only the winning tuning-parameter row of the fitted caret model.
classifier[["bestTune"]]
## sigma C
## 3 1.560428 1
Predicting the Test set results

# Predict on the test predictors only (column 3, the target, is dropped).
y_pred <- predict(classifier, newdata = test_set[, -3])
Making the Confusion Matrix
# Cross-tabulate actual labels (rows) against predicted labels (columns).
cm <- table(test_set[[3]], y_pred)
print(cm)
## y_pred
## 0 1
## 0 58 6
## 1 6 30
Visualising the Training set results

# Plot the caret model's decision regions together with the training points.
# NOTE(review): nothing in this block appears to call an ElemStatLearn
# function; the library() call is kept as-is -- confirm before removing it.
library(ElemStatLearn)
set <- training_set
# Axis sequences padded one unit beyond the observed range of each predictor.
X1 <- seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
# X2 was never defined here, so expand.grid() and every call below failed.
X2 <- seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
# Name the grid columns after the two predictors so predict() can match them.
colnames(grid_set) <- c('Age', 'EstimatedSalary')
y_grid <- predict(classifier, newdata = grid_set)
plot(set[, -3],
     main = 'Kernel SVM (Training set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
# Draw the class boundary, shade each grid cell by its predicted class, then
# overlay the observed points coloured by their true class.
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
Visualising the Test set results

# Plot the caret model's decision regions together with the test points.
# The plot() call was left unclosed, the grid was not rebuilt for the test
# set's range, and the observed points were never drawn; all three are
# restored here.
set <- test_set
# Axis sequences padded one unit beyond the observed range of each predictor.
X1 <- seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 <- seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
# Name the grid columns after the two predictors so predict() can match them.
colnames(grid_set) <- c('Age', 'EstimatedSalary')
y_grid <- predict(classifier, newdata = grid_set)
plot(set[, -3], main = 'Kernel SVM (Test set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
# Class boundary, shaded prediction regions, then the true test labels.
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
===============================================
Now, what you can do.

We could take the above and work the sigma and C values into the old way of doing the model build (classifier) or
we could just use the Classifier we made above.
Fitting Kernel SVM to the Training set - the old

way
We’ll use SVM, but we could use any algorithm that suits the data; more on that in Grid Search,
which is next. Additionally, when selecting an algorithm it’s important to have some sense of both the algorithm and
your data, and of whether they complement one another properly.
# install.packages('e1071')
library(e1071)
# Fit a radial-kernel C-classification SVM directly with e1071, using the
# function's defaults for everything not specified here.
classifierO <- svm(
  formula = Purchased ~ .,
  data = training_set,
  type = 'C-classification',
  kernel = 'radial'
)
Predicting the Test set results
# Predict with the e1071 model on the test predictors (target column dropped).
y_predO <- predict(classifierO, newdata = test_set[, -3])
Making the Confusion Matrix

# Actual labels (rows) against the e1071 model's predictions (columns).
cmO <- table(test_set[[3]], y_predO)
print(cmO)
## y_predO
## 0 1
## 0 58 6
## 1 4 32
Applying k-Fold Cross Validation

What is K-Fold
The name comes from the idea that we are creating K # of folds; each iteration is called a fold. 10 is the most
common # of folds. Once the process is complete we’ll be able to see elements of the 10 iterations such as Mean
and Standard Deviation.
# Embed the k-fold cross-validation diagram; the path is relative, so the PNG
# presumably sits next to this document -- confirm.
knitr::include_graphics("k-fold_crossValidation.png")
What are we doing with K-fold?
# install.packages('caret')
library(caret)
# Partition the training rows into 10 folds; each element holds the row
# indices held out for that fold.
folds <- createFolds(training_set$Purchased, k = 10)

# For every fold: fit on the remaining rows, predict the held-out rows, and
# return the share of correct predictions.
cv <- lapply(folds, function(held_out) {
  fit_rows <- training_set[-held_out, ]
  eval_rows <- training_set[held_out, ]
  fold_model <- svm(formula = Purchased ~ .,
                    data = fit_rows,
                    type = 'C-classification',
                    kernel = 'radial')
  fold_pred <- predict(fold_model, newdata = eval_rows[-3])
  fold_cm <- table(eval_rows[, 3], fold_pred)
  # Diagonal = correct predictions; the whole 2x2 table = all predictions.
  sum(diag(fold_cm)) / sum(fold_cm)
})
# Average the per-fold accuracies into a single estimate.
accuracy <- mean(as.numeric(cv))
print(accuracy)
## [1] 0.9132814
Visualising the Training set results

# Plot the e1071 ("old way") model's decision regions with the training points.
# Fixes: the grid is predicted with classifierO (the model this section
# fits) rather than the caret `classifier`; the grid is rebuilt for this set;
# the plot() call is closed; and the observed points are drawn.
set <- training_set
# Axis sequences padded one unit beyond the observed range of each predictor.
X1 <- seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 <- seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
# Name the grid columns after the two predictors so predict() can match them.
colnames(grid_set) <- c('Age', 'EstimatedSalary')
y_gridO <- predict(classifierO, newdata = grid_set)
plot(set[, -3],
     main = 'Kernel SVM old way (Training set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
# Class boundary, shaded prediction regions, then the true training labels.
contour(X1, X2, matrix(as.numeric(y_gridO), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_gridO == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
Visualising the Test set results

# Plot the e1071 ("old way") model's decision regions with the test points.
# Fixes: the grid is predicted with classifierO (the model this section
# fits) rather than the caret `classifier`; the grid is rebuilt for this set;
# the plot() call is closed; and the observed points are drawn.
set <- test_set
# Axis sequences padded one unit beyond the observed range of each predictor.
X1 <- seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 <- seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set <- expand.grid(X1, X2)
# Name the grid columns after the two predictors so predict() can match them.
colnames(grid_set) <- c('Age', 'EstimatedSalary')
y_gridO <- predict(classifierO, newdata = grid_set)
plot(set[, -3], main = 'Kernel SVM old way (Test set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
# Class boundary, shaded prediction regions, then the true test labels.
contour(X1, X2, matrix(as.numeric(y_gridO), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_gridO == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))

Grid Search For SVM

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Grid Search For SVM

Uploaded by

Copyright:

Available Formats

10/14/21, 6:07 PM RPubs - Grid Search applied in R

Grid Search applied in R

Importing the dataset

Encoding the target feature as factor

Splitting the dataset into the Training set and

split = sample.split(dataset$Purchased, SplitRatio = 0.75)

training_set = subset(dataset, split == TRUE)

test_set = subset(dataset, split == FALSE)

Applying Grid Search to find the best

## Loading required package: lattice

## Loading required package: ggplot2

# here is where we setup the grid seach mechanism

classifier = train(form = Purchased ~ ., data = training_set, method = 'svmRadial')

## Support Vector Machines with Radial Basis Function Kernel

## 2 classes: '0', '1'

## Resampling: Bootstrapped (25 reps)

## Resampling results across tuning parameters:

## 0.25 0.9170686 0.8177473

## 0.50 0.9185892 0.8211741

## 1.00 0.9190782 0.8219777

## Tuning parameter 'sigma' was held constant at a value of 1.560428

Predicting the Test set results

Making the Confusion Matrix

cm = table(test_set[, 3], y_pred)

Visualising the Training set results

X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)

X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)

grid_set = expand.grid(X1, X2)

colnames(grid_set) = c('Age', 'EstimatedSalary')

y_grid = predict(classifier, newdata = grid_set)

main = 'Kernel SVM (Training set)',

xlab = 'Age', ylab = 'Estimated Salary',

xlim = range(X1), ylim = range(X2))

contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)

points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))

points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))

Visualising the Test set results

X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)

X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)

grid_set = expand.grid(X1, X2)

colnames(grid_set) = c('Age', 'EstimatedSalary')

y_grid = predict(classifier, newdata = grid_set)

plot(set[, -3], main = 'Kernel SVM (Test set)',

xlab = 'Age', ylab = 'Estimated Salary',

xlim = range(X1), ylim = range(X2))

contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)

points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))

points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))

Now, what you can do.

Fitting Kernel SVM to the Training set - the old

classifierO = svm(formula = Purchased ~ .,

Predicting the Test set results

y_predO = predict(classifierO, newdata = test_set[-3])

Making the Confusion Matrix

Applying k-Fold Cross Validation

What are we doing with K-fold?

folds = createFolds(training_set$Purchased, k = 10)

classifierO = svm(formula = Purchased ~ .,

y_predO = predict(classifierO, newdata = test_fold[-3])

cmO = table(test_fold[, 3], y_predO)

accuracy = (cmO[1,1] + cmO[2,2]) / (cmO[1,1] + cmO[2,2] + cmO[1,2] + cmO[2,1])

Visualising the Training set results

X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)