
20BCE1205-Lab8.

Shubham Ojha

20BCE1205

EDA LAB (L1+L2)

SVM and Random Forest


2023-03-08
# Support Vector Machine

dataset = read.csv("C:/Users/HP/Desktop/padhai/EDA/Lab/Social_Network_Ads.csv")
dataset = dataset[3:5]
summary(dataset)

## Age EstimatedSalary Purchased
## Min. :18.00 Min. : 15000 Min. :0.0000
## 1st Qu.:29.75 1st Qu.: 43000 1st Qu.:0.0000
## Median :37.00 Median : 70000 Median :0.0000
## Mean :37.66 Mean : 69743 Mean :0.3575
## 3rd Qu.:46.00 3rd Qu.: 88000 3rd Qu.:1.0000
## Max. :60.00 Max. :150000 Max. :1.0000

head(dataset)

## Age EstimatedSalary Purchased
## 1 19 19000 0
## 2 35 20000 0
## 3 26 43000 0
## 4 27 57000 0
## 5 19 76000 0
## 6 27 58000 0
dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1))
summary(dataset)

## Age EstimatedSalary Purchased
## Min. :18.00 Min. : 15000 0:257
## 1st Qu.:29.75 1st Qu.: 43000 1:143
## Median :37.00 Median : 70000
## Mean :37.66 Mean : 69743
## 3rd Qu.:46.00 3rd Qu.: 88000
## Max. :60.00 Max. :150000

library(caTools)
set.seed(123)
split = sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
training_set[-3] = scale(training_set[-3])
test_set[-3] = scale(test_set[-3])
head(training_set)

## Age EstimatedSalary Purchased
## 1 -1.7655475 -1.4733414 0
## 3 -1.0962966 -0.7883761 0
## 6 -1.0006894 -0.3602727 0
## 7 -1.0006894 0.3817730 0
## 8 -0.5226531 2.2654277 1
## 10 -0.2358313 -0.1604912 0
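
Scaling both predictors matters for SVMs: kernels are distance-based, so EstimatedSalary (tens of thousands) would otherwise swamp Age (tens). A quick sanity check, as a sketch (not part of the original lab): each scaled column should now have mean ~0 and standard deviation 1.

colMeans(training_set[-3])      # both column means should be ~0
apply(training_set[-3], 2, sd)  # both standard deviations should be 1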

library(e1071)
classifierL = svm(formula = Purchased ~ .,
                  data = training_set,
                  type = 'C-classification',
                  kernel = 'linear')

classifierS = svm(formula = Purchased ~ .,
                  data = training_set,
                  type = 'C-classification', # classify the Purchased factor rather than fit a regression
                  kernel = 'sigmoid')

classifierR = svm(formula = Purchased ~ .,
                  data = training_set,
                  type = 'C-classification',
                  kernel = 'radial')

classifierP = svm(formula = Purchased ~ .,
                  data = training_set,
                  type = 'C-classification',
                  kernel = 'polynomial')
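
All four classifiers above use e1071's default cost and gamma. A hedged sketch of hyperparameter tuning with e1071's tune(), which cross-validates over a parameter grid; the grid values here are illustrative choices, not from the lab:

tune_radial = tune(svm, Purchased ~ ., data = training_set,
                   kernel = 'radial', type = 'C-classification',
                   ranges = list(cost = c(0.1, 1, 10), gamma = c(0.1, 0.5, 1)))
summary(tune_radial)    # cross-validated error for each cost/gamma pair
tune_radial$best.model  # svm refit on the best parameters
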
y_predL = predict(classifierL, newdata = test_set[-3])
y_predS = predict(classifierS, newdata = test_set[-3])
y_predR = predict(classifierR, newdata = test_set[-3])
y_predP = predict(classifierP, newdata = test_set[-3])
y_predL

##   2   4   5   9  12  18  19  20  22  29  32  34  35  38  45  46  48  52  66  69
##   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  74  75  82  84  85  86  87  89 103 104 107 108 109 117 124 126 127 131 134 139
##   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
## 148 154 156 159 162 163 170 175 176 193 199 200 208 213 224 226 228 229 230 234
##   0   0   0   0   0   0   0   0   0   0   0   0   1   1   1   0   1   0   1   1
## 236 237 239 241 255 264 265 266 273 274 281 286 292 299 302 305 307 310 316 324
##   1   0   1   1   1   0   1   1   1   1   1   0   1   1   1   0   1   0   0   0
## 326 332 339 341 343 347 353 363 364 367 368 369 372 373 380 383 389 392 395 400
##   0   1   0   1   0   1   1   0   1   1   1   0   1   0   1   1   0   0   0   0
## Levels: 0 1

y_predS

##   2   4   5   9  12  18  19  20  22  29  32  34  35  38  45  46  48  52  66  69
##   0   0   0   0   0   0   0   0   1   0   1   0   0   0   0   0   0   0   0   0
##  74  75  82  84  85  86  87  89 103 104 107 108 109 117 124 126 127 131 134 139
##   0   0   0   0   0   1   0   0   0   1   0   0   0   0   0   0   1   0   0   0
## 148 154 156 159 162 163 170 175 176 193 199 200 208 213 224 226 228 229 230 234
##   0   0   0   0   0   0   0   0   0   0   0   0   1   0   1   0   1   1   1   1
## 236 237 239 241 255 264 265 266 273 274 281 286 292 299 302 305 307 310 316 324
##   1   1   1   0   0   0   1   1   0   0   1   0   1   1   1   1   1   0   1   0
## 326 332 339 341 343 347 353 363 364 367 368 369 372 373 380 383 389 392 395 400
##   1   1   0   1   0   1   1   1   1   0   1   0   1   1   0   0   0   0   0   0
## Levels: 0 1

y_predR

##   2   4   5   9  12  18  19  20  22  29  32  34  35  38  45  46  48  52  66  69
##   0   0   0   0   0   1   1   1   0   0   1   0   0   0   0   0   0   0   0   0
##  74  75  82  84  85  86  87  89 103 104 107 108 109 117 124 126 127 131 134 139
##   1   0   0   0   0   1   0   0   0   1   0   0   0   0   0   0   0   0   0   0
## 148 154 156 159 162 163 170 175 176 193 199 200 208 213 224 226 228 229 230 234
##   0   0   0   0   0   0   0   0   0   0   0   0   1   1   1   0   1   0   0   1
## 236 237 239 241 255 264 265 266 273 274 281 286 292 299 302 305 307 310 316 324
##   1   0   1   1   1   0   1   1   1   1   1   1   1   0   1   0   1   0   0   1
## 326 332 339 341 343 347 353 363 364 367 368 369 372 373 380 383 389 392 395 400
##   0   1   0   1   0   1   1   0   0   1   1   0   1   0   1   1   1   1   0   1
## Levels: 0 1

y_predP

##   2   4   5   9  12  18  19  20  22  29  32  34  35  38  45  46  48  52  66  69
##   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0
##  74  75  82  84  85  86  87  89 103 104 107 108 109 117 124 126 127 131 134 139
##   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
## 148 154 156 159 162 163 170 175 176 193 199 200 208 213 224 226 228 229 230 234
##   0   0   0   0   0   0   0   0   0   0   0   0   1   1   1   0   1   0   0   0
## 236 237 239 241 255 264 265 266 273 274 281 286 292 299 302 305 307 310 316 324
##   0   0   0   1   1   0   0   1   1   1   1   0   0   0   0   0   1   0   0   1
## 326 332 339 341 343 347 353 363 364 367 368 369 372 373 380 383 389 392 395 400
##   0   1   0   1   0   1   0   0   0   1   0   0   1   0   1   1   0   0   0   1
## Levels: 0 1

cmL = table(test_set[, 3], y_predL)
cmS = table(test_set[, 3], y_predS)
cmR = table(test_set[, 3], y_predR)
cmP = table(test_set[, 3], y_predP)
cmL

## y_predL
## 0 1
## 0 57 7
## 1 13 23

cmS

## y_predS
## 0 1
## 0 53 11
## 1 14 22

cmR

## y_predR
## 0 1
## 0 58 6
## 1 4 32

cmP

## y_predP
## 0 1
## 0 60 4
## 1 18 18
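
The radial kernel misclassifies the fewest test points. To compare all four kernels on one number, a small helper (our addition, not part of the lab) computes accuracy as the sum of each confusion matrix's diagonal over its total:

accuracy = function(cm) sum(diag(cm)) / sum(cm)
sapply(list(linear = cmL, sigmoid = cmS, radial = cmR, polynomial = cmP), accuracy)

With the matrices above this gives 0.80, 0.75, 0.90, and 0.78 on the 100 test rows, so the radial kernel fits this data best.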

# Linear
set = training_set
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('Age', 'EstimatedSalary')
y_grid = predict(classifierL, newdata = grid_set)
plot(set[, -3],
     main = 'SVM (Training set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
# Sigmoid
set = training_set
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('Age', 'EstimatedSalary')
y_gridS = predict(classifierS, newdata = grid_set)
plot(set[, -3],
     main = 'SVM Sigmoid Kernel (Training set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_gridS), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_gridS == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
# Testing

# Linear
set = test_set
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('Age', 'EstimatedSalary')
y_grid = predict(classifierL, newdata = grid_set)
plot(set[, -3], main = 'SVM (Test set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
# Sigmoid
set = test_set
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('Age', 'EstimatedSalary')
y_gridS = predict(classifierS, newdata = grid_set)
plot(set[, -3], main = 'SVM Sigmoid Kernel (Test set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_gridS), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_gridS == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
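
The four visualisation chunks repeat the same grid-building code with only the classifier, data split, and title changing, so they could be factored into one helper. A sketch (the function name plot_svm_boundary is ours, not from the lab):

plot_svm_boundary = function(classifier, set, title) {
  X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
  X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
  grid_set = expand.grid(Age = X1, EstimatedSalary = X2)
  y_grid = predict(classifier, newdata = grid_set)
  plot(set[, -3], main = title, xlab = 'Age', ylab = 'Estimated Salary',
       xlim = range(X1), ylim = range(X2))
  contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
  points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
  points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
}

plot_svm_boundary(classifierR, test_set, 'SVM Radial Kernel (Test set)')  # e.g. the best kernel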

# Random Forest
library(randomForest)

## randomForest 4.7-1.1

## Type rfNews() to see new features/changes/bug fixes.

library(tree)
library(ggplot2)
##
## Attaching package: 'ggplot2'

## The following object is masked from 'package:randomForest':
##
##     margin

library(GGally)

## Registered S3 method overwritten by 'GGally':
##   method from
##   +.gg   ggplot2

library(dplyr)

##
## Attaching package: 'dplyr'

## The following object is masked from 'package:randomForest':
##
##     combine

## The following objects are masked from 'package:stats':
##
##     filter, lag

## The following objects are masked from 'package:base':
##
##     intersect, setdiff, setequal, union

head(iris)

## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa

summary(iris)

## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##

decision_tree <- tree(Species ~ ., data = iris)


decision_tree

## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
##
##  1) root 150 329.600 setosa ( 0.33333 0.33333 0.33333 )
##    2) Petal.Length < 2.45 50   0.000 setosa ( 1.00000 0.00000 0.00000 ) *
##    3) Petal.Length > 2.45 100 138.600 versicolor ( 0.00000 0.50000 0.50000 )
##      6) Petal.Width < 1.75 54  33.320 versicolor ( 0.00000 0.90741 0.09259 )
##       12) Petal.Length < 4.95 48   9.721 versicolor ( 0.00000 0.97917 0.02083 )
##         24) Sepal.Length < 5.15 5   5.004 versicolor ( 0.00000 0.80000 0.20000 ) *
##         25) Sepal.Length > 5.15 43  0.000 versicolor ( 0.00000 1.00000 0.00000 ) *
##       13) Petal.Length > 4.95 6   7.638 virginica ( 0.00000 0.33333 0.66667 ) *
##      7) Petal.Width > 1.75 46   9.635 virginica ( 0.00000 0.02174 0.97826 )
##       14) Petal.Length < 4.95 6   5.407 virginica ( 0.00000 0.16667 0.83333 ) *
##       15) Petal.Length > 4.95 40  0.000 virginica ( 0.00000 0.00000 1.00000 ) *

summary(decision_tree)

##
## Classification tree:
## tree(formula = Species ~ ., data = iris)
## Variables actually used in tree construction:
## [1] "Petal.Length" "Petal.Width" "Sepal.Length"
## Number of terminal nodes: 6
## Residual mean deviance: 0.1253 = 18.05 / 144
## Misclassification error rate: 0.02667 = 4 / 150

plot(decision_tree)
text(decision_tree)
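
The tree above has six terminal nodes, and node 24 isolates just five observations, a hint of overfitting. A hedged sketch of cost-complexity pruning with the tree package's cv.tree() and prune.misclass() (our addition, not in the lab):

set.seed(1)  # cv.tree() resamples, so fix the RNG for a repeatable curve
cv_result = cv.tree(decision_tree, FUN = prune.misclass)
cv_result                                              # $size vs $dev: misclassifications per tree size
pruned_tree = prune.misclass(decision_tree, best = 3)  # keep, say, 3 leaves
plot(pruned_tree); text(pruned_tree)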

ggpairs(iris[,1:5])

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

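One caveat before the split: the sample() call below is not seeded, so the 70/30 partition, and therefore the forest's confusion matrices further down, will change on every knit. A minimal addition for reproducibility (the seed value is our choice, not part of the lab):

set.seed(42)  # our addition: fixes the RNG so the random 70/30 split below is reproducible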
index_row <- sample(2,
                    nrow(iris),
                    replace = T,
                    prob = c(0.7, 0.3))
train_data <- iris[index_row == 1,]
test_data <- iris[index_row == 2,]
iris_classifier <- randomForest(Species ~ .,
                                data = train_data,
                                importance = T)
iris_classifier

##
## Call:
## randomForest(formula = Species ~ ., data = train_data, importance = T)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 6.67%
## Confusion matrix:
## setosa versicolor virginica class.error
## setosa 37 0 0 0.00000000
## versicolor 0 32 3 0.08571429
## virginica 0 4 29 0.12121212
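
Because the forest was grown with importance = T, per-variable importance scores are available; a short sketch using randomForest's importance() and varImpPlot():

importance(iris_classifier)  # MeanDecreaseAccuracy and MeanDecreaseGini per feature
varImpPlot(iris_classifier)  # for iris this typically ranks the petal measurements highest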

plot(iris_classifier)
predicted_table <- predict(iris_classifier, test_data[,-5])
table(observed = test_data[,5], predicted = predicted_table)

## predicted
## observed setosa versicolor virginica
## setosa 13 0 0
## versicolor 0 15 0
## virginica 0 2 15
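
From the table above, the forest classifies 43 of the 45 held-out flowers correctly (only two virginica are mislabelled as versicolor), about 95.6% test accuracy. The same figure can be computed directly:

mean(predicted_table == test_data[, 5])  # fraction of correct test predictions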
