# 20BCE1205 Lab 8
# Author: Shubham Ojha (20BCE1205)
# Date: 2023-03-08
# Support Vector Machine on the Social Network Ads data.
# Keep columns 3:5 (Age, EstimatedSalary, Purchased), split 75/25,
# then feature-scale the two numeric predictors.
dataset <- read.csv("C:/Users/HP/Desktop/padhai/EDA/Lab/Social_Network_Ads.csv")
dataset <- dataset[3:5]
summary(dataset)
head(dataset)

library(caTools)
set.seed(123)  # reproducible train/test split
split <- sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

# Column 3 is the class label (Purchased); scale only the predictors.
training_set[-3] <- scale(training_set[-3])
test_set[-3] <- scale(test_set[-3])
head(training_set)
library(e1071)

# Linear-kernel SVM fit on the scaled training data; predicts Purchased
# from Age and EstimatedSalary. C-classification is the standard
# soft-margin classifier in e1071::svm.
classifierL <- svm(
  formula = Purchased ~ .,
  data = training_set,
  type = "C-classification",
  kernel = "linear"
)
## 2 4 5 9 12 18 19 20 22 29 32 34 35 38 45 46 48 52
## 66 69
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 0 0
## 74 75 82 84 85 86 87 89 103 104 107 108 109 117 124 126 127 131
## 134 139
## 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 0 0
## 148 154 156 159 162 163 170 175 176 193 199 200 208 213 224 226 228 229
## 230 234
## 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
## 1 1
## 236 237 239 241 255 264 265 266 273 274 281 286 292 299 302 305 307 310
## 316 324
## 1 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 0
## 0 0
## 326 332 339 341 343 347 353 363 364 367 368 369 372 373 380 383 389 392
## 395 400
## 0 1 0 1 0 1 1 0 1 1 1 0 1 0 1 1 0 0
## 0 0
## Levels: 0 1
y_predS
## 2 4 5 9 12 18 19 20 22 29 32 34 35 38 45 46 48 52
## 66 69
## 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0
## 0 0
## 74 75 82 84 85 86 87 89 103 104 107 108 109 117 124 126 127 131
## 134 139
## 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0
## 0 0
## 148 154 156 159 162 163 170 175 176 193 199 200 208 213 224 226 228 229
## 230 234
## 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1
## 1 1
## 236 237 239 241 255 264 265 266 273 274 281 286 292 299 302 305 307 310
## 316 324
## 1 1 1 0 0 0 1 1 0 0 1 0 1 1 1 1 1 0
## 1 0
## 326 332 339 341 343 347 353 363 364 367 368 369 372 373 380 383 389 392
## 395 400
## 1 1 0 1 0 1 1 1 1 0 1 0 1 1 0 0 0 0
## 0 0
## Levels: 0 1
y_predR
## 2 4 5 9 12 18 19 20 22 29 32 34 35 38 45 46 48 52
## 66 69
## 0 0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0
## 0 0
## 74 75 82 84 85 86 87 89 103 104 107 108 109 117 124 126 127 131
## 134 139
## 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
## 0 0
## 148 154 156 159 162 163 170 175 176 193 199 200 208 213 224 226 228 229
## 230 234
## 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
## 0 1
## 236 237 239 241 255 264 265 266 273 274 281 286 292 299 302 305 307 310
## 316 324
## 1 0 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0
## 0 1
## 326 332 339 341 343 347 353 363 364 367 368 369 372 373 380 383 389 392
## 395 400
## 0 1 0 1 0 1 1 0 0 1 1 0 1 0 1 1 1 1
## 0 1
## Levels: 0 1
y_predP
## 2 4 5 9 12 18 19 20 22 29 32 34 35 38 45 46 48 52
## 66 69
## 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 0 0
## 74 75 82 84 85 86 87 89 103 104 107 108 109 117 124 126 127 131
## 134 139
## 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 0 0
## 148 154 156 159 162 163 170 175 176 193 199 200 208 213 224 226 228 229
## 230 234
## 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0
## 0 0
## 236 237 239 241 255 264 265 266 273 274 281 286 292 299 302 305 307 310
## 316 324
## 0 0 0 1 1 0 0 1 1 1 1 0 0 0 0 0 1 0
## 0 1
## 326 332 339 341 343 347 353 363 364 367 368 369 372 373 380 383 389 392
## 395 400
## 0 1 0 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0
## 0 1
## Levels: 0 1
## y_predL
## 0 1
## 0 57 7
## 1 13 23
cmS
## y_predS
## 0 1
## 0 53 11
## 1 14 22
cmR
## y_predR
## 0 1
## 0 58 6
## 1 4 32
cmP
## y_predP
## 0 1
## 0 60 4
## 1 18 18
# Decision-region plots ----
# The four plotting stanzas below were copy-pasted four times in the
# original; they differ only in classifier, data set, and title, so the
# shared logic is factored into one helper.

# Plot an SVM decision boundary over a fine Age x EstimatedSalary grid,
# then overlay the observed points (pch 21, green4 = Purchased == 1,
# red3 otherwise). `classifier` is a fitted e1071::svm model; `set` has
# the predictors in columns 1:2 and the class label in column 3.
plot_svm_region <- function(classifier, set, title) {
  X1 <- seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
  X2 <- seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
  grid_set <- expand.grid(X1, X2)
  # Grid columns must match the training feature names for predict().
  colnames(grid_set) <- c("Age", "EstimatedSalary")
  y_grid <- predict(classifier, newdata = grid_set)
  plot(set[, -3],
       main = title,
       xlab = "Age", ylab = "Estimated Salary",
       xlim = range(X1), ylim = range(X2))
  contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)),
          add = TRUE)
  points(grid_set, pch = ".",
         col = ifelse(y_grid == 1, "springgreen3", "tomato"))
  points(set, pch = 21, bg = ifelse(set[, 3] == 1, "green4", "red3"))
  invisible(y_grid)
}

# NOTE(review): classifierS (sigmoid kernel) is used below but its fitting
# call is missing from this transcript — presumably
# svm(Purchased ~ ., data = training_set, type = "C-classification",
# kernel = "sigmoid"); confirm before running.

# Training-set decision regions.
plot_svm_region(classifierL, training_set, "SVM (Training set)")
plot_svm_region(classifierS, training_set, "SVM Sigmoid Kernel (Training set)")

# Test-set decision regions.
plot_svm_region(classifierL, test_set, "SVM (Test set)")
plot_svm_region(classifierS, test_set, "SVM Sigmoid Kernel (Test set)")
# Random Forest ----
# Load the modelling packages (randomForest, tree) and visualisation
# helpers (ggplot2, GGally) plus dplyr; the analysis below uses the
# built-in iris data set.
library(randomForest)
## randomForest 4.7-1.1
library(tree)
library(ggplot2)
##
## Attaching package: 'ggplot2'
library(GGally)
library(dplyr)
##
## Attaching package: 'dplyr'
# Quick look at the iris data used by the tree / forest models below.
head(iris)
summary(iris)
# Classification tree predicting iris Species from the four measurements.
# NOTE(review): the original fitting line was dropped from this transcript;
# the summary() output below records the call verbatim as
# tree(formula = Species ~ ., data = iris), so it is restored here.
decision_tree <- tree(Species ~ ., data = iris)
summary(decision_tree)
##
## Classification tree:
## tree(formula = Species ~ ., data = iris)
## Variables actually used in tree construction:
## [1] "Petal.Length" "Petal.Width" "Sepal.Length"
## Number of terminal nodes: 6
## Residual mean deviance: 0.1253 = 18.05 / 144
## Misclassification error rate: 0.02667 = 4 / 150
# Draw the fitted tree and label its splits and leaves.
plot(decision_tree)
text(decision_tree)
# Pairwise plots of all five iris columns (GGally).
ggpairs(iris[, 1:5])
##
## Call:
## randomForest(formula = Species ~ ., data = train_data, importance = T)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 6.67%
## Confusion matrix:
## setosa versicolor virginica class.error
## setosa 37 0 0 0.00000000
## versicolor 0 32 3 0.08571429
## virginica 0 4 29 0.12121212
# NOTE(review): iris_classifier, train_data and test_data are never defined
# in this transcript; the Call: line above shows the model was fit with
# randomForest(Species ~ ., data = train_data, importance = T), but the
# train/test split of iris (seed and proportion) is lost. Restore that code
# before running this section.
plot(iris_classifier)
# Predict Species (column 5) for the held-out rows, then cross-tabulate
# observed vs predicted labels.
predicted_table <- predict(iris_classifier, test_data[,-5])
table(observed = test_data[,5], predicted = predicted_table)
## predicted
## observed setosa versicolor virginica
## setosa 13 0 0
## versicolor 0 15 0
## virginica 0 2 15