You are on page 1 of 17

Image Classification

Dr Manohar Kapse
1 April 2019

# Classification of images into two groups


# Setup ----
# Image loading and manipulation (load.dir, grayscale, resize) come from the
# imager package.
#
# NOTE: the former `rm(list = ls())` call was removed. It wipes the caller's
# workspace without detaching packages or resetting options, so it does not
# give a clean session. Run this script in a fresh R session instead.

library(imager)

## Loading required package: magrittr

##
## Attaching package: 'imager'

## The following object is masked from 'package:magrittr':


##
## add

## The following objects are masked from 'package:stats':


##
## convolve, spectrum

## The following object is masked from 'package:graphics':


##
## frame

## The following object is masked from 'package:base':


##
## save.image
# Load every JPEG image of the IMCU faculty members from the image directory.
# `pattern` is used as a regular expression when listing files, so the dot is
# escaped and the match is anchored at the end of the file name; the previous
# ".jpg" also matched names such as "photo.jpg.bak" or "xjpg.txt".
IMCU.Images <- load.dir(
  path = "C:/Users/LENOVO/Desktop/Image classification/institute of Management",
  pattern = "\\.jpg$"
)
# Fail early with a clear message instead of a subscript error in the loop
# below when the directory is missing or holds no matching file.
if (length(IMCU.Images) == 0) {
  stop("No .jpg images found for the IMCU group", call. = FALSE)
}
# str(IMCU.Images)
# plot(IMCU.Images[1])

#--------------------------------------------#
# Convert each image to grayscale, in place.
# seq_along() replaces 1:length(), which yields c(1, 0) for an empty list.
for (i in seq_along(IMCU.Images)) {
  IMCU.Images[[i]] <- grayscale(IMCU.Images[[i]])
}

# str(IMCU.Images)
class(IMCU.Images)

## [1] "imlist" "list"

# plot(IMCU.Images[[1]])

#--------------------------------------------#
# Bring every image to the same 100 x 100 size (one depth slice, one colour
# channel) so that each image yields the same number of pixel values.
# seq_along() replaces 1:length(), which would iterate over c(1, 0) if the
# list were empty.
for (i in seq_along(IMCU.Images)) {
  IMCU.Images[[i]] <- resize(
    IMCU.Images[[i]],
    size_x = 100, size_y = 100, size_z = 1, size_c = 1
  )
}

IMCU.Images

## Image list of size 66

# str(IMCU.Images)
class(IMCU.Images)
## [1] "imlist" "list"

# plot(IMCU.Images[[1]])

#-------------------------------------------------#
# Replace each image by its pixel values as a plain numeric vector.
for (i in seq_along(IMCU.Images)) {
  IMCU.Images[[i]] <- as.numeric(IMCU.Images[[i]])
}

# str(IMCU.Images)
class(IMCU.Images)

## [1] "imlist" "list"

#-------------------------------------------------#
# Stack all images into one long data frame: column `im` holds the image file
# name and column `v` the pixel value (see the str() output below).

IMCU.Images.data <- as.data.frame(IMCU.Images)
str(IMCU.Images.data)

## 'data.frame': 660000 obs. of 2 variables:


## $ im: chr "E1125.jpg" "E1125.jpg" "E1125.jpg" "E1125.jpg" ...
## $ v : num 1 1 1 1 1 1 1 1 1 1 ...

#---------------------------------------------#
# Number the pixels within each image. Every image was resized to 100 * 100,
# so the 10000 ids are recycled once per image.

IMCU.Images.data$id <- seq_len(10000L)
str(IMCU.Images.data)
## 'data.frame': 660000 obs. of 3 variables:
## $ im: chr "E1125.jpg" "E1125.jpg" "E1125.jpg" "E1125.jpg" ...
## $ v : num 1 1 1 1 1 1 1 1 1 1 ...
## $ id: int 1 2 3 4 5 6 7 8 9 10 ...

#---------------------------------#
# Label every row of this data set with its class.
IMCU.Images.data$group1 <- "imcu"
str(IMCU.Images.data)

## 'data.frame': 660000 obs. of 4 variables:


## $ im : chr "E1125.jpg" "E1125.jpg" "E1125.jpg" "E1125.jpg" ...
## $ v : num 1 1 1 1 1 1 1 1 1 1 ...
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ group1: chr "imcu" "imcu" "imcu" "imcu" ...

# The classifiers below need the label as a factor.
IMCU.Images.data$group1 <- factor(IMCU.Images.data$group1)

# Put the columns in the order id, group1, im, v.


IMCU.Images.data <- IMCU.Images.data[c("id", "group1", "im", "v")]
str(IMCU.Images.data)

## 'data.frame': 660000 obs. of 4 variables:


## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ group1: Factor w/ 1 level "imcu": 1 1 1 1 1 1 1 1 1 1 ...
## $ im : chr "E1125.jpg" "E1125.jpg" "E1125.jpg" "E1125.jpg" ...
## $ v : num 1 1 1 1 1 1 1 1 1 1 ...

#IMCU.Images.data

#----------------------------#
# since the data created is a single col with all pixel positions
# converting 10000 pixels address as variables

library(tidyr)
##
## Attaching package: 'tidyr'

## The following object is masked from 'package:imager':


##
## fill

## The following object is masked from 'package:magrittr':


##
## extract

# Long -> wide: one row per image, with the pixel index `id` as the column
# name and the pixel value `v` as the cell value.
IMCU.Images.data.1 <- spread(data = IMCU.Images.data, key = id, value = v)
# str(IMCU.Images.data.1)

Now create a data set for the other department, Management Science.

# Load every JPEG image of the second group (label "MS") from its directory.
# `pattern` is used as a regular expression when listing files, so the dot is
# escaped and the match is anchored at the end of the file name; the previous
# ".jpg" also matched names such as "photo.jpg.bak" or "xjpg.txt".
MS.Images <- load.dir(
  path = "C:/Users/LENOVO/Desktop/Image classification/management studies",
  pattern = "\\.jpg$"
)
# Fail early with a clear message instead of a subscript error in the loop
# below when the directory is missing or holds no matching file.
if (length(MS.Images) == 0) {
  stop("No .jpg images found for the MS group", call. = FALSE)
}
# str(MS.Images)
# plot(MS.Images[1])

#--------------------------------------------#
# Convert each image to grayscale, in place.
# seq_along() replaces 1:length(), which yields c(1, 0) for an empty list.
for (i in seq_along(MS.Images)) {
  MS.Images[[i]] <- grayscale(MS.Images[[i]])
}

# str(MS.Images)
class(MS.Images)

## [1] "imlist" "list"


#--------------------------------------------#
# Bring every image to the same 100 x 100 size (one depth slice, one colour
# channel) so that each image yields the same number of pixel values.
# seq_along() replaces 1:length(), which would iterate over c(1, 0) if the
# list were empty.
for (i in seq_along(MS.Images)) {
  MS.Images[[i]] <- resize(
    MS.Images[[i]],
    size_x = 100, size_y = 100, size_z = 1, size_c = 1
  )
}

MS.Images

## Image list of size 40

# str(MS.Images)
class(MS.Images)

## [1] "imlist" "list"

# plot(MS.Images[[1]])

#-------------------------------------------------#
# Replace each image by its pixel values as a plain numeric vector.
for (i in seq_along(MS.Images)) {
  MS.Images[[i]] <- as.numeric(MS.Images[[i]])
}

# str(MS.Images)
class(MS.Images)

## [1] "imlist" "list"


#-------------------------------------------------#
# Stack all images into one long data frame: column `im` holds the image file
# name and column `v` the pixel value (see the str() output below).

MS.Images.data <- as.data.frame(MS.Images)

#---------------------------------------------#
# Number the pixels within each image. Every image was resized to 100 * 100,
# so the 10000 ids are recycled once per image.

MS.Images.data$id <- seq_len(10000L)
str(MS.Images.data)

## 'data.frame': 400000 obs. of 3 variables:


## $ im: chr "E1247.jpg" "E1247.jpg" "E1247.jpg" "E1247.jpg" ...
## $ v : num 0.922 0.941 0.945 0.937 0.941 ...
## $ id: int 1 2 3 4 5 6 7 8 9 10 ...

#---------------------------------#
# Label every row of this data set with its class.
MS.Images.data$group1 <- "MS"
str(MS.Images.data)

## 'data.frame': 400000 obs. of 4 variables:


## $ im : chr "E1247.jpg" "E1247.jpg" "E1247.jpg" "E1247.jpg" ...
## $ v : num 0.922 0.941 0.945 0.937 0.941 ...
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ group1: chr "MS" "MS" "MS" "MS" ...

# The classifiers below need the label as a factor.
MS.Images.data$group1 <- factor(MS.Images.data$group1)

# Put the columns in the order id, group1, im, v.


MS.Images.data <- MS.Images.data[c("id", "group1", "im", "v")]
str(MS.Images.data)
## 'data.frame': 400000 obs. of 4 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ group1: Factor w/ 1 level "MS": 1 1 1 1 1 1 1 1 1 1 ...
## $ im : chr "E1247.jpg" "E1247.jpg" "E1247.jpg" "E1247.jpg" ...
## $ v : num 0.922 0.941 0.945 0.937 0.941 ...

# MS.Images.data

#----------------------------#
# since the data created is a single col with all pixel positions
# converting 10000 pixels address as variables

library(tidyr)
# Long -> wide: one row per image, with the pixel index `id` as the column
# name and the pixel value `v` as the cell value.
MS.Images.data.1 <- spread(data = MS.Images.data, key = id, value = v)
# str(MS.Images.data.1)

Combine the two data sets created above.

# Stack the IMCU rows on top of the MS rows; both tables have one row per
# image and the class label in `group1`.
CU.images.data <- do.call(rbind, list(IMCU.Images.data.1, MS.Images.data.1))
# str(CU.images.data)

Apply a model for classification


Apply k-nearest neighbours (KNN) to the whole data set
# Drop the image file-name column `im` so that only the class label and the
# pixel columns remain. It is removed by name: the positional `[, -2]` would
# delete the first pixel column if this line were run a second time.
CU.images.data$im <- NULL
# use caret package for KNN
library(caret)

## Loading required package: lattice

## Loading required package: ggplot2


# Fix the RNG seed so the cross-validation folds, and therefore the reported
# accuracies, are reproducible. The output recorded in this document came from
# an unseeded run, so the numbers may differ slightly.
set.seed(2019)
fitControl <- trainControl(method = "cv")

# k-nearest-neighbour classifier on all pixel columns. Predictors are centred
# and scaled, and tuneLength = 5 makes caret try five candidate values of k.
knnMod2 <- train(
  group1 ~ .,
  data = CU.images.data,
  method = "knn",
  trControl = fitControl,
  preProcess = c("center", "scale"),
  tuneLength = 5
)

summary(knnMod2)

## Length Class Mode


## learn 2 -none- list
## k 1 -none- numeric
## theDots 0 -none- list
## xNames 10000 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list

print(knnMod2)
## k-Nearest Neighbors
##
## 106 samples
## 10000 predictors
## 2 classes: 'imcu', 'MS'
##
## Pre-processing: centered (10000), scaled (10000)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 96, 95, 96, 95, 95, 96, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.6754545 0.3341953
## 7 0.6690909 0.3081981
## 9 0.6300000 0.2157474
## 11 0.6209091 0.1961218
## 13 0.6127273 0.1482331
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 5.

# Plot the resampling results of the fitted caret model.
plot(knnMod2)
# Predict the same rows the model was trained on and cross-tabulate the
# predictions against the true labels (first column of the data).
# NOTE(review): this scores the model on its own training data, so the accuracy
# is optimistic compared with the cross-validated figures printed above.
pred <- predict(knnMod2, newdata = CU.images.data)
confusionMatrix(data = pred, reference = CU.images.data[[1]])
## Confusion Matrix and Statistics
##
## Reference
## Prediction imcu MS
## imcu 56 10
## MS 10 30
##
## Accuracy : 0.8113
## 95% CI : (0.7238, 0.8808)
## No Information Rate : 0.6226
## P-Value [Acc > NIR] : 2.122e-05
##
## Kappa : 0.5985
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.8485
## Specificity : 0.7500
## Pos Pred Value : 0.8485
## Neg Pred Value : 0.7500
## Prevalence : 0.6226
## Detection Rate : 0.5283
## Detection Prevalence : 0.6226
## Balanced Accuracy : 0.7992
##
## 'Positive' Class : imcu
##

—————————————————-
Support Vector Machine
# Support vector classifier from e1071: linear kernel, with the pixel columns
# scaled before fitting.
library(e1071)

dtm1 <- svm(
  formula = group1 ~ .,
  data = CU.images.data,
  kernel = "linear",
  scale = TRUE
)


summary(dtm1)
##
## Call:
## svm(formula = group1 ~ ., data = CU.images.data, kernel = "linear",
## scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
## gamma: 1e-04
##
## Number of Support Vectors: 85
##
## ( 50 35 )
##
##
## Number of Classes: 2
##
## Levels:
## imcu MS

# Components of the fitted model that can be inspected:
# dtm1$SV
# dtm1$kernel
# dtm1$tot.nSV
# dtm1$decision.values
# dtm1$fitted
# dtm1$coefs

# Check the classifier with a confusion matrix of predicted against true
# labels.
# NOTE(review): the predictions are made on the training rows themselves, so
# this measures fit, not out-of-sample accuracy.
predict1 <- predict(dtm1, newdata = CU.images.data)
confusionMatrix(data = predict1, reference = CU.images.data$group1)
## Confusion Matrix and Statistics
##
## Reference
## Prediction imcu MS
## imcu 66 0
## MS 0 40
##
## Accuracy : 1
## 95% CI : (0.9658, 1)
## No Information Rate : 0.6226
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.6226
## Detection Rate : 0.6226
## Detection Prevalence : 0.6226
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : imcu
##

————————————————————-
Predicting for images outside the data set
Test data set
Import images from the directory, or load images of IMCU
faculty members
# Load every JPEG image from the test directory.
# `pattern` is used as a regular expression when listing files, so the dot is
# escaped and the match is anchored at the end of the file name; the previous
# ".jpg" also matched names such as "photo.jpg.bak" or "xjpg.txt".
test.Images <- load.dir(
  path = "C:/Users/LENOVO/Desktop/Image classification/test",
  pattern = "\\.jpg$"
)
# Fail early with a clear message instead of a subscript error in the loop
# below when the directory is missing or holds no matching file.
if (length(test.Images) == 0) {
  stop("No .jpg images found in the test directory", call. = FALSE)
}

# Convert each image to grayscale, in place.
# seq_along() replaces 1:length(), which yields c(1, 0) for an empty list.
for (i in seq_along(test.Images)) {
  test.Images[[i]] <- grayscale(test.Images[[i]])
}

# Bring every image to the same 100 x 100 size (one depth slice, one colour
# channel), matching the preprocessing of the training images.
# seq_along() replaces 1:length(), which would iterate over c(1, 0) if the
# list were empty.
for (i in seq_along(test.Images)) {
  test.Images[[i]] <- resize(
    test.Images[[i]],
    size_x = 100, size_y = 100, size_z = 1, size_c = 1
  )
}

# Replace each image by its pixel values as a plain numeric vector.
for (i in seq_along(test.Images)) {
  test.Images[[i]] <- as.numeric(test.Images[[i]])
}

#-------------------------------------------------#
# Stack all test images into one long data frame with the image name in `im`
# and the pixel value in `v`.

test.Images.data <- as.data.frame(test.Images)

#---------------------------------------------#
# Number the pixels within each image. Every image was resized to 100 * 100,
# so the 10000 ids are recycled once per image.

test.Images.data$id <- seq_len(10000L)

#---------------------------------#
# Give every row the placeholder label "test", stored as a factor.
test.Images.data$group1 <- factor(rep("test", nrow(test.Images.data)))

# Put the columns in the order id, group1, im, v.


test.Images.data <- test.Images.data[c("id", "group1", "im", "v")]
#----------------------------#
# Reshape to one row per test image: each pixel index `id` becomes a column
# holding that pixel's value `v`.

library(tidyr)

test.Images.data.1 <- spread(test.Images.data, id, v)
# str(test.Images.data.1)
# Drop the file-name column `im` by name rather than by position: `[, -2]`
# silently removes a pixel column if the column order changes or the line is
# run twice.
test.Images.data.1$im <- NULL

# Classify the unseen test images with the fitted SVM and show the predicted
# label of each image. No confusion matrix is computed here.
# Note: this overwrites the `predict1` object created for the training data.


predict1 <- predict(dtm1, newdata = test.Images.data.1)
print(predict1)

## 1 2 3 4
## imcu imcu imcu imcu
## Levels: imcu MS

———————————————-
Random forest
Takes a lot of time - not useful

You might also like