You are on page 1 of 23

Association Rules: R code

Books data set :

=> Installing packages “arules”


# Install arules only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}
# => Invoking package
library("arules")
# => Loading the data set (file.choose() opens an interactive file picker)
books <- read.csv(file.choose())
# Inspecting the data set
# read.csv() returns a data.frame, and arules::inspect() has no data.frame
# method, so inspect(books[1:5]) fails; head() previews the first five rows.
head(books, 5)
class(books)

summary(books)
ChildBks YouthBks CookBks DoItYBks
Min. :0.000 Min. :0.0000 Min. :0.000 Min. :0.000
1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.000
Median :0.000 Median :0.0000 Median :0.000 Median :0.000
Mean :0.423 Mean :0.2475 Mean :0.431 Mean :0.282
3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:1.000
Max. :1.000 Max. :1.0000 Max. :1.000 Max. :1.000
RefBks ArtBks GeogBks ItalCook
Min. :0.0000 Min. :0.000 Min. :0.000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
Median :0.0000 Median :0.000 Median :0.000 Median :0.0000
Mean :0.2145 Mean :0.241 Mean :0.276 Mean :0.1135
3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.0000
Max. :1.0000 Max. :1.000 Max. :1.000 Max. :1.0000
ItalAtlas ItalArt Florence
Min. :0.000 Min. :0.0000 Min. :0.0000
1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.000 Median :0.0000 Median :0.0000
Mean :0.037 Mean :0.0485 Mean :0.1085
3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000

# making rules using apriori algorithm


Using the Apriori algorithm to understand the data set
Parameter specification:
confidence minval smax arem aval originalSupport maxtime support
0.7 0.1 1 none FALSE TRUE 5 0.004
minlen maxlen target ext
2 10 rules TRUE

# Building rules using apriori algorithm


Algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE

Absolute minimum support count: 8

set item appearances ...[0 item(s)] done [0.00s].


set transactions ...[11 item(s), 2000 transaction(s)] done [0.00s].
sorting and recoding items ... [11 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 6 7 8 9 10 done [0.00s].
writing ... [11242 rule(s)] done [0.02s].
creating S4 object ... done [0.01s].

# Mine association rules with Apriori: minimum support 0.004, minimum
# confidence 0.70, and at least two items per rule (minlen = 2).
# NOTE(review): the inspected rules in this document carry items such as
# `ChildBks=[0,1]` with support 1 and lift 1, which suggests the 0/1 columns
# were discretised into one interval each rather than treated as purchased /
# not purchased. Consider converting first, e.g.
# `as(as.matrix(books) == 1, "transactions")` -- confirm against the data.
arules <- apriori(
  books,
  parameter = list(support = 0.004, confidence = 0.70, minlen = 2)
)

# Print the rule-set summary.
arules
=>set of 11242 rules

# Viewing rules based on lift value


# Rules are S4 objects, so inspect() is used to display them; this shows the
# six rules ranked highest by lift.
inspect(head(sort(arules, by = "lift"), n = 6L))
lhs rhs support confidence
[1] {ChildBks=[0,1]} => {YouthBks=[0,1]} 1 1
[2] {YouthBks=[0,1]} => {ChildBks=[0,1]} 1 1
[3] {ChildBks=[0,1]} => {CookBks=[0,1]} 1 1
[4] {CookBks=[0,1]} => {ChildBks=[0,1]} 1 1
[5] {ChildBks=[0,1]} => {DoItYBks=[0,1]} 1 1
[6] {DoItYBks=[0,1]} => {ChildBks=[0,1]} 1 1
coverage lift count
[1] 1 1 2000
[2] 1 1 2000
[3] 1 1 2000
[4] 1 1 2000
[5] 1 1 2000
[6] 1 1 2000

# Overall quality: first six rows of the per-rule quality measures.
# (The original line was missing its closing parenthesis.)
head(quality(arules))

# Installing the package for better visualization (run once if it is missing):
# install.packages("arulesViz")
# Invoking the library
library("arulesViz") # for visualizing rules

# Different Ways of Visualizing Rules


# Default arulesViz plot of all rules.
plot(arules)
# dev.new() opens a new graphics device on every platform; the original
# windows() call is available only on Windows builds of R.
dev.new()
plot(arules, method = "grouped")

# For good visualization try plotting only a few rules.
plot(arules[1:10], method = "graph")
# => Saving the data
write(arules, file = "a_rules.csv", sep = ",")
# The relative file name above is resolved against this working directory.
getwd()

Phone data set :

# => Installing the package (only when it is not already available)
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}
# => Used for building association rules (apriori algorithm)
library("arules")
# => Loading the data set
phone <- read.csv(file.choose())

# inspect() is defined for arules objects, not for the data.frame that
# read.csv() returns, so preview the first five rows with head() instead.
head(phone, 5)
# Summarizing the data to check the min and max values
class(phone)
summary(phone)
V1 V2 V3
Length:11 Length:11 Length:11
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
red white green
Min. :0.0000 Min. :0.0000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
Median :1.0000 Median :1.0000 Median :0.0000
Mean :0.5455 Mean :0.6364 Mean :0.1818
3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
Max. :1.0000 Max. :1.0000 Max. :1.0000

yellow orange blue


Min. :0.00000 Min. :0.0000 Min. :0.0000
1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.00000 Median :0.0000 Median :1.0000
Mean :0.09091 Mean :0.1818 Mean :0.5455
3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000
Max. :1.00000 Max. :1.0000 Max. :1.0000

# making rules using apriori algorithm

# Building rules using apriori algorithm


Parameter specification:
confidence minval smax arem aval originalSupport maxtime support
0.7 0.1 1 none FALSE TRUE 5 0.004
minlen maxlen target ext
1 10 rules TRUE

# Build the rule set for the phone data with the Apriori algorithm
# (support 0.004, confidence 0.70, minimum rule length 1).
arules <- apriori(
  phone,
  parameter = list(support = 0.004, confidence = 0.70, minlen = 1)
)

# Printing the object reports how many rules were found.
arules
=>set of 10224 rules

# Display the six rules with the highest lift; rules need inspect() to print.
# (The leading "=>" annotation marker was removed so the line parses as R.)
inspect(head(sort(arules, by = "lift")))


lhs rhs support confidence
[1] {V1=yellow} => {V2=} 0.09090909 1
[2] {V1=green} => {V2=} 0.09090909 1
[3] {V1=yellow,white=[0,0.333)} => {V2=} 0.09090909 1
[4] {V1=yellow,V3=} => {V2=} 0.09090909 1
[5] {V1=yellow,blue=[0,1]} => {V2=} 0.09090909 1
[6] {V1=yellow,orange=[0,1]} => {V2=} 0.09090909 1
coverage lift count
[1] 0.09090909 5.5 1
[2] 0.09090909 5.5 1
[3] 0.09090909 5.5 1
[4] 0.09090909 5.5 1
[5] 0.09090909 5.5 1
[6] 0.09090909 5.5 1

# First six rows of the quality measures attached to the rules.
head(quality(arules), n = 6L)
support confidence coverage lift count
1 1.00000000 1 1.00000000 1.0 11
2 1.00000000 1 1.00000000 1.0 11
3 1.00000000 1 1.00000000 1.0 11
4 1.00000000 1 1.00000000 1.0 11
5 1.00000000 1 1.00000000 1.0 11
6 0.09090909 1 0.09090909 5.5 1

# => Invoking the library to visualize the rules mined from the data set
library("arulesViz") # for visualizing rules

plot(arules)

# dev.new() opens a new graphics device on every platform; the original
# windows() call is available only on Windows builds of R.
dev.new()
plot(arules, method = "grouped")
# For good visualization try plotting only a few rules.
plot(arules[1:5], method = "graph")
# => Saving the data
write(arules, file = "a_rules.csv", sep = ",")

getwd()
Movie data set :

# Install arules only when it is missing.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}

library("arules") # Used for building association rules i.e. apriori algorithm

# Load the movie data from an interactively chosen CSV file.
movie <- read.csv(file.choose())

# inspect() works on arules objects only; movie is a data.frame, so preview
# its first ten rows with head() instead of inspect(movie[1:10]).
head(movie, 10)
class(movie)
# Creating data frame

summary(movie)
V1 V2 V3
Length:10 Length:10 Length:10
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
V4 V5 Sixth.Sense
Length:10 Length:10 Min. :0.0
Class :character Class :character 1st Qu.:0.0
Mode :character Mode :character Median :1.0
Mean :0.6
3rd Qu.:1.0
Max. :1.0
Gladiator LOTR1 Harry.Potter1 Patriot
Min. :0.00 Min. :0.0 Min. :0.0 Min. :0.0
1st Qu.:0.25 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0
Median :1.00 Median :0.0 Median :0.0 Median :1.0
Mean :0.70 Mean :0.2 Mean :0.2 Mean :0.6
3rd Qu.:1.00 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:1.0
Max. :1.00 Max. :1.0 Max. :1.0 Max. :1.0
LOTR2 Harry.Potter2 LOTR Braveheart
Min. :0.0 Min. :0.0 Min. :0.0 Min. :0.0
1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0
Median :0.0 Median :0.0 Median :0.0 Median :0.0
Mean :0.2 Mean :0.1 Mean :0.1 Mean :0.1
3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:0.0
Max. :1.0 Max. :1.0 Max. :1.0 Max. :1.0
Green.Mile
Min. :0.0
1st Qu.:0.0
Median :0.0
Mean :0.2
3rd Qu.:0.0
Max. :1.0

# making rules using apriori algorithm

# Building rules using apriori algorithm


# Build the rule set for the movie data with the Apriori algorithm
# (support 0.004, confidence 0.70, minimum rule length 1).
arules <- apriori(
  movie,
  parameter = list(support = 0.004, confidence = 0.70, minlen = 1)
)
Apriori

Parameter specification:
confidence minval smax arem aval originalSupport maxtime support
0.7 0.1 1 none FALSE TRUE 5 0.004
minlen maxlen target ext
1 10 rules TRUE

Absolute minimum support count: 0

set item appearances ...[0 item(s)] done [0.00s].


set transactions ...[28 item(s), 10 transaction(s)] done [0.00s].
sorting and recoding items ... [28 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 6 7 8 9 10 done [0.22s].
writing ... [1090108 rule(s)] done [1.61s].
creating S4 object ... done [1.02s].
# Printing the rule set reports its size.
# (The leading "=>" annotation marker was removed so the line parses as R.)
arules
set of 1090108 rules

# Rank the rules by lift and display the top six with inspect().
inspect(head(sort(arules, by = "lift"), n = 6L))


lhs rhs support confidence
[1] {V3=Gladiator} => {V2=LOTR} 0.1 1
[2] {V2=LOTR} => {V3=Gladiator} 0.1 1
[3] {V2=LOTR1} => {V3=Harry Potter1} 0.1 1
[4] {V3=Harry Potter1} => {V2=LOTR1} 0.1 1
[5] {V2=LOTR1} => {V5=LOTR2} 0.1 1
[6] {V5=LOTR2} => {V2=LOTR1} 0.1 1
coverage lift count
[1] 0.1 10 1
[2] 0.1 10 1
[3] 0.1 10 1
[4] 0.1 10 1
[5] 0.1 10 1
[6] 0.1 10 1

# First six rows of the quality measures attached to the rules.
head(quality(arules), n = 6L)
support confidence coverage lift count
1 0.8 0.8 1 1 8
2 0.9 0.9 1 1 9
3 1.0 1.0 1 1 10
4 1.0 1.0 1 1 10
5 1.0 1.0 1 1 10
6 1.0 1.0 1 1 10

library("arulesViz") # for visualizing rules


plot(arules)
# dev.new() opens a new graphics device on every platform; the original
# windows() call is available only on Windows builds of R.
dev.new()
plot(arules, method = "grouped")

# For good visualization try plotting only a few rules.
plot(arules[1:10], method = "graph")
# => Saving the data
write(arules, file = "a_rules.csv", sep = ",")

getwd()

Transaction data set :

# Install arules only when it is missing.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules")
}

library("arules") # Used for building association rules i.e. apriori algorithm

# => Loading the data set
# NOTE(review): the column names in the summary (X.HANGING., X.HEART., ...)
# look like items from the first record read as a header -- confirm, and
# consider header = FALSE or arules::read.transactions() for basket files.
tr <- read.csv(file.choose())

# inspect() has no method for the data.frame returned by read.csv(), so
# preview the first five rows with head() instead of inspect(tr[1:5]).
head(tr, 5)
class(tr)
summary(tr)
X.HANGING. X.HEART. X.HOLDER.
Length:557040 Length:557040 Length:557040
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
X.T.LIGHT. X.WHITE. NA.
Length:557040 Length:557040 Length:557040
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
# making rules using apriori algorithm
Apriori

Parameter specification:
confidence minval smax arem aval originalSupport maxtime support
0.7 0.1 1 none FALSE TRUE 5 0.004
minlen maxlen target ext
1 10 rules TRUE
Absolute minimum support count: 2228

set item appearances ...[0 item(s)] done [0.00s].


set transactions ...[4754 item(s), 557040 transaction(s)] done [2.24s].
sorting and recoding items ... [211 item(s)] done [0.08s].
creating transaction tree ... done [1.20s].
checking subsets of size 1 2 3 4 5 done [0.08s].
writing ... [124 rule(s)] done [0.00s].
creating S4 object ... done [0.27s]
Algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE

# Building rules using apriori algorithm

# Build the rule set for the transaction data with the Apriori algorithm
# (support 0.004, confidence 0.70, minimum rule length 1).
arules <- apriori(
  tr,
  parameter = list(support = 0.004, confidence = 0.70, minlen = 1)
)

# Printing the object reports how many rules were found.
arules
set of 124 rules

# Rank the rules by lift and display the top six with inspect().
inspect(head(sort(arules, by = "lift"), n = 6L))


lhs rhs support confidence coverage lift count
[1] {X.HANGING.='3', X.T.LIGHT.='TIER'} => {X.HEART.='CAKESTAND'} 0.004680095 1.0000000 0.004680095 213.6709 2607
[2] {X.HEART.='CAKESTAND'} => {X.T.LIGHT.='TIER'} 0.004680095 1.0000000 0.004680095 205.5498 2607
[3] {X.HANGING.='3', X.HEART.='CAKESTAND'} => {X.T.LIGHT.='TIER'} 0.004680095 1.0000000 0.004680095 205.5498 2607
[4] {X.T.LIGHT.='TIER'} => {X.HEART.='CAKESTAND'} 0.004680095 0.9619926 0.004865001 205.5498 2607
[5] {X.HOLDER.='LONDON', X.T.LIGHT.='LOVE'} => {X.HEART.='I'} 0.004261812 0.8864824 0.004807554 183.9814 2374
[6] {X.WHITE.='TEACUP'} => {X.T.LIGHT.='SAUCER'} 0.005478960 1.0000000 0.005478960 181.3281 3052
# First six rows of the quality measures attached to the rules.
head(quality(arules), n = 6L)
support confidence coverage lift count
1 0.004680095 1.0000000 0.004680095 205.54982 2607
2 0.004680095 0.9619926 0.004865001 205.54982 2607
3 0.004680095 1.0000000 0.004680095 29.79780 2607
4 0.004680095 0.9619926 0.004865001 28.66526 2607
5 0.004234884 0.7816435 0.005417923 127.08893 2359
6 0.004328235 0.8453717 0.005119920 43.21823 2411

library("arulesViz") # for visualizing rules


plot(arules)

# dev.new() opens a new graphics device on every platform; the original
# windows() call is available only on Windows builds of R.
dev.new()
plot(arules, method = "grouped")
# For good visualization try plotting only a few rules.
plot(arules[1:5], method = "graph")
# => Saving the data
write(arules, file = "a_rules.csv", sep = ",")

getwd()

You might also like