Analítica Cross Selling: Prof José Antonio Taquía Gutiérrez

Analítica cross selling
Prof José Antonio Taquía Gutiérrez

Procedimiento
Cargamos el dataset de productos y describimos su contenido:
Los datos constan de tres columnas:
Member_number: un identificador de cliente que permite
distinguir las compras realizadas por diferentes clientes.
Date: la fecha de la transacción.
itemDescription: la descripción del artículo que se compró.
# Load the raw transactions. Later steps in this script read the columns
# Member_number, Date and itemDescription from this data frame.
dat <- read.csv(file = "dat_groceries.csv", header = TRUE, sep = ",")

# read.csv() already returns a data frame; the copy is kept under its
# original name in case other code refers to it.
dat.data <- data.frame(dat)

# Quick inspection: dimensions and first rows.
dim(dat.data)
head(dat.data)
Procesamiento de los datos
 print(typeof(dat))
 df_sorted <- dat[order(dat$Member_number),]
 #convert member number to numeric
 df_sorted$Member_number <- as.numeric(df_sorted$Member_number)
 #convert item description to categorical format
 df_sorted$itemDescription <- as.factor(df_sorted$itemDescription)
 library(plyr)
 library(dplyr)
 if(sessionInfo()['basePkgs']=="dplyr" | sessionInfo()
['otherPkgs']=="dplyr"){ detach(package:dplyr, unload=TRUE)}
 #group all the items that were bought together; by the same
customer on the same date
 library(plyr)
 df_itemList <- ddply(dat, c("Member_number","Date"),
function(df1)paste(df1$itemDescription,collapse = ","))
 #remove member number and date

 df_itemList$Member_number <- NULL
 df_itemList$Date <- NULL
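ddply
Esta función (del paquete plyr) divide el data frame en subconjuntos según las variables indicadas, aplica una función a cada subconjunto y combina los resultados en un nuevo data frame.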
 # Summarize a dataset by two variables
 dfx <- data.frame(
group = c(rep('A', 8), rep('B', 15), rep('C', 6)),
sex = sample(c("M", "F"), size = 29, replace = TRUE),
age = runif(n = 29, min = 18, max = 54))
 # Note the use of the '.' function to allow>
 # group and sex to be used without quoting
 > ddply(dfx, .(group, sex), summarize, mean = round(mean(age), 2),
sd = round(sd(age), 2))
group sex mean sd1
A F 31.59 8.982
A M 34.27 7.933
B F 46.21 9.014
B M 42.74 9.255
C F 41.59 1.546
C M 31.59 15.01
 colnames(df_itemList) <- c("itemList")
 #write to csv format
 write.csv(df_itemList,"ItemList100.csv",quote = FALSE, row.names = TRUE)
 #load package required
 library(arules)
 #convert csv file to basket format

 txn = read.transactions(file="ItemList100.csv", rm.duplicates= FALSE, format="b
 print(typeof(txn)) #S4
 #remove quotes from transactions
 txn@itemInfo$labels <- gsub("\"","",txn@itemInfo$labels)
 #run apriori algorithm
 basket_rules <- apriori(txn,parameter = list(minlen=2,sup = 0.001, conf
= 0.01, target="rules"))
 #basket_rules <- apriori(txn,parameter = list(minlen=2,sup = 0.00001,
conf = 0.01, target="rules"),appearance = list(lhs = "CLEMENTINES")))
 #check if tm is attched; if yes then detach

 if(sessionInfo()['basePkgs']=="tm" | sessionInfo()['otherPkgs']=="tm"){
detach(package:sentiment, unload=TRUE)
detach(package:tm, unload=TRUE)
}
 #view rules
 inspect(basket_rules)
 #convert to datframe and view; optional
 df_basket <- as(basket_rules,"data.frame")
 df_basket$confidence <- df_basket$confidence * 100
 df_basket$support <- df_basket$support * nrow(df)
# Rules for recommendations:
# split lhs and rhs into two columns
library(reshape2)
# reshape2 is an R package written by Hadley Wickham that makes it easy to
# transform data between wide and long formats.

# After this call df_basket$rules is itself a data frame with the columns
# lhs and rhs, obtained by splitting the rule text at "=>".
df_basket <- transform(
  df_basket,
  rules = colsplit(rules, pattern = "=>", names = c("lhs", "rhs"))
)

# Remove curly brackets around rules.
# Only the braces are stripped (and surrounding blanks trimmed), so the
# commas between items and punctuation inside item names are preserved.
# gsub() returns character vectors, so no further conversion is needed.
df_basket$rules$lhs <- trimws(gsub("[{}]", "", df_basket$rules$lhs))
df_basket$rules$rhs <- trimws(gsub("[{}]", "", df_basket$rules$rhs))

library(stringi)
library(dplyr)

# Recommendation lookup: right-hand sides of every rule whose left-hand
# side mentions "yogurt".
df_basket$rules %>%
  filter(stri_detect_fixed(lhs, "yogurt")) %>%
  select(rhs)
Visualización de resultados
 #plot the rules
 library(arulesViz)
 plot(basket_rules)
 set.seed(8000)
 plot(basket_rules, method = "grouped", control = list(k = 5))
 plot(basket_rules[1:10,], method="graph",
control=list(type="items"))
Visualización de resultados
 plot(basket_rules[1:10,], method="paracoord",
control=list(alpha=.5, reorder=TRUE))
 itemFrequencyPlot(txn, topN = 5)
 plot(basket_rules[1:10,],measure=c("support","lift"),shading=
"confidence",interactive=T)

Analítica Cross Selling: Prof José Antonio Taquía Gutiérrez

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Analítica Cross Selling: Prof José Antonio Taquía Gutiérrez

Uploaded by

Copyright:

Available Formats

Analítica cross selling

Prof José Antonio Taquía Gutiérrez

>dat <- read.csv(file="dat_groceries.csv", header=TRUE, sep=",")

 #remove member number and date

 #convert csv file to basket format

 #check if tm is attched; if yes then detach

# split lhs and rhs into two columns

#Reshape2 is an R package written by Hadley Wickham that makes it easy to

df_basket <- transform(df_basket, rules = colsplit(rules, pattern = "=>", names =

# Remove curly brackets around rules

You might also like