You are on page 1 of 6

Retail Relay Case

Source Code:

library(aod)

library(ggplot2)

library(tidyverse) # Easy data manipulation and visualization

library(caret) #For machine learning workflow

library(modelr) # provides easy pipeline modeling functions

library(broom) # helps to tidy up model outputs

library(Rcpp)

# Load the training sheet from the workbook.
dataset1 <- readxl::read_excel(path = "train.xlsX", sheet = "train_data")

# Quick look at the first rows and the column types.
head(dataset1)
str(dataset1)

# Keep only the rows that have no missing values in any column.
dataset2 <- na.omit(dataset1)

library(fastDummies)

# Add one indicator column per level of favday and of city (favday_*, city_*).
# A single call handles both columns; the new columns are appended in the
# order the source columns are listed.
dataset2 <- dummy_cols(dataset2, select_columns = c("favday", "city"))

# Treat the outcome as categorical so glm() fits a classification model.
dataset2$retained <- factor(dataset2$retained)

# Inspect the encoded training data. The original called head(data), which
# prints the base R function `data` rather than the data frame.
str(dataset2)
head(dataset2)

# Logistic regression of retention on all engagement, ordering and
# indicator predictors. Identifiers that had been split across lines
# (tenure, favday_Saturday) are rejoined so the formula parses.
# NOTE(review): if favday and city have exactly the levels listed here,
# including every indicator alongside the intercept is collinear and glm()
# will report NA for the aliased coefficients -- confirm whether one
# reference level per group should be dropped.
model <- glm(
  retained ~ esent + eopenrate + eclickrate + avgorder + ordfreq +
    paperless + refill + doorstep + days_to_first_order + tenure +
    number_of_orders + inter_order_purchase_time +
    favday_Friday + favday_Monday + favday_Saturday + favday_Sunday +
    favday_Thursday + favday_Tuesday + favday_Wednesday +
    city_BWI + city_CHO + city_DCX + city_RIC,
  data = dataset2,
  family = binomial
)

# Read the test sheet, drop incomplete rows, and add the same favday_* and
# city_* indicator columns that were built for the training data.
dataset3 <- readxl::read_excel("test.xlsX", sheet = "test_data") %>%
  na.omit() %>%
  dummy_cols(select_columns = "favday") %>%
  dummy_cols(select_columns = "city")

# Outcome as a factor, matching the training data.
dataset3[["retained"]] <- factor(dataset3[["retained"]])

str(dataset3)

# Predicted probabilities from the full model for every test row.
probabilities <- predict(model, newdata = dataset3, type = "response")
head(probabilities, n = 6)

# Classify with a 0.5 cut-off: 1 above the threshold, 0 otherwise.
predicted.classes <- ifelse(test = probabilities > 0.5, yes = 1, no = 0)

# Share of test rows whose predicted class equals the recorded outcome.
accuracy <- mean(dataset3$retained == predicted.classes)
accuracy

# Plot the observed outcome (coded 0/1) against esent with a fitted logistic
# curve on top.
# The indicator is derived from the factor's own levels rather than a
# hard-coded "pos" label: the first level maps to 0 and any other level to 1,
# which is the convention glm() uses for a factor response.
# xlab() and ylab() are both chained with `+`; previously ylab() was a
# separate statement and never reached the plot.
dataset2 %>%
  mutate(prob = as.numeric(retained != levels(retained)[1])) %>%
  ggplot(aes(esent, prob)) +
  geom_point(alpha = .15) + # alpha sets the transparency of the points
  geom_smooth(method = "glm", method.args = list(family = "binomial")) +
  ggtitle("Logistic regression model fit") +
  xlab("Esent") +
  ylab("Probability")
# Single-predictor logistic regression: retention as a function of esent.
model1 <- glm(formula = retained ~ esent, family = binomial, data = dataset2)

# Full summary, then the coefficient table as a tidy data frame.
summary(model1)
tidy(model1)

# Exponentiated coefficients (odds-ratio scale).
model1 %>%
  coef() %>%
  exp()

# Predict the retention probability at two specific values of esent.
newdata <- data.frame(esent = c(40, 80))

probabilities <- predict(model1, newdata, type = "response")

# Label each prediction with the outcome level it favours. For a factor
# response glm() treats the first level as failure, so a probability above
# 0.5 maps to the second level and anything else to the first. Using the
# data's own levels replaces the hard-coded "pos"/"neg" labels, which belong
# to a different dataset.
outcome_levels <- levels(dataset2$retained)
predicted.classes <- ifelse(
  probabilities > 0.5,
  outcome_levels[2],
  outcome_levels[1]
)

predicted.classes

dataset2 %>%

mutate(prob = ifelse(retained == "pos", 1, 0)) %>%

ggplot(aes(esent, prob)) +

geom_point(alpha = 0.2) + #makes points on graph transparent for large datasets

geom_smooth(method = "glm", method.args = list(family = "binomial")) +

labs(

title = "Logistic Regression Model",

x = "Esent",

y = "Probability"

You might also like