BAR-Module 3-Shrinkage Methods-Dim Reduction-ML

## Stagewise regression
## Uses the lars package (Trevor Hastie, Stanford).
# Install lars only when it is missing instead of reinstalling on every run.
if (!requireNamespace("lars", quietly = TRUE)) {
  install.packages("lars")
}
library(lars)

# Usage reference, kept as a comment: executing it at this point fails
# because x and y have not been created yet.
#   lars(x, y, type = c("lasso", "lar", "forward.stagewise", "stepwise"),
#        trace = TRUE, normalize = TRUE)
# x and y are the matrix of predictors and the responses, respectively;
# trace indicates progress of the model.

# NOTE(review): `conc` is not created anywhere in this script - it must
# already exist in the session (columns 1-8 predictors, column 9 response).
x <- as.matrix(conc[, 1:8])
y <- conc[, 9]

# Forward stagewise fit on the normalized predictors, printing progress.
model_1 <- lars(
  x, y,
  type = "forward.stagewise",
  trace = TRUE,
  normalize = TRUE
)
plot(model_1)
summary(model_1)

# Correlation matrix of all nine columns.
# Original note: steps of addition - var 1, var 5, var 8, etc...
var_corr <- cor(conc[, 1:9])
## Stepwise selection
library(MASS)

# NOTE(review): `conc` is not created in this script; it must already exist
# and contain a column named Strength among its first nine columns.
conc_mass <- conc[, 1:9]

# Full model: Strength regressed on every other column of conc_mass.
model_2 <- lm(Strength ~ ., data = conc_mass)

# Stepwise search in both directions, starting from the full model
# (reuses model_2 instead of fitting the identical model a second time).
fit_1 <- step(model_2, direction = "both")
summary(fit_1)
# Display the selection path. The original read `step$anova` here, which
# subsets the stats::step *function* and fails; the path is stored on the
# fitted object returned by step().
fit_1$anova

# Same full model, used as the starting point for backward elimination.
model_3 <- lm(Strength ~ ., data = conc_mass)
summary(model_3)

# Backward elimination with MASS::stepAIC. The result is stored under its own
# name so that it no longer masks the step() function.
step_backward <- stepAIC(model_3, direction = "backward")
step_backward$anova
## Best-subsets regression
library(leaps)

# Search subsets of the predictors of Strength in conc_mass; nbest = 4 is
# passed through to regsubsets().
leaps <- regsubsets(
  Strength ~ .,
  data = conc_mass,
  nbest = 4
)

# View the results.
summary(leaps)

# Table of models showing the variables in each model; models are ordered
# by the selection statistic (here R-squared).
plot(leaps, scale = "r2")

# Plot the statistic against subset size (subsets() comes from car).
library(car)
subsets(leaps, statistic = "rsq")
## Load the concrete data set
# read_excel() lives in readxl, which the script never loaded.
library(readxl)
# NOTE(review): absolute, machine-specific path - adjust for your machine.
Concrete_Data <- read_excel(
  "D:/Analytics Consulting using ML/Concrete_Data.xls",
  sheet = "Sheet1"
)

## PCR
library(pls)

# Cross-validation folds are random; fix the seed so results are repeatable.
set.seed(1)

# The response is given by its bare (backticked) column name. Writing it as
# Concrete_Data$`...` prevents `.` from excluding that column, so the response
# would also enter the model as a predictor.
pcr_model <- pcr(
  `Concrete compressive strength(MPa, megapascals)` ~ .,
  data = Concrete_Data, scale = TRUE, validation = "CV"
)
summary(pcr_model)

# Plot the root mean squared error
validationplot(pcr_model)
# Plot the cross validation MSE
validationplot(pcr_model, val.type = "MSEP")
# Plot the R2
validationplot(pcr_model, val.type = "R2")

## Plot the predicted vs measured values
predplot(pcr_model)
coefplot(pcr_model)

# Train-test split: rows 1-900 train, rows 901-1030 test. The original test
# range started at 900 and therefore shared a row with the training set.
train <- Concrete_Data[1:900, ]
test <- Concrete_Data[901:1030, 1:8]
# Extract the response as a plain numeric vector ([[ ]] drops the tibble
# wrapper) so the squared-error arithmetic below yields a number.
y_test <- Concrete_Data[[9]][901:1030]

# Fit on the training rows only; the original refitted on all rows, so the
# test rows had already been seen by the model.
pcr_model <- pcr(
  `Concrete compressive strength(MPa, megapascals)` ~ .,
  data = train, scale = TRUE, validation = "CV"
)

# Predictions from the first five components; drop() flattens the array
# returned by predict() into a vector aligned with y_test.
pcr_pred <- predict(pcr_model, test, ncomp = 5)

# Test-set mean squared error. The original script noted 96.4586 for its run
# (model fitted on all rows); expect a different value with a proper split.
mean((drop(pcr_pred) - y_test)^2)
library(glmnet)

# Format data: columns 1-8 are the predictors, column 9 the response.
x <- as.matrix(Concrete_Data[, 1:8])
y <- as.matrix(Concrete_Data[, 9])

# Fit model - elastic net (alpha = 0.5) with a very small penalty
# (lambda = 0.01). The original comment called this "normal regression";
# with such a small lambda it was presumably meant as a stand-in for it.
fit <- glmnet(x, y, family = "gaussian", alpha = 0.5, lambda = 0.01)

# Summarize the fit
coef(fit)
# Console output recorded by the original author (it was pasted into the
# script as bare text, which is not valid R, so it is kept as a comment):
#   (Intercept)                                           -20.92893151
#   Cement (component 1)(kg in a m^3 mixture)               0.11906941
#   Blast Furnace Slag (component 2)(kg in a m^3 mixture)   0.10301203
#   Fly Ash (component 3)(kg in a m^3 mixture)              0.08697110
#   Water (component 4)(kg in a m^3 mixture)               -0.15327323
#   Superplasticizer (component 5)(kg in a m^3 mixture)     0.28933301
#   Coarse Aggregate (component 6)(kg in a m^3 mixture)     0.01730834
#   Fine Aggregate (component 7)(kg in a m^3 mixture)       0.01929205
#   Age (day)                                               0.11420121

# Make predictions on the data the model was fitted to (in-sample).
predictions <- predict(fit, x, type = "link")
mse <- mean((y - predictions)^2)
print(mse) # original note: linear regression = 107.2126

# Fit model - ridge regression (alpha = 0)
fit <- glmnet(x, y, family = "gaussian", alpha = 0, lambda = 0.001)

# Summarize the fit
coef(fit)

# Make predictions (in-sample, as above)
predictions <- predict(fit, x, type = "link")
mse <- mean((y - predictions)^2)
# NOTE(review): the original comment here repeated "linear regression =
# 107.2126" verbatim from the previous fit - confirm the ridge value.
print(mse)
# Regression with dummy variables
# NOTE(review): `car_sales` is not created in this script; it must already
# exist with columns Sales, Resale and Manufact.

# Baseline: Sales explained by Resale alone.
reg_model <- lm(Sales ~ Resale, data = car_sales)
summary(reg_model)

library(DescTools)

# Build the modelling frame in a single expression: dummy-code Manufact,
# put Sales in front of the dummy columns, and convert to a data frame.
# The Sales column has no name inside cbind(), so it ends up as V1.
car_makers <- as.data.frame(
  cbind(car_sales$Sales, Dummy(car_sales$Manufact))
)
reg_model <- lm(V1 ~ ., data = car_makers)
summary(reg_model)

# For comparison: hand Manufact to lm() directly.
reg_model <- lm(Sales ~ Manufact, data = car_sales)
summary(reg_model)
##### SVD
# Singular value decomposition of the concrete data set.
conc_red <- svd(Concrete_Data)

## Look at the 3 component matrices: u, d and v.
# Sizes noted by the original author: u is 1030*9, d is 1*9, v is 9*9.
conc_red$u

## % of variance explained: each squared singular value as a share of
## their total.
# NOTE(review): the data are not centered before svd(), so these are shares
# of the total sum of squares - confirm this is the intended "variance".
variance.explained <- conc_red$d^2 / sum(conc_red$d^2)

## Properties of the SVD used below:
# - the rows of u represent the row categories of the original table and the
#   rows of v represent the column categories;
# - the columns of u are orthogonal to each other, and so are the columns
#   of v.
# Together these keep later analyses simple: for example, they allow
# uncorrelated principal components to be computed in principal components
# analysis and plots to be produced for correspondence analysis.

## Return back to the original dataset: u %*% diag(d) %*% t(v).
orig <- with(conc_red, u %*% diag(d) %*% t(v))

BAR-Module 3-Shrinkage Methods-Dim Reduction-ML

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

BAR-Module 3-Shrinkage Methods-Dim Reduction-ML

Uploaded by

Copyright:

Available Formats

##stagewise

##using lars package-trevor hastie-stanford

lars(x, y, type = c("lasso", "lar", "forward.stagewise", "stepwise"),

model_1<- lars(x,y,type="forward.stagewise",trace = TRUE,

model_3 <- lm(Strength~.,data=conc_mass)

Concrete_Data<-read_excel("D:/Analytics Consulting using ML/Concrete_Data.xls"

# Plot the root mean squared error

##plot the predicted vs measured values

pcr_model<-pcr(Concrete_Data$`Concrete compressive strength(MPa, megapascals)`~.

pcr_pred <- predict(pcr_model, test, ncomp = 5)

fit <- glmnet(x, y, family="gaussian", alpha=0.5, lambda=0.01)

# fit model-ridge regression

#regression with Dummy variables

##look at the 3 component matrices

##% of variance explained

##The third property of the SVD is that the rows of u represents

You might also like