# You are on page 1 of 3

## stagewise
## Forward stagewise regression using the lars package (Trevor Hastie, Stanford).

# Install lars only when it is missing, so re-running the script does not
# trigger a fresh download and install every time.
if (!requireNamespace("lars", quietly = TRUE)) {
  install.packages("lars")
}
library(lars)

# Usage, for reference only (kept as a comment: executing it here would fail
# because x and y are not defined yet):
#   lars(x, y, type = c("lasso", "lar", "forward.stagewise", "stepwise"),
#        trace = TRUE, normalize = TRUE)
# trace indicates progress of the model.
# x & y are the matrix of predictors & the responses, respectively.

# NOTE(review): `conc` is not created in this script; it is assumed to be a
# data set already in the workspace with 8 predictor columns followed by the
# response in column 9 -- confirm.
x <- as.matrix(conc[, 1:8])
y <- conc[, 9]

model_1 <- lars(x, y, type = "forward.stagewise", trace = TRUE,
                normalize = TRUE)
plot(model_1)
summary(model_1)

# Correlation matrix of the first nine columns.
# Author's note: steps of addition - var 1, var 5, var 8, etc...
var_corr <- cor(conc[, 1:9])

## stepwise
library(MASS)

# Working copy of the first nine columns of `conc`; the formulas below use its
# `Strength` column as the response.
conc_mass <- conc[, 1:9]

# Full model: Strength regressed on every other column.
model_2 <- lm(Strength ~ ., data = conc_mass)

# Stepwise selection in both directions, starting from the full model.
fit_1 <- step(model_2, direction = "both")
summary(fit_1)
# Display the selection path. This used to read `step$anova`, which ran before
# any object called `step` existed and therefore tried to subset the
# stats::step function itself (an error).
fit_1$anova

model_3 <- lm(Strength ~ ., data = conc_mass)
summary(model_3)

# Backward elimination with MASS::stepAIC, again starting from the full model.
# NOTE(review): the result is stored under the name `step`, which shadows the
# stats::step function name for non-call lookups; the name is kept so any
# later code that reads `step` keeps working.
step <- stepAIC(model_3, direction = "backward")
step$anova

## Best subsets

library(leaps)
library(car)

# Subset search over the predictors of Strength, requesting nbest = 4.
leaps <- regsubsets(Strength ~ ., data = conc_mass, nbest = 4)

# Tabular view of the search results.
summary(leaps)

# Table-style plot showing which variables appear in each model, with the
# models ordered by the selection statistic (here R-squared).
plot(leaps, scale = "r2")

# R-squared plotted against subset size (car::subsets).
subsets(leaps, statistic = "rsq")

# read_excel() comes from readxl, which the script never loaded.
library(readxl)
Concrete_Data <- read_excel("D:/Analytics Consulting using ML/Concrete_Data.xls",
                            sheet = "Sheet1")

## PCR
library(pls)

# Principal component regression of compressive strength on all other columns,
# with scaled predictors and cross-validation.
# The response is named directly (back-ticked) instead of via
# `Concrete_Data$...`, so the formula is resolved entirely inside `data`.
pcr_model <- pcr(`Concrete compressive strength(MPa, megapascals)` ~ .,
                 data = Concrete_Data, scale = TRUE, validation = "CV")

summary(pcr_model)

# Plot the root mean squared error
validationplot(pcr_model)

# Plot the cross validation MSE
validationplot(pcr_model, val.type = "MSEP")

# Plot the R2
validationplot(pcr_model, val.type = "R2")

## plot the predicted vs measured values
predplot(pcr_model)

# Plot the regression coefficients
coefplot(pcr_model)

# Train-test split.
# Rows 1-900 are used for training and rows 901-1030 for testing. The test
# range used to start at 900, which put row 900 in both sets.
train_rows <- 1:900
test_rows <- 901:1030

train <- Concrete_Data[train_rows, ]
# `[[9]]` extracts the response as a plain vector, whether Concrete_Data is a
# data.frame or a tibble (where `[rows, 9]` would stay a one-column table).
y_test <- Concrete_Data[[9]][test_rows]
test <- Concrete_Data[test_rows, 1:8]

# Fit on the training rows only. The model used to be refit on the whole of
# Concrete_Data, so the "test" rows had already been seen during fitting and
# `train` was never used.
pcr_model <- pcr(`Concrete compressive strength(MPa, megapascals)` ~ .,
                 data = train, scale = TRUE, validation = "CV")

pcr_pred <- predict(pcr_model, test, ncomp = 5)

# Test-set mean squared error. as.vector() flattens the prediction array so it
# lines up element-wise with y_test.
# Author's recorded value was 96.4586, obtained with the model fit on all rows;
# re-run to get the held-out figure.
mean((as.vector(pcr_pred) - y_test)^2)

library(glmnet)

# format data: predictors (columns 1-8) and response (column 9) as matrices
x <- as.matrix(Concrete_Data[, 1:8])
y <- as.matrix(Concrete_Data[, 9])

# fit model - the author labels this "normal regression"; the call uses
# alpha = 0.5 with a small penalty, lambda = 0.01
fit <- glmnet(x, y, family = "gaussian", alpha = 0.5, lambda = 0.01)

# summarize the fit
coef(fit)
# Output recorded by the author. It is kept as a comment because, as bare
# text, it is not valid R and stops the whole file from parsing:
#   (Intercept)                                            -20.92893151
#   Cement (component 1)(kg in a m^3 mixture)                0.11906941
#   Blast Furnace Slag (component 2)(kg in a m^3 mixture)    0.10301203
#   Fly Ash (component 3)(kg in a m^3 mixture)               0.08697110
#   Water (component 4)(kg in a m^3 mixture)                -0.15327323
#   Superplasticizer (component 5)(kg in a m^3 mixture)      0.28933301
#   Coarse Aggregate (component 6)(kg in a m^3 mixture)      0.01730834
#   Fine Aggregate (component 7)(kg in a m^3 mixture)        0.01929205
#   Age (day)                                                0.11420121

# make predictions on the same data the model was fit to (in-sample)
predictions <- predict(fit, x, type = "link")
mse <- mean((y - predictions)^2)
print(mse) ## linear regression=107.2126

# Ridge regression: same predictors and response as above, refit with
# alpha = 0 and lambda = 0.001. This overwrites `fit`, `predictions` and `mse`.
fit <- glmnet(x = x, y = y, family = "gaussian", alpha = 0, lambda = 0.001)

# Coefficients of the ridge fit.
coef(fit)

# In-sample predictions and their mean squared error.
predictions <- predict(fit, newx = x, type = "link")
mse <- mean((predictions - y)^2)
# NOTE(review): the original carried the trailing note
# "linear regression=107.2126" here, identical to the previous section's --
# confirm the value actually obtained for the ridge fit.
print(mse)

# Regression with dummy variables

# Baseline: Sales explained by Resale alone.
reg_model <- lm(Sales ~ Resale, data = car_sales)
summary(reg_model)

library(DescTools)

# Hand-built design: the Sales vector bound next to the manufacturer dummies
# from Dummy(). The Sales column carries no name through cbind(), so it is
# referred to as V1 in the formula below.
car_makers <- as.data.frame(cbind(car_sales$Sales, Dummy(car_sales$Manufact)))
reg_model <- lm(V1 ~ ., data = car_makers)
summary(reg_model)

# For comparison: pass Manufact to lm() directly instead of pre-built dummies.
reg_model <- lm(Sales ~ Manufact, data = car_sales)
summary(reg_model)

##### SVD

# Singular value decomposition of the data; the result holds u, d and v.
conc_red <- svd(Concrete_Data)

## Look at the 3 component matrices.
## Author's size notes: u is 1030*9, d is 1*9, v is 9*9.
conc_red$u

## % of variance explained: each squared singular value as a share of their sum.
## NOTE(review): the data is passed to svd() without centring, so these are
## shares of the total sum of squares; they match variance shares only for
## centred columns -- confirm which is intended.
variance.explained <- conc_red$d^2 / sum(conc_red$d^2)

## Properties of the SVD relied on here:
## - Third property: the rows of u represent the row categories of the
##   original table, and the rows of v represent the column categories.
## - Fourth property: the columns of u are orthogonal to each other, and the
##   columns of v are orthogonal to each other.
## Combined, these give considerable simplicity in later analyses: for example,
## they allow computing uncorrelated principal components in principal
## components analysis and producing plots of correspondence analysis.

## Return back to the original dataset: u %*% diag(d) %*% t(v).
orig <- with(conc_red, u %*% diag(d) %*% t(v))

# You might also like