Pratik Zanke Source Codes PDF

1 Appendix A – Source Code
#=======================================================================
#
# Data Analysis – Factor-Hair-Revised
#
#=======================================================================
##Load the packages used in this analysis
library ("psych")
library("readr")
library("dplyr")
library ("corrplot")
library("ggplot2")
library("nFactors")
library("lattice")
library("tidyr")
library("tidyverse")
library("car")
## Point the session at the project folder and echo the resulting directory
setwd("F:/project")
getwd()
## Read the survey file; header = TRUE takes the column names from its first row
fact <- read.csv(file = "Factor-Hair-Revised-1.csv", header = TRUE)
## EXPLORATORY ANALYSIS
## Number of rows and columns
dim(fact)
## Object class and column names
class(fact)
names(fact)
## TRUE if any cell of the data is missing
any(is.na(fact))
[1] FALSE
##Structure of data
1
str(fact)
'data.frame': 100 obs. of 13 variables:
$ ID : int 1 2 3 4 5 6 7 8 9 10 ...
$ ProdQual : num 8.5 8.2 9.2 6.4 9 6.5 6.9 6.2 5.8 6.4 ...
$ Ecom : num 3.9 2.7 3.4 3.3 3.4 2.8 3.7 3.3 3.6 4.5 ...
$ TechSup : num 2.5 5.1 5.6 7 5.2 3.1 5 3.9 5.1 5.1 ...
$ CompRes : num 5.9 7.2 5.6 3.7 4.6 4.1 2.6 4.8 6.7 6.1 ...
$ Advertising : num 4.8 3.4 5.4 4.7 2.2 4 2.1 4.6 3.7 4.7 ...
$ ProdLine : num 4.9 7.9 7.4 4.7 6 4.3 2.3 3.6 5.9 5.7 ...
$ SalesFImage : num 6 3.1 5.8 4.5 4.5 3.7 5.4 5.1 5.8 5.7 ...
$ ComPricing : num 6.8 5.3 4.5 8.8 6.8 8.5 8.9 6.9 9.3 8.4 ...
$ WartyClaim : num 4.7 5.5 6.2 7 6.1 5.1 4.8 5.4 5.9 5.4 ...
$ OrdBilling : num 5 3.9 5.4 4.3 4.5 3.6 2.1 4.3 4.4 4.1 ...
$ DelSpeed : num 3.7 4.9 4.5 3 3.5 3.3 2 3.7 4.6 4.4 ...
$ Satisfaction: num 8.2 5.7 8.9 4.8 7.1 4.7 5.7 6.3 7 5.5 ...
##Summary of the dataset
summary(fact)
ID ProdQual Ecom
Min. : 1.00 Min. : 5.000 Min. :2.200
1st Qu.: 25.75 1st Qu.: 6.575 1st Qu.:3.275
Median : 50.50 Median : 8.000 Median :3.600
Mean : 50.50 Mean : 7.810 Mean :3.672
3rd Qu.: 75.25 3rd Qu.: 9.100 3rd Qu.:3.925
Max. :100.00 Max. :10.000 Max. :5.700
TechSup CompRes Advertising
Min. :1.300 Min. :2.600 Min. :1.900
1st Qu.:4.250 1st Qu.:4.600 1st Qu.:3.175
Median :5.400 Median :5.450 Median :4.000
Mean :5.365 Mean :5.442 Mean :4.010
3rd Qu.:6.625 3rd Qu.:6.325 3rd Qu.:4.800
Max. :8.500 Max. :7.800 Max. :6.500
ProdLine SalesFImage ComPricing
Min. :2.300 Min. :2.900 Min. :3.700
1st Qu.:4.700 1st Qu.:4.500 1st Qu.:5.875
Mean :5.805 Mean :5.123 Mean :6.974
3rd Qu.:6.800 3rd Qu.:5.800 3rd Qu.:8.400
Max. :8.400 Max. :8.200 Max. :9.900
WartyClaim OrdBilling DelSpeed
Min. :4.100 Min. :2.000 Min. :1.600
1st Qu.:5.400 1st Qu.:3.700 1st Qu.:3.400
Mean :6.043 Mean :4.278 Mean :3.886
3rd Qu.:6.600 3rd Qu.:4.800 3rd Qu.:4.425
Max. :8.100 Max. :6.700 Max. :5.500
Satisfaction
Min. :4.700
1st Qu.:6.000
Median :7.050
Mean :6.918
3rd Qu.:7.625
Max. :9.900
2
##Descriptive statistics for every column (n, mean, sd, median, skew, kurtosis, ...)
describe(fact)
vars n mean sd median trimmed mad
ID 1 100 50.50 29.01 50.50 50.50 37.06
ProdQual 2 100 7.81 1.40 8.00 7.85 1.78
Ecom 3 100 3.67 0.70 3.60 3.63 0.52
TechSup 4 100 5.37 1.53 5.40 5.40 1.85
CompRes 5 100 5.44 1.21 5.45 5.46 1.26
Advertising 6 100 4.01 1.13 4.00 4.00 1.19
ProdLine 7 100 5.80 1.32 5.75 5.81 1.56
SalesFImage 8 100 5.12 1.07 4.90 5.09 0.89
ComPricing 9 100 6.97 1.55 7.10 7.01 1.93
WartyClaim 10 100 6.04 0.82 6.10 6.04 0.89
OrdBilling 11 100 4.28 0.93 4.40 4.31 0.74
DelSpeed 12 100 3.89 0.73 3.90 3.92 0.74
Satisfaction 13 100 6.92 1.19 7.05 6.90 1.33
min max range skew kurtosis se
ID 1.0 100.0 99.0 0.00 -1.24 2.90
ProdQual 5.0 10.0 5.0 -0.24 -1.17 0.14
Ecom 2.2 5.7 3.5 0.64 0.57 0.07
TechSup 1.3 8.5 7.2 -0.20 -0.63 0.15
CompRes 2.6 7.8 5.2 -0.13 -0.66 0.12
Advertising 1.9 6.5 4.6 0.04 -0.94 0.11
ProdLine 2.3 8.4 6.1 -0.09 -0.60 0.13
SalesFImage 2.9 8.2 5.3 0.37 0.26 0.11
ComPricing 3.7 9.9 6.2 -0.23 -0.96 0.15
WartyClaim 4.1 8.1 4.0 0.01 -0.53 0.08
OrdBilling 2.0 6.7 4.7 -0.32 0.11 0.09
DelSpeed 1.6 5.5 3.9 -0.45 0.09 0.07
Satisfaction 4.7 9.9 5.2 0.08 -0.86 0.12
## attach() is kept because the rest of the script refers to columns such as
## Satisfaction and ProdQual by bare name.
attach(fact)
## fact1: every variable except the first column (ID)
fact1 <- fact[-c(1)]
##Univariate graphs
histogram(Satisfaction, xlab = "Satisfaction", ylab = "Count", xlim = c(0, 11), ylim = c(0, 35))

## xlab takes a single label for the one box drawn here; colnames(fact1) would
## hand it all twelve column names at once.
boxplot(Satisfaction, xlab = "Satisfaction", ylim = c(0, 10))
densityplot(Satisfaction, xlab = "Satisfaction", ylab = "count", xlim = c(0, 11), ylim = c(0, 0.3))
## fact2: fact1 without its 12th column (Satisfaction), i.e. the 11 predictors
fact2 <- fact1[-c(12)]
multi.hist(fact2, bcol = "grey", xlim = c(0, 11))
## Draw the predictor boxplots in the upper half of a 2x1 layout, then reset it
par(mfrow = c(2, 1))
boxplot(fact2, las = 2, cex.axis = 0.7)
par(mfrow = c(1, 1))
##Bivariate graphs
## One panel per predictor: Satisfaction against the predictor, with the
## simple least-squares line drawn on top.
old_par <- par(mar = c(1.9, 1.9, 1.9, 1.9), mfrow = c(4, 3))
for (i in seq_len(11)) {
  ## Each panel is labelled with its own predictor name (colnames(fact1)[i]),
  ## not with the whole vector of names. Both axes run to 10 because the
  ## summary above shows values up to 10 (ProdQual) and 9.9 (Satisfaction).
  plot(fact1[, i], Satisfaction, xlab = colnames(fact1)[i], ylab = "Satisfaction", col = "red",
       xlim = c(0, 10), ylim = c(0, 10), cex.axis = 0.75, cex.lab = 1.7, col.lab = "blue")
  abline(lm(formula = Satisfaction ~ fact1[, i]), col = "blue")
}
## Put the margins and layout back so later plots are not drawn into the 4x3 grid
par(old_par)
3
##Outliers in boxplot
## For every predictor in fact2, collect the values boxplot() reports in $out.
outlier_list <- lapply(fact2, function(x) boxplot(x, plot = FALSE)$out)
## Keep only the variables that have at least one outlier.
outlier_list <- outlier_list[lengths(outlier_list) > 0]
## Pad each vector with NA to a common length. cbind() on vectors of unequal
## length recycles the shorter ones, which would list outliers that do not exist.
n_max <- max(lengths(outlier_list), 0)
outliers <- as.data.frame(lapply(outlier_list, function(x) c(x, rep(NA, n_max - length(x)))))
view(outliers)
colnames(outliers)
## Short column labels used in outlier1.csv.
## NOTE(review): presumably these are the four variables that had outliers in
## the original run - confirm. Any other variable keeps its own name instead of
## being mislabelled by a fixed list of four names.
short_names <- c(Ecom = "ecom", SalesFImage = "sales", OrdBilling = "ordbill", DelSpeed = "delsp")
has_short <- colnames(outliers) %in% names(short_names)
colnames(outliers)[has_short] <- unname(short_names[colnames(outliers)[has_short]])
outliers
write.csv(outliers, "outlier1.csv") ##outliers of the data
##MULTICOLLINEARITY
## Correlation matrix of all variables except ID, saved and plotted as numbers
correlation <- cor(fact1)
write.csv(correlation, "cormat.csv")
## Open a fresh graphics device before plotting. The device call must not be
## an argument of corrplot(), and dev.new() works on every platform, whereas
## win.graph() exists only on Windows.
dev.new(width = 10, height = 10)
corrplot(correlation, method = "number", number.digits = 2, number.cex = 0.8, tl.offset = 2, tl.cex = 1)
###Multicollinearity using VIF matrix
## Full model of Satisfaction on all predictors; vif() reports one value per predictor
model1 <- lm(Satisfaction ~ ., data = fact1)

vif(model1)
ProdQual Ecom TechSup CompRes
1.635797 2.756694 2.976796 4.730448
Advertising ProdLine SalesFImage ComPricing
1.508933 3.488185 3.439420 1.635000
WartyClaim OrdBilling DelSpeed
3.198337 2.902999 6.516014
4
##LINEAR REGRESSION
## Simple regression of Satisfaction on ProdQual, then a scatter plot with a loess curve
lm.prodqual <- lm(Satisfaction ~ ProdQual, data = fact)

lm.prodqual
summary(lm.prodqual)
with(fact, {
  plot(ProdQual, Satisfaction)
  lines(loess.smooth(ProdQual, Satisfaction), col = "Red")
})
Call:
lm(formula = Satisfaction ~ ProdQual, data = fact)
Coefficients:
(Intercept) ProdQual
3.6759 0.4151
> summary(lm.prodqual)
Call:
lm(formula = Satisfaction ~ ProdQual, data = fact)
Residuals:
Min 1Q Median 3Q Max
-1.88746 -0.72711 -0.01577 0.85641 2.25220
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.67593 0.59765 6.151 1.68e-08 ***
ProdQual 0.41512 0.07534 5.510 2.90e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.047 on 98 degrees of freedom
Multiple R-squared: 0.2365, Adjusted R-squared: 0.2287
F-statistic: 30.36 on 1 and 98 DF, p-value: 2.901e-07
## Simple regression of Satisfaction on Ecom, then a scatter plot with a loess curve
lm.ecom <- lm(Satisfaction ~ Ecom, data = fact)

lm.ecom
summary(lm.ecom)
with(fact, {
  plot(Ecom, Satisfaction)
  lines(loess.smooth(Ecom, Satisfaction), col = "Red")
})
> lm.ecom
Call:
lm(formula = Satisfaction ~ Ecom, data = fact)
Coefficients:
(Intercept) Ecom
5.1516 0.4811
> summary(lm.ecom)
Call:
lm(formula = Satisfaction ~ Ecom, data = fact)
Residuals:
-2.37200 -0.78971 0.04959 0.68085 2.34580
Coefficients:
(Intercept) 5.1516 0.6161 8.361 4.28e-13 ***
5
Ecom 0.4811 0.1649 2.918 0.00437 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
F-statistic: 8.515 on 1 and 98 DF, p-value: 0.004368
## Simple regression of Satisfaction on TechSup, then a scatter plot with a loess curve
lm.techsup <- lm(Satisfaction ~ TechSup, data = fact)

lm.techsup
summary(lm.techsup)
with(fact, {
  plot(TechSup, Satisfaction)
  lines(loess.smooth(TechSup, Satisfaction), col = "Red")
})
> lm.techsup
Call:
lm(formula = Satisfaction ~ TechSup, data = fact)
Coefficients:
(Intercept) TechSup
6.44757 0.08768
> summary(lm.techsup)
Call:
lm(formula = Satisfaction ~ TechSup, data = fact)
Residuals:
-2.26136 -0.93297 0.04302 0.82501 2.85617
Coefficients:
(Intercept) 6.44757 0.43592 14.791 <2e-16 ***
TechSup 0.08768 0.07817 1.122 0.265
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
TechSup 0.08768 0.07817 1.122 0.265
## Simple regression of Satisfaction on CompRes, then a scatter plot with a loess curve
lm.compres <- lm(Satisfaction ~ CompRes, data = fact)
lm.compres
summary(lm.compres)
with(fact, {
  plot(CompRes, Satisfaction)
  lines(loess.smooth(CompRes, Satisfaction), col = "Red")
})
> lm.compres
Call:
lm(formula = Satisfaction ~ CompRes, data = fact)
Coefficients:
(Intercept) CompRes
3.680 0.595
> summary(lm.compres)
6
Call:
lm(formula = Satisfaction ~ CompRes, data = fact)
Residuals:
-2.40450 -0.66164 0.04499 0.63037 2.70949
Coefficients:
(Intercept) 3.68005 0.44285 8.310 5.51e-13 ***
CompRes 0.59499 0.07946 7.488 3.09e-11 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## Simple regression of Satisfaction on Advertising, then a scatter plot with a loess curve
lm.Advertising <- lm(Satisfaction ~ Advertising, data = fact)

lm.Advertising
summary(lm.Advertising)
with(fact, {
  plot(Advertising, Satisfaction)
  lines(loess.smooth(Advertising, Satisfaction), col = "Red")
})
> lm.Advertising
Call:
lm(formula = Satisfaction ~ Advertising, data = fact)
Coefficients:
(Intercept) Advertising
5.6259 0.3222
> summary(lm.Advertising)
Call:
lm(formula = Satisfaction ~ Advertising, data = fact)
Residuals:
-2.34033 -0.92755 0.05577 0.79773 2.53412
Coefficients:
(Intercept) 5.6259 0.4237 13.279 < 2e-16 ***
Advertising 0.3222 0.1018 3.167 0.00206 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Simple regression of Satisfaction on ProdLine, then a scatter plot with a loess curve
lm.ProdLine <- lm(Satisfaction ~ ProdLine, data = fact)

lm.ProdLine
summary(lm.ProdLine)
with(fact, {
  plot(ProdLine, Satisfaction)
  lines(loess.smooth(ProdLine, Satisfaction), col = "Red")
})
7
> lm.ProdLine
Call:
lm(formula = Satisfaction ~ ProdLine, data = fact)
Coefficients:
(Intercept) ProdLine
4.0220 0.4989
> summary(lm.ProdLine)
Call:
lm(formula = Satisfaction ~ ProdLine, data = fact)
Residuals:
-2.3634 -0.7795 0.1097 0.7604 1.7373
Coefficients:
(Intercept) 4.02203 0.45471 8.845 3.87e-14 ***
ProdLine 0.49887 0.07641 6.529 2.95e-09 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1 on 98 degrees of freedom
## Simple regression of Satisfaction on SalesFImage, then a scatter plot with a loess curve
lm.Salesimage <- lm(Satisfaction ~ SalesFImage, data = fact)

lm.Salesimage
summary(lm.Salesimage)
with(fact, {
  plot(SalesFImage, Satisfaction)
  lines(loess.smooth(SalesFImage, Satisfaction), col = "Red")
})
> lm.Salesimage
Call:
lm(formula = Satisfaction ~ SalesFImage, data = fact)
Coefficients:
(Intercept) SalesFImage
4.070 0.556
> summary(lm.Salesimage)
Call:
lm(formula = Satisfaction ~ SalesFImage, data = fact)
Residuals:
-2.2164 -0.5884 0.1838 0.6922 2.0728
Coefficients:
(Intercept) 4.06983 0.50874 8.000 2.54e-12 ***
SalesFImage 0.55596 0.09722 5.719 1.16e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
8
## Simple regression of Satisfaction on ComPricing, then a scatter plot with a loess curve
lm.compricing <- lm(Satisfaction ~ ComPricing, data = fact)

lm.compricing
summary(lm.compricing)
with(fact, {
  plot(ComPricing, Satisfaction)
  lines(loess.smooth(ComPricing, Satisfaction), col = "Red")
})
> lm.compricing
Call:
lm(formula = Satisfaction ~ ComPricing, data = fact)
Coefficients:
(Intercept) ComPricing
8.0386 -0.1607
> summary(lm.compricing)
Call:
lm(formula = Satisfaction ~ ComPricing, data = fact)
Residuals:
-1.9728 -0.9915 -0.1156 0.9111 2.5845
Coefficients:
(Intercept) 8.03856 0.54427 14.769 <2e-16 ***
ComPricing -0.16068 0.07621 -2.108 0.0376 *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Simple regression of Satisfaction on WartyClaim, then a scatter plot with a loess curve
lm.warty <- lm(Satisfaction ~ WartyClaim, data = fact)

lm.warty
summary(lm.warty)
with(fact, {
  plot(WartyClaim, Satisfaction)
  lines(loess.smooth(WartyClaim, Satisfaction), col = "Red")
})
> lm.warty
Call:
lm(formula = Satisfaction ~ WartyClaim, data = fact)
Coefficients:
(Intercept) WartyClaim
5.3581 0.2581
> summary(lm.warty)
Call:
lm(formula = Satisfaction ~ WartyClaim, data = fact)
Residuals:
9
-2.36504 -0.90202 0.03019 0.90763 2.88985
Coefficients:
(Intercept) 5.3581 0.8813 6.079 2.32e-08 ***
WartyClaim 0.2581 0.1445 1.786 0.0772 .
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Simple regression of Satisfaction on OrdBilling, then a scatter plot with a loess curve
lm.ordbill <- lm(Satisfaction ~ OrdBilling, data = fact)

lm.ordbill
summary(lm.ordbill)
with(fact, {
  plot(OrdBilling, Satisfaction)
  lines(loess.smooth(OrdBilling, Satisfaction), col = "Red")
})
> lm.ordbill
Call:
lm(formula = Satisfaction ~ OrdBilling, data = fact)
Coefficients:
(Intercept) OrdBilling
4.0541 0.6695
> summary(lm.ordbill)
Call:
lm(formula = Satisfaction ~ OrdBilling, data = fact)
Residuals:
-2.4005 -0.7071 -0.0344 0.7340 2.9673
Coefficients:
(Intercept) 4.0541 0.4840 8.377 3.96e-13 ***
OrdBilling 0.6695 0.1106 6.054 2.60e-08 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

## Simple regression of Satisfaction on DelSpeed, then a scatter plot with a loess curve
lm.delsp <- lm(Satisfaction ~ DelSpeed, data = fact)

lm.delsp
summary(lm.delsp)
with(fact, {
  plot(DelSpeed, Satisfaction)
  lines(loess.smooth(DelSpeed, Satisfaction), col = "Red")
})
> lm.delsp
Call:
lm(formula = Satisfaction ~ DelSpeed, data = fact)
10
Coefficients:
(Intercept) DelSpeed
3.2791 0.9364
> summary(lm.delsp)
Call:
lm(formula = Satisfaction ~ DelSpeed, data = fact)
Residuals:
-2.22475 -0.54846 0.08796 0.54462 2.59432
Coefficients:
(Intercept) 3.2791 0.5294 6.194 1.38e-08 ***
DelSpeed 0.9364 0.1339 6.994 3.30e-10 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
##PRINCIPAL COMPONENT AND FACTOR ANALYSIS
## Bartlett's sphericity test on the predictors, run before PCA/FA
print(cortest.bartlett(fact2, n = nrow(fact1)))
> print(cortest.bartlett(fact2,nrow(fact1)))
R was not square, finding R from data
$chisq
[1] 619.2726
$p.value
[1] 1.79337e-96
$df
[1] 55
## KMO measure computed from the correlation matrix of the predictors
kmo1 <- cor(fact2)
KMO(r = kmo1)
> kmo1 = cor(fact2)

> KMO(r = kmo1)
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = kmo1)
Overall MSA = 0.65
MSA for each item =
ProdQual Ecom TechSup CompRes
0.51 0.63 0.52 0.79
Advertising ProdLine SalesFImage ComPricing
0.78 0.62 0.62 0.75
WartyClaim OrdBilling DelSpeed
0.51 0.76 0.67
11
## PCA
## Eigenvalues of the predictor correlation matrix (plotted as a scree plot below)
ev <- eigen(cor(fact2))
eigenval <- ev$values
eigenval
> eigenval
[1] 3.42697133 2.55089671 1.69097648 1.08655606
[5] 0.60942409 0.55188378 0.40151815 0.24695154
[9] 0.20355327 0.13284158 0.09842702
## Component index 1..k, one per eigenvalue, derived from eigenval instead of a
## hard-coded 1..11. The variable keeps the name `factor` (it becomes the first
## column of scree) even though it shares that name with base::factor().
factor <- seq_along(eigenval)
scree <- data.frame(factor, eigenval)
plot(scree, col = "blue", xlab = "Factors", ylab = "Eigen values")
lines(scree, col = "red")
## Horizontal reference line at eigenvalue = 1
abline(h = 1, col = "green")
## Four-component PCA without rotation
pcamodel1 <- principal(fact2, nfactors = 4, rotate = "none")

pcamodel1
print(pcamodel1, digits = 6)
Principal Components Analysis
Call: principal(r = fact2, nfactors = 4, rotate = "none")
Standardized loadings (pattern matrix) based upon correlation matrix
PC1 PC2 PC3 PC4
ProdQual 0.247672 -0.500704 -0.080977 0.670392
Ecom 0.307213 0.713143 0.305911 0.283924
TechSup 0.291922 -0.368890 0.794466 -0.201589
CompRes 0.871333 0.031055 -0.273535 -0.215064
Advertising 0.340132 0.580828 0.114559 0.331367
ProdLine 0.715983 -0.454838 -0.151206 0.211501
SalesFImage 0.377035 0.751773 0.313835 0.231593
ComPricing -0.280807 0.660353 -0.068979 -0.347679
WartyClaim 0.394184 -0.306129 0.778358 -0.193155
OrdBilling 0.809381 0.042163 -0.219672 -0.246892
DelSpeed 0.875786 0.116675 -0.302500 -0.205693
h2 u2 com
ProdQual 0.768029 0.2319706 2.19574
Ecom 0.777147 0.2228526 2.13556
TechSup 0.893112 0.1068876 1.87323
CompRes 0.881260 0.1187399 1.32947
Advertising 0.575979 0.4240214 2.37941
ProdLine 0.787105 0.2128950 2.01074
SalesFImage 0.859446 0.1405536 2.09727
ComPricing 0.640558 0.3594422 1.94456
WartyClaim 0.892247 0.1077535 1.98351
OrdBilling 0.766087 0.2339133 1.34855
DelSpeed 0.914430 0.0855696 1.39680
PC1 PC2 PC3

SS loadings 3.426971 2.550897 1.690976
Proportion Var 0.311543 0.231900 0.153725
Cumulative Var 0.311543 0.543443 0.697168
Proportion Explained 0.391412 0.291351 0.193135
Cumulative Proportion 0.391412 0.682764 0.875899
PC4
SS loadings 1.086556
Proportion Var 0.098778
12
Cumulative Var 0.795946
Proportion Explained 0.124101
Cumulative Proportion 1.000000
Mean item complexity = 1.9
Test of the hypothesis that 4 components are sufficient.
The root mean square of the residuals (RMSR) is 0.059561
with the empirical chi square 39.02253 with prob < 0.00177435
Fit based upon off diagonal values = 0.968166
## The same four-component PCA, now with varimax rotation
pcamodel2 <- principal(fact2, nfactors = 4, rotate = "varimax")

print(pcamodel2, digits = 2)
> print(pcamodel2, digits = 2)

Principal Components Analysis
Call: principal(r = fact2, nfactors = 4, rotate = "varimax")
RC1 RC2 RC3 RC4 h2 u2 com
ProdQual 0.00 -0.01 -0.03 0.88 0.77 0.232 1.0
Ecom 0.06 0.87 0.05 -0.12 0.78 0.223 1.1
TechSup 0.02 -0.02 0.94 0.10 0.89 0.107 1.0
CompRes 0.93 0.12 0.05 0.09 0.88 0.119 1.1
Advertising 0.14 0.74 -0.08 0.01 0.58 0.424 1.1
ProdLine 0.59 -0.06 0.15 0.64 0.79 0.213 2.1
SalesFImage 0.13 0.90 0.08 -0.16 0.86 0.141 1.1
ComPricing -0.09 0.23 -0.25 -0.72 0.64 0.359 1.5
WartyClaim 0.11 0.05 0.93 0.10 0.89 0.108 1.1
OrdBilling 0.86 0.11 0.08 0.04 0.77 0.234 1.1
DelSpeed 0.94 0.18 0.00 0.05 0.91 0.086 1.1
RC1 RC2 RC3 RC4

SS loadings 2.89 2.23 1.86 1.77
Proportion Var 0.26 0.20 0.17 0.16
Cumulative Var 0.26 0.47 0.63 0.80
Proportion Explained 0.33 0.26 0.21 0.20
Cumulative Proportion 0.33 0.59 0.80 1.00
Test of the hypothesis that 4 components are sufficient.
with the empirical chi square 39.02 with prob < 0.0018
Fit based upon off diagonal values = 0.97
## PCA factor graph
## The cutoff only hides small loadings in the printed listing; pcareduced
## itself holds the complete rotated loadings.
pcareduced <- pcamodel2$loadings
print(pcareduced, cutoff = 0.3)

fa.diagram(pcareduced, cex = 0.7, e.size = 0.1, rsize = 0.15)
## Factor analysis: four factors, fm = "pa", varimax rotation
factana <- fa(r = fact2, nfactors = 4, rotate = "varimax", fm = "pa")

print(factana, digits = 2)
13
> print(factana, digits = 2)
Factor Analysis using method = pa
Call: fa(r = fact2, nfactors = 4, rotate = "varimax", fm = "pa")
PA1 PA2 PA3 PA4 h2 u2 com
ProdQual 0.02 -0.07 0.02 0.65 0.42 0.576 1.0
Ecom 0.07 0.79 0.03 -0.11 0.64 0.362 1.1
TechSup 0.02 -0.03 0.88 0.12 0.79 0.205 1.0
CompRes 0.90 0.13 0.05 0.13 0.84 0.157 1.1
Advertising 0.17 0.53 -0.04 -0.06 0.31 0.686 1.2
ProdLine 0.53 -0.04 0.13 0.71 0.80 0.200 1.9
SalesFImage 0.12 0.97 0.06 -0.13 0.98 0.021 1.1
ComPricing -0.08 0.21 -0.21 -0.59 0.44 0.557 1.6
WartyClaim 0.10 0.06 0.89 0.13 0.81 0.186 1.1
OrdBilling 0.77 0.13 0.09 0.09 0.62 0.378 1.1
DelSpeed 0.95 0.19 0.00 0.09 0.94 0.058 1.1
PA1 PA2 PA3 PA4

SS loadings 2.63 1.97 1.64 1.37
Proportion Var 0.24 0.18 0.15 0.12
Cumulative Var 0.24 0.42 0.57 0.69
Proportion Explained 0.35 0.26 0.22 0.18
Cumulative Proportion 0.35 0.60 0.82 1.00
Test of the hypothesis that 4 factors are sufficient.
The degrees of freedom for the null model are 55 and the objective function was 6.55 with Chi Square of 619.27
The degrees of freedom for the model are 17 and the objective function
was 0.33

The df corrected root mean square of the residuals is 0.03
The harmonic number of observations is 100 with the empirical chi square 3.19 with prob < 1
The total number of observations was 100 with Likelihood Chi Square =
30.27 with prob < 0.024
Tucker Lewis Index of factoring reliability = 0.921
RMSEA index = 0.096 and the 90 % confidence intervals are 0.032 0.139
BIC = -48.01
Fit based upon off diagonal values = 1
Measures of factor score adequacy
PA1
Correlation of (regression) scores with factors 0.98
Multiple R square of scores with factors 0.96
Minimum correlation of possible factor scores 0.93
PA2
PA3
PA4
14
## The cutoff only affects the printed listing; factanared holds the full FA loadings
factanared <- factana$loadings
print(factanared, cutoff = 0.3)
## FA factor graph
fa.diagram(factanared, cex = 0.7, e.size = 0.1, rsize = 0.15)
## PCA and FA scores as data frames, printed and saved to CSV
pcascores <- as.data.frame(pcamodel2$scores)
pcascores
write.csv(pcascores, "pcascores.csv")
fascores <- as.data.frame(factana$scores)
fascores
write.csv(fascores, "fascores.csv")
## REGRESSION MODEL
## Regression on the full data set (no train/test split)
## PCA: column 13 of fact (Satisfaction) placed next to the four component scores
pcaregdata <- cbind(fact[, 13], pcascores)
class(pcaregdata)
names(pcaregdata) <- c("Satisfaction", "Purchase", "Marketing", "Afterservice", "Productqual")
pcaregdata
head(pcaregdata)
## Values rounded to 2 decimals are what gets saved and modelled
pcaregdata1 <- round(pcaregdata, 2)
write.csv(pcaregdata1, "PCAregscores.csv")
## FA: the same layout built from the factor scores
faregdata <- cbind(fact[, 13], fascores)
class(faregdata)
faregdata1 <- round(faregdata, 2) ## rounded fa scores
names(faregdata1) <- c("CustSat", "Sales", "Brandim", "Aftersaleexp", "Prodplace")
write.csv(faregdata1, "FAregscores.csv")
### PCA regression (without splitting)
pcaregmodel <- lm(Satisfaction ~ Purchase + Marketing + Afterservice + Productqual, data = pcaregdata1)
summary(pcaregmodel)
> pcaregmodel = lm(Satisfaction~Purchase+Marketing+Afterservice+Productqual,data=pcaregdata1)
> summary(pcaregmodel)
Call:
lm(formula = Satisfaction ~ Purchase + Marketing + Afterservice +
Productqual, data = pcaregdata1)
Residuals:
-1.6346 -0.5021 0.1368 0.4617 1.5235
Coefficients:
15
(Intercept) 6.91813 0.07087 97.617 < 2e-16 ***
Purchase 0.61799 0.07122 8.677 1.11e-13 ***
Marketing 0.50994 0.07123 7.159 1.71e-10 ***
Afterservice 0.06686 0.07120 0.939 0.35
Productqual 0.54014 0.07124 7.582 2.27e-11 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

F-statistic: 46.25 on 4 and 95 DF, p-value: < 2.2e-16
##Test validity of PCA by Backtracking model(without data splitting)
## predict() without newdata returns the fitted values for the rows the model was fitted on
pred.sat <- predict(pcaregmodel)
as.data.frame(pred.sat)
write.csv(pred.sat, "PCAPrediction.csv")
head(pred.sat)
backtrack <- cbind(pcaregdata1, pred.sat)
write.csv(backtrack, "PCAbacktrack.csv")
##PCA Backtracking graph
## First figure: actual satisfaction only
plot(backtrack$Satisfaction, col = "red")
lines(backtrack$Satisfaction, col = "red")
## Second figure: predicted (blue) with actual (red) drawn on top. Axis titles
## are given once, as strings, in plot(); lines() takes no xlab/ylab. ylim spans
## both series so the overlaid actual values are not cut off.
plot(backtrack$pred.sat, col = "blue", xlab = "Count", ylab = "Satisfaction",
     ylim = range(backtrack$Satisfaction, backtrack$pred.sat))
lines(backtrack$pred.sat, col = "blue")
lines(backtrack$Satisfaction, col = "red")
text(60, 9, "Actual Value", col = "red")
text(15, 9, "Predicted Value", col = "blue")
### FA regression on the full data set (no train/test split)
faregmodel <- lm(CustSat ~ Sales + Brandim + Aftersaleexp + Prodplace, data = faregdata1)

summary(faregmodel)
> faregmodel = lm(CustSat~Sales+Brandim+Aftersaleexp+Prodplace, data=faregdata1)
> summary(faregmodel)
Call:
lm(formula = CustSat ~ Sales + Brandim + Aftersaleexp + Prodplace,
data = faregdata1)
Residuals:
-1.70957 -0.46940 0.09918 0.41318 1.35209
Coefficients:
(Intercept) 6.91778 0.06693 103.358 < 2e-16 ***
Sales 0.57956 0.06856 8.453 3.32e-13 ***
Brandim 0.61964 0.06831 9.071 1.60e-14 ***
Aftersaleexp 0.05774 0.07171 0.805 0.423
Prodplace 0.61187 0.07650 7.998 3.03e-12 ***
---
Signif. codes:
16
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
F-statistic: 54.73 on 4 and 95 DF, p-value: < 2.2e-16
##Backtracking on FA model (without data splitting)
## predict() without newdata returns the fitted values for the rows the model was fitted on
pred.sat1 <- predict(faregmodel)
as.data.frame(pred.sat1)
write.csv(pred.sat1, "FA prediction.csv")
head(pred.sat1)
backtrack1 <- cbind(faregdata1, pred.sat1)
write.csv(backtrack1, "FABacktrack.csv")
##FA Backtrack plot
## First figure: actual satisfaction only
plot(backtrack1$CustSat, col = "red")
lines(backtrack1$CustSat, col = "red")
## Second figure: predicted (blue) with actual (red) drawn on top. Axis titles
## are given once, as strings, in plot(); lines() takes no xlab/ylab. ylim spans
## both series so the overlaid actual values are not cut off.
plot(backtrack1$pred.sat1, col = "blue", xlab = "Count", ylab = "Satisfaction",
     ylim = range(backtrack1$CustSat, backtrack1$pred.sat1))
lines(backtrack1$pred.sat1, col = "blue")
lines(backtrack1$CustSat, col = "red")
text(62, 5.1, "Predicted Value", col = "blue")
## Regression modelling with a train/test split
### Split the PCA regression data: a random 70% of the rows for training, the rest for testing
set.seed(100)
pcasplit <- sample(seq_len(nrow(pcaregdata1)), 0.7 * nrow(pcaregdata1))
pcatrain <- pcaregdata1[pcasplit, ]
pcatest <- pcaregdata1[-pcasplit, ]
class(pcatrain)
write.csv(pcatrain, "PCAtrain.csv")
write.csv(pcatest, "PCAtest.csv")
### PCA regression fitted on the training rows
pcatrainreg <- lm(Satisfaction ~ ., data = pcatrain)

summary(pcatrainreg)
> summary(pcatrainreg)
Call:
lm(formula = Satisfaction ~ ., data = pcatrain)
Residuals:
-1.62117 -0.49516 0.03112 0.44424 1.72181
Coefficients:
(Intercept) 6.89394 0.09064 76.055 < 2e-16 ***
Purchase 0.56130 0.08285 6.775 4.33e-09 ***
17
Marketing 0.59052 0.09065 6.514 1.24e-08 ***
Afterservice 0.09847 0.08683 1.134 0.261
Productqual 0.47035 0.08590 5.475 7.55e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## A separate regression fitted directly on the test rows
pcatestreg <- lm(Satisfaction ~ ., data = pcatest)

summary(pcatestreg)
> summary(pcatestreg)
Call:
lm(formula = Satisfaction ~ ., data = pcatest)
Residuals:
-0.7828 -0.3017 0.0270 0.2832 0.8652
Coefficients:
(Intercept) 7.15569 0.08762 81.669 < 2e-16 ***
Purchase 0.93570 0.10314 9.072 2.20e-09 ***
Marketing 0.23133 0.08354 2.769 0.0104 *
Afterservice -0.01100 0.08849 -0.124 0.9021
Productqual 0.85112 0.09574 8.890 3.26e-09 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

##Backtracking for test and train data (PCA)
##PCA train backtrack
## Fitted values of the training model on the training rows
pred.satttrain <- predict(pcatrainreg)
as.data.frame(pred.satttrain)
head(pred.satttrain)
write.csv(pred.satttrain, "PCAtrainpredict.csv")
backtrack2 <- cbind(pcatrain, pred.satttrain)
write.csv(backtrack2, "pcasplitbact.csv")
## First figure: actual satisfaction of the training rows
plot(backtrack2$Satisfaction, col = "red")
lines(backtrack2$Satisfaction, col = "red")
## Second figure: predicted values. Axis titles are given once, as strings, in
## plot(); lines() takes no xlab/ylab.
plot(backtrack2$pred.satttrain, col = "blue", xlab = "Count", ylab = "Satisfaction")
lines(backtrack2$pred.satttrain, col = "blue")
text(40.1, 5, "Predicted Value", col = "blue")
##PCA test backtrack
18
## Score the held-out rows with the model fitted on the training split.
## Predicting from a model refit on the test rows would only reproduce that
## model's in-sample fit and says nothing about how the trained model generalises.
pred.satttest <- predict(pcatrainreg, newdata = pcatest)
as.data.frame(pred.satttest)
head(pred.satttest)
write.csv(pred.satttest, "PCAtestpredict.csv")
backtrack3 <- cbind(pcatest, pred.satttest)
write.csv(backtrack3, "pcatestsplitbact.csv")
## First figure: actual satisfaction of the test rows
plot(backtrack3$Satisfaction, col = "red")
## Second figure: predicted (blue) with actual (red). Axis titles are given
## once, as strings, in plot(); ylim spans both series.
plot(backtrack3$pred.satttest, col = "blue", xlab = "Count", ylab = "Satisfaction",
     ylim = range(backtrack3$Satisfaction, backtrack3$pred.satttest))
lines(backtrack3$pred.satttest, col = "blue")
## Draw the actual series that the "Actual Value" label below refers to
lines(backtrack3$Satisfaction, col = "red")
text(9, 7.45, "Actual Value", col = "red")
text(24.3, 8.2, "Predicted Value", col = "blue")
### Split the FA regression data: a random 70% of the rows for training, the rest for testing
fasplit <- sample(seq_len(nrow(faregdata1)), 0.7 * nrow(faregdata1))

fatrain <- faregdata1[fasplit, ]
fatest <- faregdata1[-fasplit, ]
write.csv(fatrain, "FAtrain.csv")
write.csv(fatest, "FAtest.csv")
## FA regression fitted on the training rows
fatrainreg <- lm(CustSat ~ ., data = fatrain)

summary(fatrainreg)
> summary(fatrainreg)
Call:
lm(formula = CustSat ~ ., data = fatrain)
Residuals:
-1.62091 -0.40715 -0.02284 0.48897 1.37143
Coefficients:
(Intercept) 6.88507 0.07720 89.183 < 2e-16 ***
Sales 0.60031 0.07416 8.095 1.98e-11 ***
Brandim 0.58485 0.08508 6.875 2.89e-09 ***
Aftersaleexp -0.01849 0.07925 -0.233 0.816
Prodplace 0.55627 0.09408 5.913 1.37e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## A separate regression fitted directly on the FA test rows
fatestreg <- lm(CustSat ~ ., data = fatest)
summary(fatestreg)
19
> summary(fatestreg)
Call:
lm(formula = CustSat ~ ., data = fatest)
Residuals:
-1.88613 -0.35542 -0.07904 0.54605 1.15182
Coefficients:
(Intercept) 7.0102 0.1343 52.203 < 2e-16 ***
Sales 0.5169 0.1757 2.942 0.006938 **
Brandim 0.6485 0.1434 4.523 0.000128 ***
Aftersaleexp 0.3002 0.1722 1.743 0.093609 .
Prodplace 0.6142 0.1762 3.487 0.001826 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
##Backtracking for FA Test and train data
### FA train data backtrack
## Fitted values of the training model on the training rows
fapred.satttrain <- predict(fatrainreg)
as.data.frame(fapred.satttrain)
head(fapred.satttrain)
write.csv(fapred.satttrain, "FAtrainpredict.csv")
fabacktrack <- cbind(fatrain, fapred.satttrain)
write.csv(fabacktrack, "fatrainbact.csv")
## First figure: actual satisfaction of the training rows
plot(fabacktrack$CustSat, col = "red")
lines(fabacktrack$CustSat, col = "red")
## Second figure: predicted (blue) with actual (red) drawn on top. Axis titles
## are given once, as strings, in plot(); lines() takes no xlab/ylab. ylim spans
## both series so the overlaid actual values are not cut off.
plot(fabacktrack$fapred.satttrain, col = "blue", xlab = "Count", ylab = "Satisfaction",
     ylim = range(fabacktrack$CustSat, fabacktrack$fapred.satttrain))
lines(fabacktrack$fapred.satttrain, col = "blue")
lines(fabacktrack$CustSat, col = "red")
##FA test data backtrack

## Score the held-out rows with the model fitted on the training split.
## Predicting from a model refit on the test rows would only reproduce that
## model's in-sample fit and says nothing about how the trained model generalises.
fapred.satttest <- predict(fatrainreg, newdata = fatest)
as.data.frame(fapred.satttest)
head(fapred.satttest)
write.csv(fapred.satttest, "FAtestpredict.csv")
fabacktrack2 <- cbind(fatest, fapred.satttest)
write.csv(fabacktrack2, "fatestbact.csv")
## First figure: actual satisfaction of the test rows
plot(fabacktrack2$CustSat, col = "red")
lines(fabacktrack2$CustSat, col = "red")
## Second figure: predicted (blue) with actual (red) drawn on top. Axis titles
## are given once, as strings, in plot(); lines() takes no xlab/ylab. ylim spans
## both series so the overlaid actual values are not cut off.
plot(fabacktrack2$fapred.satttest, col = "blue", xlab = "Count", ylab = "Satisfaction",
     ylim = range(fabacktrack2$CustSat, fabacktrack2$fapred.satttest))
lines(fabacktrack2$fapred.satttest, col = "blue")
lines(fabacktrack2$CustSat, col = "red")
text(14, 8.7, "Actual Value", col = "red")
20

Pratik Zanke Source Codes PDF

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Pratik Zanke Source Codes PDF

Uploaded by

Copyright:

Available Formats

1 Appendix A – Source Code

##Set Working Directory

##Class and names of data

##Check for missing values

##Summary of the dataset

histogram(Satisfaction,xlab = "Satisfaction",ylab="Count", xlim=c(0,11), ylim = c(0,35))

out1=boxplot(fact2$ProdQual, plot = FALSE)$out

###Multicollinearity using VIF matrix

model1 = lm(Satisfaction~.,data = fact1)

lm.prodqual = lm(Satisfaction~ProdQual, fact)

lm.ecom = lm(Satisfaction~Ecom, fact)

lm.techsup = lm(Satisfaction~TechSup, fact)

Residual standard error: 0.9554 on 98 degrees of freedom

lm.Advertising = lm(Satisfaction~Advertising, fact)

lm.ProdLine = lm(Satisfaction~ProdLine, fact)

lm.Salesimage = lm(Satisfaction~SalesFImage, fact)

lm.compricing = lm(Satisfaction~ComPricing, fact)

lm.warty = lm(Satisfaction~WartyClaim, fact)

lm.ordbill = lm(Satisfaction~OrdBilling, fact)

Residual standard error: 1.022 on 98 degrees of freedom

lm.delsp = lm(Satisfaction~DelSpeed, fact)

PRINCIPAL COMPONENT AND FACTOR ANALYSIS

##Bartlett sphericity test for checking PCA/FA

##KMO test for checking FA

> kmo1 = cor(fact2)

##Eigen values and scree plot

pcamodel1 = principal(fact2,nfactors = 4,rotate = "none")

PC1 PC2 PC3

pcamodel2 = principal(fact2,nfactors = 4,rotate = "varimax")

> print(pcamodel2, digits = 2)

RC1 RC2 RC3 RC4

##PCA Factor Graph

pcareduced = print(pcamodel2$loadings, cutoff = 0.3)

##Using Factor analysis

factana = fa(r=fact2,nfactors = 4,rotate = "varimax",fm = "pa")

PA1 PA2 PA3 PA4

The root mean square of the residuals (RMSR) is 0.02

##Plotting FA Factor Graph

fa.diagram(factanared,cex = 0.7,e.size = 0.1,rsize = 0.15)

##Obtaining PCA and FA scores as dataframes

##Regression modelling without splitting the data

###PCA Regression (without splitting)

> pcaregmodel = lm(Satisfaction~Purchase+Marketing+Afterservice+Product

Residual standard error: 0.7087 on 95 degrees of freedom

##Test validity of PCA by Backtracking model(without data splitting)

##PCA Backtracking graph

###FA Regression without data splitting

faregmodel = lm(CustSat~Sales+Brandim+Aftersaleexp+Prodplace, data=faregdata1)

> faregmodel = lm(CustSat~Sales+Brandim+Aftersaleexp+Prodplace, data=fa

##Backtracking on FA model (without data splitting)

##FA Backtrack plot

##Regression modelling using train and test data

###Splitting PCA Regression data

###PCA regression on test and train data

pcatrainreg = lm(Satisfaction~.,data= pcatrain)

pcatestreg = lm(Satisfaction~., data = pcatest)

Residual standard error: 0.4295 on 25 degrees of freedom

##Backtracking for test and train data (PCA)

##PCA train backtrack

##PCA test backtrack

###Splitting FA regression data for FA analysis

fasplit= sample (1:nrow(faregdata1), 0.7*nrow(faregdata1))

##FA regression on test and train data