You are on page 1 of 20

1 Appendix A – Source Code

#=======================================================================
#
# Data Analysis – Factor-Hair-Revised
#
#=======================================================================
##Load the packages used in this analysis

library ("psych")
library("readr")
library("dplyr")
library ("corrplot")
library("ggplot2")
library("nFactors")
library("lattice")
library("tidyr")
library("tidyverse")
library("car")

##Set Working Directory

## Work from the project folder; the relative file names used for reading and
## writing below are resolved against it.
setwd(dir = "F:/project")

getwd()

## Read the survey data (first row holds the column names)
fact <- read.csv(file = "Factor-Hair-Revised-1.csv", header = TRUE)

##EXPLORATORY ANALYSIS

##Check dimensionality

## Rows x columns
dim(x = fact)

## Object class and column names

class(x = fact)
colnames(fact)

## TRUE when at least one cell of the data frame is NA

any(is.na(x = fact))

[1] FALSE

##Structure of data

1
## Compact listing of every column's type and first values
utils::str(fact)
'data.frame': 100 obs. of 13 variables:
$ ID : int 1 2 3 4 5 6 7 8 9 10 ...
$ ProdQual : num 8.5 8.2 9.2 6.4 9 6.5 6.9 6.2 5.8 6.4 ...
$ Ecom : num 3.9 2.7 3.4 3.3 3.4 2.8 3.7 3.3 3.6 4.5 ...
$ TechSup : num 2.5 5.1 5.6 7 5.2 3.1 5 3.9 5.1 5.1 ...
$ CompRes : num 5.9 7.2 5.6 3.7 4.6 4.1 2.6 4.8 6.7 6.1 ...
$ Advertising : num 4.8 3.4 5.4 4.7 2.2 4 2.1 4.6 3.7 4.7 ...
$ ProdLine : num 4.9 7.9 7.4 4.7 6 4.3 2.3 3.6 5.9 5.7 ...
$ SalesFImage : num 6 3.1 5.8 4.5 4.5 3.7 5.4 5.1 5.8 5.7 ...
$ ComPricing : num 6.8 5.3 4.5 8.8 6.8 8.5 8.9 6.9 9.3 8.4 ...
$ WartyClaim : num 4.7 5.5 6.2 7 6.1 5.1 4.8 5.4 5.9 5.4 ...
$ OrdBilling : num 5 3.9 5.4 4.3 4.5 3.6 2.1 4.3 4.4 4.1 ...
$ DelSpeed : num 3.7 4.9 4.5 3 3.5 3.3 2 3.7 4.6 4.4 ...
$ Satisfaction: num 8.2 5.7 8.9 4.8 7.1 4.7 5.7 6.3 7 5.5 ...

##Summary of the dataset

## Five-number summary plus mean for every column
summary(object = fact)
ID ProdQual Ecom
Min. : 1.00 Min. : 5.000 Min. :2.200
1st Qu.: 25.75 1st Qu.: 6.575 1st Qu.:3.275
Median : 50.50 Median : 8.000 Median :3.600
Mean : 50.50 Mean : 7.810 Mean :3.672
3rd Qu.: 75.25 3rd Qu.: 9.100 3rd Qu.:3.925
Max. :100.00 Max. :10.000 Max. :5.700
TechSup CompRes Advertising
Min. :1.300 Min. :2.600 Min. :1.900
1st Qu.:4.250 1st Qu.:4.600 1st Qu.:3.175
Median :5.400 Median :5.450 Median :4.000
Mean :5.365 Mean :5.442 Mean :4.010
3rd Qu.:6.625 3rd Qu.:6.325 3rd Qu.:4.800
Max. :8.500 Max. :7.800 Max. :6.500
ProdLine SalesFImage ComPricing
Min. :2.300 Min. :2.900 Min. :3.700
1st Qu.:4.700 1st Qu.:4.500 1st Qu.:5.875
Median :5.750 Median :4.900 Median :7.100
Mean :5.805 Mean :5.123 Mean :6.974
3rd Qu.:6.800 3rd Qu.:5.800 3rd Qu.:8.400
Max. :8.400 Max. :8.200 Max. :9.900
WartyClaim OrdBilling DelSpeed
Min. :4.100 Min. :2.000 Min. :1.600
1st Qu.:5.400 1st Qu.:3.700 1st Qu.:3.400
Median :6.100 Median :4.400 Median :3.900
Mean :6.043 Mean :4.278 Mean :3.886
3rd Qu.:6.600 3rd Qu.:4.800 3rd Qu.:4.425
Max. :8.100 Max. :6.700 Max. :5.500
Satisfaction
Min. :4.700
1st Qu.:6.000
Median :7.050
Mean :6.918
3rd Qu.:7.625
Max. :9.900

2
## Extended descriptive statistics (psych package)
psych::describe(fact)
vars n mean sd median trimmed mad
ID 1 100 50.50 29.01 50.50 50.50 37.06
ProdQual 2 100 7.81 1.40 8.00 7.85 1.78
Ecom 3 100 3.67 0.70 3.60 3.63 0.52
TechSup 4 100 5.37 1.53 5.40 5.40 1.85
CompRes 5 100 5.44 1.21 5.45 5.46 1.26
Advertising 6 100 4.01 1.13 4.00 4.00 1.19
ProdLine 7 100 5.80 1.32 5.75 5.81 1.56
SalesFImage 8 100 5.12 1.07 4.90 5.09 0.89
ComPricing 9 100 6.97 1.55 7.10 7.01 1.93
WartyClaim 10 100 6.04 0.82 6.10 6.04 0.89
OrdBilling 11 100 4.28 0.93 4.40 4.31 0.74
DelSpeed 12 100 3.89 0.73 3.90 3.92 0.74
Satisfaction 13 100 6.92 1.19 7.05 6.90 1.33
min max range skew kurtosis se
ID 1.0 100.0 99.0 0.00 -1.24 2.90
ProdQual 5.0 10.0 5.0 -0.24 -1.17 0.14
Ecom 2.2 5.7 3.5 0.64 0.57 0.07
TechSup 1.3 8.5 7.2 -0.20 -0.63 0.15
CompRes 2.6 7.8 5.2 -0.13 -0.66 0.12
Advertising 1.9 6.5 4.6 0.04 -0.94 0.11
ProdLine 2.3 8.4 6.1 -0.09 -0.60 0.13
SalesFImage 2.9 8.2 5.3 0.37 0.26 0.11
ComPricing 3.7 9.9 6.2 -0.23 -0.96 0.15
WartyClaim 4.1 8.1 4.0 0.01 -0.53 0.08
OrdBilling 2.0 6.7 4.7 -0.32 0.11 0.09
DelSpeed 1.6 5.5 3.9 -0.45 0.09 0.07
Satisfaction 4.7 9.9 5.2 0.08 -0.86 0.12

## Put the columns of fact on the search path: the plotting code further down
## refers to them by bare name (e.g. Satisfaction, ProdQual).
attach(fact)
## Working copy without the first column (ID)
fact1 <- fact[-1]

##Univariate graphs

## Distribution of the response variable (lattice histogram).
histogram(fact1$Satisfaction, xlab = "Satisfaction", ylab = "Count",
          xlim = c(0, 11), ylim = c(0, 35))


## Boxplot of the response. The x label is the plotted variable; it was
## previously colnames(fact1), i.e. all 12 column names at once.
boxplot(fact1$Satisfaction, xlab = "Satisfaction", ylim = c(0, 10))
## Kernel density of the response. The y axis is a density, not a count.
densityplot(fact1$Satisfaction, xlab = "Satisfaction", ylab = "Density",
            xlim = c(0, 11), ylim = c(0, 0.3))

## fact2 holds the predictors only: fact1 without its 12th column
## (Satisfaction). It is reused by the outlier, PCA and FA sections.
fact2 <- fact1[-c(12)]
multi.hist(fact2, bcol = "grey", xlim = c(0, 11))
## Side-by-side boxplots of all predictors; labels are rotated (las = 2) and
## shrunk so the variable names fit. The layout is reset afterwards.
par(mfrow = c(2, 1))
boxplot(fact2, las = 2, cex.axis = 0.7)
par(mfrow = c(1, 1))

##Bivariate graphs

## Scatter plot of Satisfaction against each of the first 11 columns of fact1
## (the predictors), with the fitted simple-regression line overlaid, laid out
## on a 4 x 3 grid.
##
## Changes from the previous version:
##  * xlab is the name of the predictor being plotted; it used to be the whole
##    vector of 11 column names on every panel.
##  * both axes run to 10; the old upper limit of 9 dropped observations above
##    9 (ProdQual, ComPricing and Satisfaction all have maxima above 9).
##  * the graphics parameters are restored afterwards so later plots are not
##    drawn into the 4 x 3 grid with the reduced margins.
old_par <- par(mar = c(1.9, 1.9, 1.9, 1.9), mfrow = c(4, 3))
response <- fact1$Satisfaction
for (i in seq_len(11)) {
  predictor <- fact1[[i]]
  plot(predictor, response,
       xlab = colnames(fact1)[i], ylab = "Satisfaction", col = "red",
       xlim = c(0, 10), ylim = c(0, 10),
       cex.axis = 0.75, cex.lab = 1.7, col.lab = "blue")
  abline(lm(formula = response ~ predictor), col = "blue")
}
par(old_par)

3
##Outliers in boxplot

## For every column of fact2, collect the values that boxplot() flags as
## outliers (plot = FALSE: statistics only, nothing is drawn).
outlier_list <- lapply(fact2, function(column) boxplot(column, plot = FALSE)$out)

## Keep only the variables that actually have outliers.
outlier_list <- outlier_list[lengths(outlier_list) > 0]

## Pad every vector with NA up to the longest one before combining them.
## The previous cbind(out1, ..., out11) recycled the shorter vectors, which
## repeats outlier values that do not exist, and the result was then given
## four hard-coded column names regardless of which variables had outliers.
longest <- max(lengths(outlier_list), 0L)
outliers <- as.data.frame(lapply(outlier_list, function(values) {
  c(values, rep(NA_real_, longest - length(values)))
}))
view(outliers)
colnames(outliers)

## Short labels used in the exported file. Presumably these are the four
## variables that had outliers in the original run -- TODO confirm. Any other
## variable keeps its full column name.
short_names <- c(Ecom = "ecom", SalesFImage = "sales",
                 OrdBilling = "ordbill", DelSpeed = "delsp")
relabel <- colnames(outliers) %in% names(short_names)
colnames(outliers)[relabel] <- unname(short_names[colnames(outliers)[relabel]])
outliers
write.csv(outliers, "outlier1.csv") ##outliers of the data

##MULTICOLLINEARITY

## Pairwise correlations of all predictors and the response, saved to disk
## and drawn as a matrix of coefficients.
correlation <- cor(fact1)
write.csv(correlation, "cormat.csv")
## Open a fresh graphics device for the plot. The previous version passed
## win.graph(4500, 4500) as an unnamed argument to corrplot(); win.graph()
## exists only on Windows and is not a corrplot option.
dev.new()
corrplot(correlation, method = "number", number.digits = 2, number.cex = 0.8,
         tl.offset = 2, tl.cex = 1)

###Multicollinearity using VIF matrix

## Regress Satisfaction on every other column of fact1, then report the
## variance inflation factor of each predictor.
model1 <- lm(formula = Satisfaction ~ ., data = fact1)


car::vif(model1)
ProdQual Ecom TechSup CompRes
1.635797 2.756694 2.976796 4.730448
Advertising ProdLine SalesFImage ComPricing
1.508933 3.488185 3.439420 1.635000
WartyClaim OrdBilling DelSpeed
3.198337 2.902999 6.516014

4
##LINEAR REGRESSION

## Simple regression of Satisfaction on ProdQual, followed by a scatter plot
## with a loess-smoothed trend line.
lm.prodqual <- lm(formula = Satisfaction ~ ProdQual, data = fact)


lm.prodqual
summary(object = lm.prodqual)
with(fact, {
  plot(ProdQual, Satisfaction)
  lines(loess.smooth(ProdQual, Satisfaction), col = "Red")
})
Call:
lm(formula = Satisfaction ~ ProdQual, data = fact)
Coefficients:
(Intercept) ProdQual
3.6759 0.4151
> summary(lm.prodqual)

Call:
lm(formula = Satisfaction ~ ProdQual, data = fact)
Residuals:
Min 1Q Median 3Q Max
-1.88746 -0.72711 -0.01577 0.85641 2.25220
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.67593 0.59765 6.151 1.68e-08 ***
ProdQual 0.41512 0.07534 5.510 2.90e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.047 on 98 degrees of freedom
Multiple R-squared: 0.2365, Adjusted R-squared: 0.2287
F-statistic: 30.36 on 1 and 98 DF, p-value: 2.901e-07

## Simple regression of Satisfaction on Ecom, followed by a scatter plot with
## a loess-smoothed trend line.
lm.ecom <- lm(formula = Satisfaction ~ Ecom, data = fact)


lm.ecom
summary(object = lm.ecom)
with(fact, {
  plot(Ecom, Satisfaction)
  lines(loess.smooth(Ecom, Satisfaction), col = "Red")
})
> lm.ecom

Call:
lm(formula = Satisfaction ~ Ecom, data = fact)
Coefficients:
(Intercept) Ecom
5.1516 0.4811
> summary(lm.ecom)

Call:
lm(formula = Satisfaction ~ Ecom, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.37200 -0.78971 0.04959 0.68085 2.34580

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.1516 0.6161 8.361 4.28e-13 ***

5
Ecom 0.4811 0.1649 2.918 0.00437 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.149 on 98 degrees of freedom
Multiple R-squared: 0.07994, Adjusted R-squared: 0.07056
F-statistic: 8.515 on 1 and 98 DF, p-value: 0.004368

## Simple regression of Satisfaction on TechSup, followed by a scatter plot
## with a loess-smoothed trend line.
lm.techsup <- lm(formula = Satisfaction ~ TechSup, data = fact)


lm.techsup
summary(object = lm.techsup)
with(fact, {
  plot(TechSup, Satisfaction)
  lines(loess.smooth(TechSup, Satisfaction), col = "Red")
})

> lm.techsup

Call:
lm(formula = Satisfaction ~ TechSup, data = fact)

Coefficients:
(Intercept) TechSup
6.44757 0.08768
> summary(lm.techsup)

Call:
lm(formula = Satisfaction ~ TechSup, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.26136 -0.93297 0.04302 0.82501 2.85617
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.44757 0.43592 14.791 <2e-16 ***
TechSup 0.08768 0.07817 1.122 0.265
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.19 on 98 degrees of freedom
Multiple R-squared: 0.01268, Adjusted R-squared: 0.002603
F-statistic: 1.258 on 1 and 98 DF, p-value: 0.2647
TechSup 0.08768 0.07817 1.122 0.265
## Simple regression of Satisfaction on CompRes, followed by a scatter plot
## with a loess-smoothed trend line.
lm.compres <- lm(formula = Satisfaction ~ CompRes, data = fact)
lm.compres
summary(object = lm.compres)
with(fact, {
  plot(CompRes, Satisfaction)
  lines(loess.smooth(CompRes, Satisfaction), col = "Red")
})
> lm.compres

Call:
lm(formula = Satisfaction ~ CompRes, data = fact)
Coefficients:
(Intercept) CompRes
3.680 0.595

> summary(lm.compres)

6
Call:
lm(formula = Satisfaction ~ CompRes, data = fact)

Residuals:
Min 1Q Median 3Q Max
-2.40450 -0.66164 0.04499 0.63037 2.70949
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.68005 0.44285 8.310 5.51e-13 ***
CompRes 0.59499 0.07946 7.488 3.09e-11 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9554 on 98 degrees of freedom


Multiple R-squared: 0.3639, Adjusted R-squared: 0.3574
F-statistic: 56.07 on 1 and 98 DF, p-value: 3.085e-11

## Simple regression of Satisfaction on Advertising, followed by a scatter
## plot with a loess-smoothed trend line.
lm.Advertising <- lm(formula = Satisfaction ~ Advertising, data = fact)


lm.Advertising
summary(object = lm.Advertising)
with(fact, {
  plot(Advertising, Satisfaction)
  lines(loess.smooth(Advertising, Satisfaction), col = "Red")
})
> lm.Advertising

Call:
lm(formula = Satisfaction ~ Advertising, data = fact)

Coefficients:
(Intercept) Advertising
5.6259 0.3222
> summary(lm.Advertising)

Call:
lm(formula = Satisfaction ~ Advertising, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.34033 -0.92755 0.05577 0.79773 2.53412
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.6259 0.4237 13.279 < 2e-16 ***
Advertising 0.3222 0.1018 3.167 0.00206 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.141 on 98 degrees of freedom
Multiple R-squared: 0.09282, Adjusted R-squared: 0.08357
F-statistic: 10.03 on 1 and 98 DF, p-value: 0.002056

## Simple regression of Satisfaction on ProdLine, followed by a scatter plot
## with a loess-smoothed trend line.
lm.ProdLine <- lm(formula = Satisfaction ~ ProdLine, data = fact)


lm.ProdLine
summary(object = lm.ProdLine)
with(fact, {
  plot(ProdLine, Satisfaction)
  lines(loess.smooth(ProdLine, Satisfaction), col = "Red")
})

7
> lm.ProdLine

Call:
lm(formula = Satisfaction ~ ProdLine, data = fact)
Coefficients:
(Intercept) ProdLine
4.0220 0.4989
> summary(lm.ProdLine)

Call:
lm(formula = Satisfaction ~ ProdLine, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.3634 -0.7795 0.1097 0.7604 1.7373

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.02203 0.45471 8.845 3.87e-14 ***
ProdLine 0.49887 0.07641 6.529 2.95e-09 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1 on 98 degrees of freedom
Multiple R-squared: 0.3031, Adjusted R-squared: 0.296
F-statistic: 42.62 on 1 and 98 DF, p-value: 2.953e-09

## Simple regression of Satisfaction on SalesFImage, followed by a scatter
## plot with a loess-smoothed trend line.
lm.Salesimage <- lm(formula = Satisfaction ~ SalesFImage, data = fact)


lm.Salesimage
summary(object = lm.Salesimage)
with(fact, {
  plot(SalesFImage, Satisfaction)
  lines(loess.smooth(SalesFImage, Satisfaction), col = "Red")
})

> lm.Salesimage

Call:
lm(formula = Satisfaction ~ SalesFImage, data = fact)
Coefficients:
(Intercept) SalesFImage
4.070 0.556

> summary(lm.Salesimage)

Call:
lm(formula = Satisfaction ~ SalesFImage, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.2164 -0.5884 0.1838 0.6922 2.0728
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.06983 0.50874 8.000 2.54e-12 ***
SalesFImage 0.55596 0.09722 5.719 1.16e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

8
Residual standard error: 1.037 on 98 degrees of freedom
Multiple R-squared: 0.2502, Adjusted R-squared: 0.2426
F-statistic: 32.7 on 1 and 98 DF, p-value: 1.164e-07

## Simple regression of Satisfaction on ComPricing, followed by a scatter
## plot with a loess-smoothed trend line.
lm.compricing <- lm(formula = Satisfaction ~ ComPricing, data = fact)


lm.compricing
summary(object = lm.compricing)
with(fact, {
  plot(ComPricing, Satisfaction)
  lines(loess.smooth(ComPricing, Satisfaction), col = "Red")
})
> lm.compricing

Call:
lm(formula = Satisfaction ~ ComPricing, data = fact)

Coefficients:
(Intercept) ComPricing
8.0386 -0.1607
> summary(lm.compricing)

Call:
lm(formula = Satisfaction ~ ComPricing, data = fact)
Residuals:
Min 1Q Median 3Q Max
-1.9728 -0.9915 -0.1156 0.9111 2.5845

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.03856 0.54427 14.769 <2e-16 ***
ComPricing -0.16068 0.07621 -2.108 0.0376 *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.172 on 98 degrees of freedom
Multiple R-squared: 0.04339, Adjusted R-squared: 0.03363
F-statistic: 4.445 on 1 and 98 DF, p-value: 0.03756

## Simple regression of Satisfaction on WartyClaim, followed by a scatter
## plot with a loess-smoothed trend line.
lm.warty <- lm(formula = Satisfaction ~ WartyClaim, data = fact)


lm.warty
summary(object = lm.warty)
with(fact, {
  plot(WartyClaim, Satisfaction)
  lines(loess.smooth(WartyClaim, Satisfaction), col = "Red")
})

> lm.warty

Call:
lm(formula = Satisfaction ~ WartyClaim, data = fact)
Coefficients:
(Intercept) WartyClaim
5.3581 0.2581
> summary(lm.warty)

Call:
lm(formula = Satisfaction ~ WartyClaim, data = fact)

Residuals:
Min 1Q Median 3Q Max

9
-2.36504 -0.90202 0.03019 0.90763 2.88985
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.3581 0.8813 6.079 2.32e-08 ***
WartyClaim 0.2581 0.1445 1.786 0.0772 .
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.179 on 98 degrees of freedom
Multiple R-squared: 0.03152, Adjusted R-squared: 0.02164
F-statistic: 3.19 on 1 and 98 DF, p-value: 0.0772

## Simple regression of Satisfaction on OrdBilling, followed by a scatter
## plot with a loess-smoothed trend line.
lm.ordbill <- lm(formula = Satisfaction ~ OrdBilling, data = fact)


lm.ordbill
summary(object = lm.ordbill)
with(fact, {
  plot(OrdBilling, Satisfaction)
  lines(loess.smooth(OrdBilling, Satisfaction), col = "Red")
})

> lm.ordbill

Call:
lm(formula = Satisfaction ~ OrdBilling, data = fact)
Coefficients:
(Intercept) OrdBilling
4.0541 0.6695

> summary(lm.ordbill)

Call:
lm(formula = Satisfaction ~ OrdBilling, data = fact)

Residuals:
Min 1Q Median 3Q Max
-2.4005 -0.7071 -0.0344 0.7340 2.9673
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.0541 0.4840 8.377 3.96e-13 ***
OrdBilling 0.6695 0.1106 6.054 2.60e-08 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.022 on 98 degrees of freedom


Multiple R-squared: 0.2722, Adjusted R-squared: 0.2648
F-statistic: 36.65 on 1 and 98 DF, p-value: 2.602e-08

## Simple regression of Satisfaction on DelSpeed, followed by a scatter plot
## with a loess-smoothed trend line.
lm.delsp <- lm(formula = Satisfaction ~ DelSpeed, data = fact)


lm.delsp
summary(object = lm.delsp)
with(fact, {
  plot(DelSpeed, Satisfaction)
  lines(loess.smooth(DelSpeed, Satisfaction), col = "Red")
})

> lm.delsp

Call:
lm(formula = Satisfaction ~ DelSpeed, data = fact)

10
Coefficients:
(Intercept) DelSpeed
3.2791 0.9364
> summary(lm.delsp)

Call:
lm(formula = Satisfaction ~ DelSpeed, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.22475 -0.54846 0.08796 0.54462 2.59432

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.2791 0.5294 6.194 1.38e-08 ***
DelSpeed 0.9364 0.1339 6.994 3.30e-10 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9783 on 98 degrees of freedom
Multiple R-squared: 0.333, Adjusted R-squared: 0.3262
F-statistic: 48.92 on 1 and 98 DF, p-value: 3.3e-10

##PRINCIPAL COMPONENT AND FACTOR ANALYSIS

##Bartlett sphericity test for checking PCA/FA

## Bartlett's test on the predictors; the sample size is the row count of fact1
print(cortest.bartlett(R = fact2, n = nrow(fact1)))

> print(cortest.bartlett(fact2,nrow(fact1)))
R was not square, finding R from data
$chisq
[1] 619.2726

$p.value
[1] 1.79337e-96

$df
[1] 55

##KMO test for checking FA

## KMO measure of sampling adequacy, computed from the predictor correlations
kmo1 <- cor(x = fact2)
KMO(r = kmo1)

> kmo1 = cor(fact2)


> KMO(r = kmo1)
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = kmo1)
Overall MSA = 0.65
MSA for each item =
ProdQual Ecom TechSup CompRes
0.51 0.63 0.52 0.79
Advertising ProdLine SalesFImage ComPricing
0.78 0.62 0.62 0.75
WartyClaim OrdBilling DelSpeed
0.51 0.76 0.67

11
##PCA

##Eigen values and scree plot

## Eigen-decomposition of the predictor correlation matrix; the eigenvalues
## are kept separately for the scree plot.
ev <- eigen(x = cor(fact2))
eigenval <- ev[["values"]]
eigenval

> eigenval
[1] 3.42697133 2.55089671 1.69097648 1.08655606
[5] 0.60942409 0.55188378 0.40151815 0.24695154
[9] 0.20355327 0.13284158 0.09842702

## Scree plot: eigenvalue against component number, points joined by a red
## line, with a green reference line at an eigenvalue of 1.
factor <- as.numeric(1:11)
scree <- data.frame(factor, eigenval)
plot(scree, xlab = "Factors", ylab = "Eigen values", col = "blue")
lines(scree, col = "red")
abline(h = 1, col = "green")

## Unrotated principal component solution with four components.
pcamodel1 <- principal(fact2, nfactors = 4, rotate = "none")


pcamodel1
## Print once more with six significant digits. The previous line had the
## command fused with its own console echo ("print(...) > print(...)"), which
## R parses as a comparison of the two print() results.
print(pcamodel1, digits = 6)
Principal Components Analysis
Call: principal(r = fact2, nfactors = 4, rotate = "none")
Standardized loadings (pattern matrix) based upon correlation matrix
PC1 PC2 PC3 PC4
ProdQual 0.247672 -0.500704 -0.080977 0.670392
Ecom 0.307213 0.713143 0.305911 0.283924
TechSup 0.291922 -0.368890 0.794466 -0.201589
CompRes 0.871333 0.031055 -0.273535 -0.215064
Advertising 0.340132 0.580828 0.114559 0.331367
ProdLine 0.715983 -0.454838 -0.151206 0.211501
SalesFImage 0.377035 0.751773 0.313835 0.231593
ComPricing -0.280807 0.660353 -0.068979 -0.347679
WartyClaim 0.394184 -0.306129 0.778358 -0.193155
OrdBilling 0.809381 0.042163 -0.219672 -0.246892
DelSpeed 0.875786 0.116675 -0.302500 -0.205693
h2 u2 com
ProdQual 0.768029 0.2319706 2.19574
Ecom 0.777147 0.2228526 2.13556
TechSup 0.893112 0.1068876 1.87323
CompRes 0.881260 0.1187399 1.32947
Advertising 0.575979 0.4240214 2.37941
ProdLine 0.787105 0.2128950 2.01074
SalesFImage 0.859446 0.1405536 2.09727
ComPricing 0.640558 0.3594422 1.94456
WartyClaim 0.892247 0.1077535 1.98351
OrdBilling 0.766087 0.2339133 1.34855
DelSpeed 0.914430 0.0855696 1.39680

PC1 PC2 PC3


SS loadings 3.426971 2.550897 1.690976
Proportion Var 0.311543 0.231900 0.153725
Cumulative Var 0.311543 0.543443 0.697168
Proportion Explained 0.391412 0.291351 0.193135
Cumulative Proportion 0.391412 0.682764 0.875899
PC4
SS loadings 1.086556
Proportion Var 0.098778

12
Cumulative Var 0.795946
Proportion Explained 0.124101
Cumulative Proportion 1.000000
Mean item complexity = 1.9
Test of the hypothesis that 4 components are sufficient.
The root mean square of the residuals (RMSR) is 0.059561
with the empirical chi square 39.02253 with prob < 0.00177435
Fit based upon off diagonal values = 0.968166

## Four-component solution again, this time with varimax rotation
pcamodel2 <- principal(r = fact2, nfactors = 4, rotate = "varimax")


print(x = pcamodel2, digits = 2)

> print(pcamodel2, digits = 2)


Principal Components Analysis
Call: principal(r = fact2, nfactors = 4, rotate = "varimax")
Standardized loadings (pattern matrix) based upon correlation matrix
RC1 RC2 RC3 RC4 h2 u2 com
ProdQual 0.00 -0.01 -0.03 0.88 0.77 0.232 1.0
Ecom 0.06 0.87 0.05 -0.12 0.78 0.223 1.1
TechSup 0.02 -0.02 0.94 0.10 0.89 0.107 1.0
CompRes 0.93 0.12 0.05 0.09 0.88 0.119 1.1
Advertising 0.14 0.74 -0.08 0.01 0.58 0.424 1.1
ProdLine 0.59 -0.06 0.15 0.64 0.79 0.213 2.1
SalesFImage 0.13 0.90 0.08 -0.16 0.86 0.141 1.1
ComPricing -0.09 0.23 -0.25 -0.72 0.64 0.359 1.5
WartyClaim 0.11 0.05 0.93 0.10 0.89 0.108 1.1
OrdBilling 0.86 0.11 0.08 0.04 0.77 0.234 1.1
DelSpeed 0.94 0.18 0.00 0.05 0.91 0.086 1.1

RC1 RC2 RC3 RC4


SS loadings 2.89 2.23 1.86 1.77
Proportion Var 0.26 0.20 0.17 0.16
Cumulative Var 0.26 0.47 0.63 0.80
Proportion Explained 0.33 0.26 0.21 0.20
Cumulative Proportion 0.33 0.59 0.80 1.00
Mean item complexity = 1.2
Test of the hypothesis that 4 components are sufficient.
The root mean square of the residuals (RMSR) is 0.06
with the empirical chi square 39.02 with prob < 0.0018
Fit based upon off diagonal values = 0.97

##PCA Factor Graph

## Rotated loadings: shown with small values blanked (cutoff 0.3), then drawn
## as a path diagram.
pcareduced <- pcamodel2$loadings
print(pcareduced, cutoff = 0.3)


fa.diagram(pcareduced, cex = 0.7, e.size = 0.1, rsize = 0.15)

##Using Factor analysis

## Four-factor solution: principal-axis extraction (fm = "pa"), varimax rotation
factana <- fa(r = fact2, nfactors = 4, rotate = "varimax", fm = "pa")


print(x = factana, digits = 2)

13
> print(factana, digits = 2)
Factor Analysis using method = pa
Call: fa(r = fact2, nfactors = 4, rotate = "varimax", fm = "pa")
Standardized loadings (pattern matrix) based upon correlation matrix
PA1 PA2 PA3 PA4 h2 u2 com
ProdQual 0.02 -0.07 0.02 0.65 0.42 0.576 1.0
Ecom 0.07 0.79 0.03 -0.11 0.64 0.362 1.1
TechSup 0.02 -0.03 0.88 0.12 0.79 0.205 1.0
CompRes 0.90 0.13 0.05 0.13 0.84 0.157 1.1
Advertising 0.17 0.53 -0.04 -0.06 0.31 0.686 1.2
ProdLine 0.53 -0.04 0.13 0.71 0.80 0.200 1.9
SalesFImage 0.12 0.97 0.06 -0.13 0.98 0.021 1.1
ComPricing -0.08 0.21 -0.21 -0.59 0.44 0.557 1.6
WartyClaim 0.10 0.06 0.89 0.13 0.81 0.186 1.1
OrdBilling 0.77 0.13 0.09 0.09 0.62 0.378 1.1
DelSpeed 0.95 0.19 0.00 0.09 0.94 0.058 1.1

PA1 PA2 PA3 PA4


SS loadings 2.63 1.97 1.64 1.37
Proportion Var 0.24 0.18 0.15 0.12
Cumulative Var 0.24 0.42 0.57 0.69
Proportion Explained 0.35 0.26 0.22 0.18
Cumulative Proportion 0.35 0.60 0.82 1.00
Mean item complexity = 1.2
Test of the hypothesis that 4 factors are sufficient.
The degrees of freedom for the null model are 55 and the objective function was 6.55 with Chi Square of 619.27
The degrees of freedom for the model are 17 and the objective function was 0.33

The root mean square of the residuals (RMSR) is 0.02


The df corrected root mean square of the residuals is 0.03

The harmonic number of observations is 100 with the empirical chi square 3.19 with prob < 1
The total number of observations was 100 with Likelihood Chi Square = 30.27 with prob < 0.024
Tucker Lewis Index of factoring reliability = 0.921
RMSEA index = 0.096 and the 90 % confidence intervals are 0.032 0.139
BIC = -48.01
Fit based upon off diagonal values = 1
Measures of factor score adequacy
PA1
Correlation of (regression) scores with factors 0.98
Multiple R square of scores with factors 0.96
Minimum correlation of possible factor scores 0.93
PA2
Correlation of (regression) scores with factors 0.99
Multiple R square of scores with factors 0.97
Minimum correlation of possible factor scores 0.94
PA3
Correlation of (regression) scores with factors 0.94
Multiple R square of scores with factors 0.88
Minimum correlation of possible factor scores 0.77
PA4
Correlation of (regression) scores with factors 0.88
Multiple R square of scores with factors 0.78
Minimum correlation of possible factor scores 0.55

14
## Factor loadings: shown with small values blanked (cutoff 0.3)
factanared <- factana$loadings
print(factanared, cutoff = 0.3)

## Path diagram of the factor loadings

fa.diagram(factanared, cex = 0.7, e.size = 0.1, rsize = 0.15)

##Obtaining PCA and FA scores as dataframes

## Component scores from the rotated PCA, shown and exported
pcascores <- as.data.frame(pcamodel2[["scores"]])
pcascores
write.csv(pcascores, file = "pcascores.csv")
## Factor scores from the factor analysis, shown and exported
fascores <- as.data.frame(factana[["scores"]])
fascores
write.csv(fascores, file = "fascores.csv")

##REGRESSION MODEL

##Regression modelling without splitting the data

##PCA

## Regression table for PCA: column 13 of fact (Satisfaction) next to the
## four component scores, with descriptive column names.
pcaregdata <- cbind(fact[, 13], pcascores)
class(pcaregdata)
colnames(pcaregdata) <- c(
  "Satisfaction", "Purchase", "Marketing", "Afterservice", "Productqual"
)
pcaregdata
head(pcaregdata)
## Rounded copy (two decimals); this is the version modelled and exported
pcaregdata1 <- round(pcaregdata, digits = 2)
write.csv(pcaregdata1, file = "PCAregscores.csv")

##FA
## Regression table for FA: column 13 of fact (Satisfaction) next to the
## four factor scores.
faregdata <- cbind(fact[, 13], fascores)
class(faregdata)
## Rounded copy (two decimals) with descriptive column names
faregdata1 <- round(faregdata, digits = 2)
colnames(faregdata1) <- c(
  "CustSat", "Sales", "Brandim", "Aftersaleexp", "Prodplace"
)
write.csv(faregdata1, file = "FAregscores.csv")

###PCA Regression (without splitting)

## Satisfaction regressed on the four rotated components (all observations)
pcaregmodel <- lm(
  formula = Satisfaction ~ Purchase + Marketing + Afterservice + Productqual,
  data = pcaregdata1
)
summary(object = pcaregmodel)

> pcaregmodel = lm(Satisfaction~Purchase+Marketing+Afterservice+Productqual,data=pcaregdata1)
> summary(pcaregmodel)

Call:
lm(formula = Satisfaction ~ Purchase + Marketing + Afterservice +
Productqual, data = pcaregdata1)

Residuals:
Min 1Q Median 3Q Max
-1.6346 -0.5021 0.1368 0.4617 1.5235
Coefficients:
Estimate Std. Error t value Pr(>|t|)

15
(Intercept) 6.91813 0.07087 97.617 < 2e-16 ***
Purchase 0.61799 0.07122 8.677 1.11e-13 ***
Marketing 0.50994 0.07123 7.159 1.71e-10 ***
Afterservice 0.06686 0.07120 0.939 0.35
Productqual 0.54014 0.07124 7.582 2.27e-11 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.7087 on 95 degrees of freedom


Multiple R-squared: 0.6607, Adjusted R-squared: 0.6464
F-statistic: 46.25 on 4 and 95 DF, p-value: < 2.2e-16

##Test validity of PCA by Backtracking model(without data splitting)

## Fitted values of the PCA regression, shown, exported, and bound to the
## regression table for comparison with the actual scores.
pred.sat <- predict(object = pcaregmodel)
as.data.frame(pred.sat)
write.csv(pred.sat, file = "PCAPrediction.csv")
head(pred.sat)
backtrack <- cbind(pcaregdata1, pred.sat)
write.csv(backtrack, file = "PCAbacktrack.csv")

##PCA Backtracking graph

## Actual satisfaction by observation index.
plot(backtrack$Satisfaction, col = "red")
lines(backtrack$Satisfaction, col = "red")
## Predicted (blue) against actual (red) satisfaction by observation index.
## Changes from the previous version:
##  * the axis labels are set on plot(); before, xlab received the numeric
##    vector Satisfaction and the real labels were passed to lines(), which
##    does not take them.
##  * ylim spans both series so the actual values are not clipped to the range
##    of the predictions.
plot(backtrack$pred.sat, col = "blue", xlab = "Count", ylab = "Satisfaction",
     ylim = range(backtrack$Satisfaction, backtrack$pred.sat))
lines(backtrack$pred.sat, col = "blue")
lines(backtrack$Satisfaction, col = "red")
text(60, 9, "Actual Value", col = "red")
text(15, 9, "Predicted Value", col = "blue")

###FA Regression without data splitting

## Customer satisfaction regressed on the four factor scores (all observations)
faregmodel <- lm(
  formula = CustSat ~ Sales + Brandim + Aftersaleexp + Prodplace,
  data = faregdata1
)


summary(object = faregmodel)

> faregmodel = lm(CustSat~Sales+Brandim+Aftersaleexp+Prodplace, data=faregdata1)
> summary(faregmodel)

Call:
lm(formula = CustSat ~ Sales + Brandim + Aftersaleexp + Prodplace,
data = faregdata1)
Residuals:
Min 1Q Median 3Q Max
-1.70957 -0.46940 0.09918 0.41318 1.35209
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.91778 0.06693 103.358 < 2e-16 ***
Sales 0.57956 0.06856 8.453 3.32e-13 ***
Brandim 0.61964 0.06831 9.071 1.60e-14 ***
Aftersaleexp 0.05774 0.07171 0.805 0.423
Prodplace 0.61187 0.07650 7.998 3.03e-12 ***
---
Signif. codes:

16
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6693 on 95 degrees of freedom
Multiple R-squared: 0.6974, Adjusted R-squared: 0.6846
F-statistic: 54.73 on 4 and 95 DF, p-value: < 2.2e-16

##Backtracking on FA model (without data splitting)

## Fitted values of the FA regression, shown, exported, and bound to the
## regression table for comparison with the actual scores.
pred.sat1 <- predict(object = faregmodel)
as.data.frame(pred.sat1)
write.csv(pred.sat1, file = "FA prediction.csv")
head(pred.sat1)
backtrack1 <- cbind(faregdata1, pred.sat1)
write.csv(backtrack1, file = "FABacktrack.csv")

##FA Backtrack plot

## Actual satisfaction by observation index.
plot(backtrack1$CustSat, col = "red")
lines(backtrack1$CustSat, col = "red")
## Predicted (blue) against actual (red) satisfaction by observation index.
## Changes from the previous version:
##  * the axis labels are set on plot(); before, xlab received the numeric
##    vector Satisfaction and the real labels were passed to lines(), which
##    does not take them.
##  * ylim spans both series so the actual values are not clipped to the range
##    of the predictions.
plot(backtrack1$pred.sat1, col = "blue", xlab = "Count", ylab = "Satisfaction",
     ylim = range(backtrack1$CustSat, backtrack1$pred.sat1))
lines(backtrack1$pred.sat1, col = "blue")
lines(backtrack1$CustSat, col = "red")
text(41, 9, "Actual Value", col = "red")
text(62, 5.1, "Predicted Value", col = "blue")

##Regression modelling using train and test data

###Splitting PCA Regression data

## Reproducible 70/30 split of the PCA regression table: sampled row numbers
## form the training set, the remaining rows the test set.
set.seed(seed = 100)
pcasplit <- sample(
  seq_len(nrow(pcaregdata1)),
  0.7 * nrow(pcaregdata1)
)
pcatrain <- pcaregdata1[pcasplit, ]
pcatest <- pcaregdata1[-pcasplit, ]
class(pcatrain)
write.csv(pcatrain, file = "PCAtrain.csv")
write.csv(pcatest, file = "PCAtest.csv")

###PCA regression on test and train data

## Satisfaction regressed on all component scores, training rows only
pcatrainreg <- lm(formula = Satisfaction ~ ., data = pcatrain)


summary(object = pcatrainreg)

> summary(pcatrainreg)

Call:
lm(formula = Satisfaction ~ ., data = pcatrain)
Residuals:
Min 1Q Median 3Q Max
-1.62117 -0.49516 0.03112 0.44424 1.72181
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.89394 0.09064 76.055 < 2e-16 ***
Purchase 0.56130 0.08285 6.775 4.33e-09 ***

17
Marketing 0.59052 0.09065 6.514 1.24e-08 ***
Afterservice 0.09847 0.08683 1.134 0.261
Productqual 0.47035 0.08590 5.475 7.55e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7455 on 65 degrees of freedom
Multiple R-squared: 0.6536, Adjusted R-squared: 0.6323
F-statistic: 30.66 on 4 and 65 DF, p-value: 2.42e-14

## Separate regression of Satisfaction on all component scores, fitted to the
## test rows only
pcatestreg <- lm(formula = Satisfaction ~ ., data = pcatest)


summary(object = pcatestreg)

> summary(pcatestreg)

Call:
lm(formula = Satisfaction ~ ., data = pcatest)

Residuals:
Min 1Q Median 3Q Max
-0.7828 -0.3017 0.0270 0.2832 0.8652
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.15569 0.08762 81.669 < 2e-16 ***
Purchase 0.93570 0.10314 9.072 2.20e-09 ***
Marketing 0.23133 0.08354 2.769 0.0104 *
Afterservice -0.01100 0.08849 -0.124 0.9021
Productqual 0.85112 0.09574 8.890 3.26e-09 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4295 on 25 degrees of freedom


Multiple R-squared: 0.8695, Adjusted R-squared: 0.8486
F-statistic: 41.64 on 4 and 25 DF, p-value: 1.047e-10

##Backtracking for test and train data (PCA)

##PCA train backtrack

## Fitted values of the training-set PCA regression, shown, exported, and
## bound to the training rows for comparison with the actual scores.
pred.satttrain <- predict(pcatrainreg)
as.data.frame(pred.satttrain)
head(pred.satttrain)
write.csv(pred.satttrain, "PCAtrainpredict.csv")
backtrack2 <- cbind(pcatrain, pred.satttrain)
write.csv(backtrack2, "pcasplitbact.csv")
## Actual satisfaction by position in the training set.
plot(backtrack2$Satisfaction, col = "red")
lines(backtrack2$Satisfaction, col = "red")
## Predicted (blue) against actual (red). The axis labels are now set on
## plot(): before, xlab received the numeric vector Satisfaction and the real
## labels were passed to lines(), which does not take them. ylim spans both
## series so the actual values are not clipped.
plot(backtrack2$pred.satttrain, col = "blue",
     xlab = "Count", ylab = "Satisfaction",
     ylim = range(backtrack2$Satisfaction, backtrack2$pred.satttrain))
lines(backtrack2$pred.satttrain, col = "blue")
lines(backtrack2$Satisfaction, col = "red")
text(40, 8, "Actual Value", col = "red")
text(40.1, 5, "Predicted Value", col = "blue")

##PCA test backtrack

18
## Hold-out predictions for the test rows, produced by the model fitted on the
## training rows. The previous version used predict(pcatestreg), i.e. the
## fitted values of a model trained on the test rows themselves, which does
## not measure how the model performs on unseen data.
pred.satttest <- predict(pcatrainreg, newdata = pcatest)
as.data.frame(pred.satttest)
head(pred.satttest)
write.csv(pred.satttest, "PCAtestpredict.csv")
backtrack3 <- cbind(pcatest, pred.satttest)
write.csv(backtrack3, "pcatestsplitbact.csv")
## Actual satisfaction by position in the test set.
plot(backtrack3$Satisfaction, col = "red")
lines(backtrack3$Satisfaction, col = "red")
## Predicted (blue) against actual (red). The axis labels are now set on
## plot(): before, xlab received the numeric vector Satisfaction and the real
## labels were passed to lines(), which does not take them. ylim spans both
## series so the actual values are not clipped.
plot(backtrack3$pred.satttest, col = "blue",
     xlab = "Count", ylab = "Satisfaction",
     ylim = range(backtrack3$Satisfaction, backtrack3$pred.satttest))
lines(backtrack3$pred.satttest, col = "blue")
lines(backtrack3$Satisfaction, col = "red")
text(9, 7.45, "Actual Value", col = "red")
text(24.3, 8.2, "Predicted Value", col = "blue")

###Splitting FA regression data for FA analysis

## 70/30 split of the FA regression table: sampled row numbers form the
## training set, the remaining rows the test set.
fasplit <- sample(
  seq_len(nrow(faregdata1)),
  0.7 * nrow(faregdata1)
)


fatrain <- faregdata1[fasplit, ]
fatest <- faregdata1[-fasplit, ]
write.csv(fatrain, file = "FAtrain.csv")
write.csv(fatest, file = "FAtest.csv")

##FA regression on test and train data

## Customer satisfaction regressed on all factor scores, training rows only
fatrainreg <- lm(formula = CustSat ~ ., data = fatrain)


summary(object = fatrainreg)

> summary(fatrainreg)

Call:
lm(formula = CustSat ~ ., data = fatrain)
Residuals:
Min 1Q Median 3Q Max
-1.62091 -0.40715 -0.02284 0.48897 1.37143
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.88507 0.07720 89.183 < 2e-16 ***
Sales 0.60031 0.07416 8.095 1.98e-11 ***
Brandim 0.58485 0.08508 6.875 2.89e-09 ***
Aftersaleexp -0.01849 0.07925 -0.233 0.816
Prodplace 0.55627 0.09408 5.913 1.37e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6453 on 65 degrees of freedom
Multiple R-squared: 0.6747, Adjusted R-squared: 0.6547
F-statistic: 33.71 on 4 and 65 DF, p-value: 3.217e-15

## Separate regression of customer satisfaction on all factor scores, fitted
## to the test rows only
fatestreg <- lm(formula = CustSat ~ ., data = fatest)
summary(object = fatestreg)

19
> summary(fatestreg)

Call:
lm(formula = CustSat ~ ., data = fatest)
Residuals:
Min 1Q Median 3Q Max
-1.88613 -0.35542 -0.07904 0.54605 1.15182
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.0102 0.1343 52.203 < 2e-16 ***
Sales 0.5169 0.1757 2.942 0.006938 **
Brandim 0.6485 0.1434 4.523 0.000128 ***
Aftersaleexp 0.3002 0.1722 1.743 0.093609 .
Prodplace 0.6142 0.1762 3.487 0.001826 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7291 on 25 degrees of freedom
Multiple R-squared: 0.7658, Adjusted R-squared: 0.7283
F-statistic: 20.43 on 4 and 25 DF, p-value: 1.396e-07

##Backtracking for FA Test and train data

### FA train data backtrack

## Fitted values of the training-set FA regression, shown, exported, and
## bound to the training rows for comparison with the actual scores.
fapred.satttrain <- predict(fatrainreg)
as.data.frame(fapred.satttrain)
head(fapred.satttrain)
write.csv(fapred.satttrain, "FAtrainpredict.csv")
fabacktrack <- cbind(fatrain, fapred.satttrain)
write.csv(fabacktrack, "fatrainbact.csv")
## Actual satisfaction by position in the training set.
plot(fabacktrack$CustSat, col = "red")
lines(fabacktrack$CustSat, col = "red")
## Predicted (blue) against actual (red). The axis labels are now set on
## plot(): before, xlab received the numeric vector Satisfaction and the real
## labels were passed to lines(), which does not take them. ylim spans both
## series so the actual values are not clipped.
plot(fabacktrack$fapred.satttrain, col = "blue",
     xlab = "Count", ylab = "Satisfaction",
     ylim = range(fabacktrack$CustSat, fabacktrack$fapred.satttrain))
lines(fabacktrack$fapred.satttrain, col = "blue")
lines(fabacktrack$CustSat, col = "red")
text(12, 8, "Actual Value", col = "red")
text(50, 5, "Predicted Value", col = "blue")

##FA test data backtrack


## Hold-out predictions for the test rows, produced by the model fitted on the
## training rows. The previous version used predict(fatestreg), i.e. the
## fitted values of a model trained on the test rows themselves, which does
## not measure how the model performs on unseen data.
fapred.satttest <- predict(fatrainreg, newdata = fatest)
as.data.frame(fapred.satttest)
head(fapred.satttest)
write.csv(fapred.satttest, "FAtestpredict.csv")
fabacktrack2 <- cbind(fatest, fapred.satttest)
write.csv(fabacktrack2, "fatestbact.csv")
## Actual satisfaction by position in the test set.
plot(fabacktrack2$CustSat, col = "red")
lines(fabacktrack2$CustSat, col = "red")
## Predicted (blue) against actual (red). The axis labels are now set on
## plot(): before, xlab received the numeric vector Satisfaction and the real
## labels were passed to lines(), which does not take them. ylim spans both
## series so the actual values are not clipped.
plot(fabacktrack2$fapred.satttest, col = "blue",
     xlab = "Count", ylab = "Satisfaction",
     ylim = range(fabacktrack2$CustSat, fabacktrack2$fapred.satttest))
lines(fabacktrack2$fapred.satttest, col = "blue")
lines(fabacktrack2$CustSat, col = "red")
text(14, 8.7, "Actual Value", col = "red")
text(21, 9, "Predicted Value", col = "blue")

20

You might also like