Professional Documents
Culture Documents
Pratik Zanke Source Codes PDF
Pratik Zanke Source Codes PDF
#=======================================================================
#
# Data Analysis – Factor-Hair-Revised
#
#=======================================================================
## Load the packages used throughout the analysis
library ("psych")
library("readr")
library("dplyr")
library ("corrplot")
library("ggplot2")
library("nFactors")
library("lattice")
library("tidyr")
library("tidyverse")
library("car")
## Working directory
# NOTE(review): machine-specific absolute path; the relative file names used
# for reading and writing in this script resolve against it.
setwd("F:/project")
getwd()
## Import file
fact <- read.csv(file = "Factor-Hair-Revised-1.csv", header = TRUE)
## EXPLORATORY ANALYSIS
## Shape, container class, column names, and a missing-value check
dim(fact)
class(fact)
names(fact)
anyNA(fact)
[1] FALSE
##Structure of data
1
# Print each column's type together with its first few values
str(fact)
'data.frame': 100 obs. of 13 variables:
$ ID : int 1 2 3 4 5 6 7 8 9 10 ...
$ ProdQual : num 8.5 8.2 9.2 6.4 9 6.5 6.9 6.2 5.8 6.4 ...
$ Ecom : num 3.9 2.7 3.4 3.3 3.4 2.8 3.7 3.3 3.6 4.5 ...
$ TechSup : num 2.5 5.1 5.6 7 5.2 3.1 5 3.9 5.1 5.1 ...
$ CompRes : num 5.9 7.2 5.6 3.7 4.6 4.1 2.6 4.8 6.7 6.1 ...
$ Advertising : num 4.8 3.4 5.4 4.7 2.2 4 2.1 4.6 3.7 4.7 ...
$ ProdLine : num 4.9 7.9 7.4 4.7 6 4.3 2.3 3.6 5.9 5.7 ...
$ SalesFImage : num 6 3.1 5.8 4.5 4.5 3.7 5.4 5.1 5.8 5.7 ...
$ ComPricing : num 6.8 5.3 4.5 8.8 6.8 8.5 8.9 6.9 9.3 8.4 ...
$ WartyClaim : num 4.7 5.5 6.2 7 6.1 5.1 4.8 5.4 5.9 5.4 ...
$ OrdBilling : num 5 3.9 5.4 4.3 4.5 3.6 2.1 4.3 4.4 4.1 ...
$ DelSpeed : num 3.7 4.9 4.5 3 3.5 3.3 2 3.7 4.6 4.4 ...
$ Satisfaction: num 8.2 5.7 8.9 4.8 7.1 4.7 5.7 6.3 7 5.5 ...
# Per-column minimum, quartiles, median, mean and maximum
summary(fact)
ID ProdQual Ecom
Min. : 1.00 Min. : 5.000 Min. :2.200
1st Qu.: 25.75 1st Qu.: 6.575 1st Qu.:3.275
Median : 50.50 Median : 8.000 Median :3.600
Mean : 50.50 Mean : 7.810 Mean :3.672
3rd Qu.: 75.25 3rd Qu.: 9.100 3rd Qu.:3.925
Max. :100.00 Max. :10.000 Max. :5.700
TechSup CompRes Advertising
Min. :1.300 Min. :2.600 Min. :1.900
1st Qu.:4.250 1st Qu.:4.600 1st Qu.:3.175
Median :5.400 Median :5.450 Median :4.000
Mean :5.365 Mean :5.442 Mean :4.010
3rd Qu.:6.625 3rd Qu.:6.325 3rd Qu.:4.800
Max. :8.500 Max. :7.800 Max. :6.500
ProdLine SalesFImage ComPricing
Min. :2.300 Min. :2.900 Min. :3.700
1st Qu.:4.700 1st Qu.:4.500 1st Qu.:5.875
Median :5.750 Median :4.900 Median :7.100
Mean :5.805 Mean :5.123 Mean :6.974
3rd Qu.:6.800 3rd Qu.:5.800 3rd Qu.:8.400
Max. :8.400 Max. :8.200 Max. :9.900
WartyClaim OrdBilling DelSpeed
Min. :4.100 Min. :2.000 Min. :1.600
1st Qu.:5.400 1st Qu.:3.700 1st Qu.:3.400
Median :6.100 Median :4.400 Median :3.900
Mean :6.043 Mean :4.278 Mean :3.886
3rd Qu.:6.600 3rd Qu.:4.800 3rd Qu.:4.425
Max. :8.100 Max. :6.700 Max. :5.500
Satisfaction
Min. :4.700
1st Qu.:6.000
Median :7.050
Mean :6.918
3rd Qu.:7.625
Max. :9.900
2
# Extended descriptive statistics per column (n, mean, sd, median, trimmed
# mean, mad, min, max, range, skew, kurtosis, se)
describe(fact)
vars n mean sd median trimmed mad
ID 1 100 50.50 29.01 50.50 50.50 37.06
ProdQual 2 100 7.81 1.40 8.00 7.85 1.78
Ecom 3 100 3.67 0.70 3.60 3.63 0.52
TechSup 4 100 5.37 1.53 5.40 5.40 1.85
CompRes 5 100 5.44 1.21 5.45 5.46 1.26
Advertising 6 100 4.01 1.13 4.00 4.00 1.19
ProdLine 7 100 5.80 1.32 5.75 5.81 1.56
SalesFImage 8 100 5.12 1.07 4.90 5.09 0.89
ComPricing 9 100 6.97 1.55 7.10 7.01 1.93
WartyClaim 10 100 6.04 0.82 6.10 6.04 0.89
OrdBilling 11 100 4.28 0.93 4.40 4.31 0.74
DelSpeed 12 100 3.89 0.73 3.90 3.92 0.74
Satisfaction 13 100 6.92 1.19 7.05 6.90 1.33
min max range skew kurtosis se
ID 1.0 100.0 99.0 0.00 -1.24 2.90
ProdQual 5.0 10.0 5.0 -0.24 -1.17 0.14
Ecom 2.2 5.7 3.5 0.64 0.57 0.07
TechSup 1.3 8.5 7.2 -0.20 -0.63 0.15
CompRes 2.6 7.8 5.2 -0.13 -0.66 0.12
Advertising 1.9 6.5 4.6 0.04 -0.94 0.11
ProdLine 2.3 8.4 6.1 -0.09 -0.60 0.13
SalesFImage 2.9 8.2 5.3 0.37 0.26 0.11
ComPricing 3.7 9.9 6.2 -0.23 -0.96 0.15
WartyClaim 4.1 8.1 4.0 0.01 -0.53 0.08
OrdBilling 2.0 6.7 4.7 -0.32 0.11 0.09
DelSpeed 1.6 5.5 3.9 -0.45 0.09 0.07
Satisfaction 4.7 9.9 5.2 0.08 -0.86 0.12
## NOTE(review): attach() is kept only because later statements in this script
## (e.g. plot(CompRes, Satisfaction)) refer to the columns by bare name.
attach(fact)
## Drop column 1 (ID)
fact1 <- fact[-1]
##Univariate graphs
## Predictors only: fact1 without its 12th column (Satisfaction)
fact2 <- fact1[-12]
multi.hist(fact2, bcol = "grey", xlim = c(0, 11))
par(mfrow = c(2, 1))
boxplot(fact2, las = 2, cex.axis = 0.7)
par(mfrow = c(1, 1))
##Bivariate graphs
## One scatter plot of Satisfaction against each predictor with the fitted
## least-squares line on top. The previous par() settings are saved and
## restored so the 4x3 grid and narrow margins do not leak into later plots.
old_par <- par(mar = c(1.9, 1.9, 1.9, 1.9), mfrow = c(4, 3))
for (i in seq_along(fact2)) {
  predictor <- fact2[[i]]
  plot(
    predictor, fact1$Satisfaction,
    # label each panel with its own column name, not the whole name vector
    xlab = names(fact2)[i], ylab = "Satisfaction", col = "red",
    # the ratings reach 10, so c(0, 9) would clip the top of the data
    xlim = c(0, 10), ylim = c(0, 10),
    cex.axis = 0.75, cex.lab = 1.7, col.lab = "blue"
  )
  abline(lm(fact1$Satisfaction ~ predictor), col = "blue")
}
par(old_par)
3
##Outliers in boxplot
## MULTICOLLINEARITY
## Pairwise correlations between all predictors and Satisfaction
correlation <- cor(fact1)
write.csv(correlation, "cormat.csv")
## Open a fresh graphics device before plotting. The device call used to sit
## inside the corrplot() argument list, where it was matched to a plotting
## argument by position; dev.new() is also available on every platform.
dev.new(width = 10, height = 10)
corrplot(
  correlation,
  method = "number", number.digits = 2, number.cex = 0.8,
  tl.offset = 2, tl.cex = 1
)
4
##LINEAR REGRESSION
Call:
lm(formula = Satisfaction ~ ProdQual, data = fact)
Residuals:
Min 1Q Median 3Q Max
-1.88746 -0.72711 -0.01577 0.85641 2.25220
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.67593 0.59765 6.151 1.68e-08 ***
ProdQual 0.41512 0.07534 5.510 2.90e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.047 on 98 degrees of freedom
Multiple R-squared: 0.2365, Adjusted R-squared: 0.2287
F-statistic: 30.36 on 1 and 98 DF, p-value: 2.901e-07
Call:
lm(formula = Satisfaction ~ Ecom, data = fact)
Coefficients:
(Intercept) Ecom
5.1516 0.4811
> summary(lm.ecom)
Call:
lm(formula = Satisfaction ~ Ecom, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.37200 -0.78971 0.04959 0.68085 2.34580
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.1516 0.6161 8.361 4.28e-13 ***
5
Ecom 0.4811 0.1649 2.918 0.00437 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.149 on 98 degrees of freedom
Multiple R-squared: 0.07994, Adjusted R-squared: 0.07056
F-statistic: 8.515 on 1 and 98 DF, p-value: 0.004368
> lm.techsup
Call:
lm(formula = Satisfaction ~ TechSup, data = fact)
Coefficients:
(Intercept) TechSup
6.44757 0.08768
> summary(lm.techsup)
Call:
lm(formula = Satisfaction ~ TechSup, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.26136 -0.93297 0.04302 0.82501 2.85617
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.44757 0.43592 14.791 <2e-16 ***
TechSup 0.08768 0.07817 1.122 0.265
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.19 on 98 degrees of freedom
Multiple R-squared: 0.01268, Adjusted R-squared: 0.002603
F-statistic: 1.258 on 1 and 98 DF, p-value: 0.2647
TechSup 0.08768 0.07817 1.122 0.265
## Simple regression of Satisfaction on CompRes
lm.compres <- lm(formula = Satisfaction ~ CompRes, data = fact)
lm.compres
summary(lm.compres)
## Scatter plot with a loess smoother drawn over it
with(fact, {
  plot(CompRes, Satisfaction)
  lines(loess.smooth(CompRes, Satisfaction), col = "Red")
})
> lm.compres
Call:
lm(formula = Satisfaction ~ CompRes, data = fact)
Coefficients:
(Intercept) CompRes
3.680 0.595
> summary(lm.compres)
6
Call:
lm(formula = Satisfaction ~ CompRes, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.40450 -0.66164 0.04499 0.63037 2.70949
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.68005 0.44285 8.310 5.51e-13 ***
CompRes 0.59499 0.07946 7.488 3.09e-11 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Call:
lm(formula = Satisfaction ~ Advertising, data = fact)
Coefficients:
(Intercept) Advertising
5.6259 0.3222
> summary(lm.Advertising)
Call:
lm(formula = Satisfaction ~ Advertising, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.34033 -0.92755 0.05577 0.79773 2.53412
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.6259 0.4237 13.279 < 2e-16 ***
Advertising 0.3222 0.1018 3.167 0.00206 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.141 on 98 degrees of freedom
Multiple R-squared: 0.09282, Adjusted R-squared: 0.08357
F-statistic: 10.03 on 1 and 98 DF, p-value: 0.002056
7
> lm.ProdLine
Call:
lm(formula = Satisfaction ~ ProdLine, data = fact)
Coefficients:
(Intercept) ProdLine
4.0220 0.4989
> summary(lm.ProdLine)
Call:
lm(formula = Satisfaction ~ ProdLine, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.3634 -0.7795 0.1097 0.7604 1.7373
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.02203 0.45471 8.845 3.87e-14 ***
ProdLine 0.49887 0.07641 6.529 2.95e-09 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1 on 98 degrees of freedom
Multiple R-squared: 0.3031, Adjusted R-squared: 0.296
F-statistic: 42.62 on 1 and 98 DF, p-value: 2.953e-09
> lm.Salesimage
Call:
lm(formula = Satisfaction ~ SalesFImage, data = fact)
Coefficients:
(Intercept) SalesFImage
4.070 0.556
> summary(lm.Salesimage)
Call:
lm(formula = Satisfaction ~ SalesFImage, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.2164 -0.5884 0.1838 0.6922 2.0728
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.06983 0.50874 8.000 2.54e-12 ***
SalesFImage 0.55596 0.09722 5.719 1.16e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
8
Residual standard error: 1.037 on 98 degrees of freedom
Multiple R-squared: 0.2502, Adjusted R-squared: 0.2426
F-statistic: 32.7 on 1 and 98 DF, p-value: 1.164e-07
Call:
lm(formula = Satisfaction ~ ComPricing, data = fact)
Coefficients:
(Intercept) ComPricing
8.0386 -0.1607
> summary(lm.compricing)
Call:
lm(formula = Satisfaction ~ ComPricing, data = fact)
Residuals:
Min 1Q Median 3Q Max
-1.9728 -0.9915 -0.1156 0.9111 2.5845
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.03856 0.54427 14.769 <2e-16 ***
ComPricing -0.16068 0.07621 -2.108 0.0376 *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.172 on 98 degrees of freedom
Multiple R-squared: 0.04339, Adjusted R-squared: 0.03363
F-statistic: 4.445 on 1 and 98 DF, p-value: 0.03756
> lm.warty
Call:
lm(formula = Satisfaction ~ WartyClaim, data = fact)
Coefficients:
(Intercept) WartyClaim
5.3581 0.2581
> summary(lm.warty)
Call:
lm(formula = Satisfaction ~ WartyClaim, data = fact)
Residuals:
Min 1Q Median 3Q Max
9
-2.36504 -0.90202 0.03019 0.90763 2.88985
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.3581 0.8813 6.079 2.32e-08 ***
WartyClaim 0.2581 0.1445 1.786 0.0772 .
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.179 on 98 degrees of freedom
Multiple R-squared: 0.03152, Adjusted R-squared: 0.02164
F-statistic: 3.19 on 1 and 98 DF, p-value: 0.0772
> lm.ordbill
Call:
lm(formula = Satisfaction ~ OrdBilling, data = fact)
Coefficients:
(Intercept) OrdBilling
4.0541 0.6695
> summary(lm.ordbill)
Call:
lm(formula = Satisfaction ~ OrdBilling, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.4005 -0.7071 -0.0344 0.7340 2.9673
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.0541 0.4840 8.377 3.96e-13 ***
OrdBilling 0.6695 0.1106 6.054 2.60e-08 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> lm.delsp
Call:
lm(formula = Satisfaction ~ DelSpeed, data = fact)
10
Coefficients:
(Intercept) DelSpeed
3.2791 0.9364
> summary(lm.delsp)
Call:
lm(formula = Satisfaction ~ DelSpeed, data = fact)
Residuals:
Min 1Q Median 3Q Max
-2.22475 -0.54846 0.08796 0.54462 2.59432
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.2791 0.5294 6.194 1.38e-08 ***
DelSpeed 0.9364 0.1339 6.994 3.30e-10 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9783 on 98 degrees of freedom
Multiple R-squared: 0.333, Adjusted R-squared: 0.3262
F-statistic: 48.92 on 1 and 98 DF, p-value: 3.3e-10
## Bartlett test on the predictor columns; n is the number of rows in the data
bartlett_res <- cortest.bartlett(fact2, n = nrow(fact1))
print(bartlett_res)
> print(cortest.bartlett(fact2,nrow(fact1)))
R was not square, finding R from data
$chisq
[1] 619.2726
$p.value
[1] 1.79337e-96
$df
[1] 55
## KMO measure computed from the predictor correlation matrix
kmo1 <- cor(fact2)
KMO(kmo1)
11
##PCA
## Eigen-decomposition of the predictor correlation matrix
ev <- eigen(cor(fact2))
eigenval <- ev[["values"]]
print(eigenval)
> eigenval
[1] 3.42697133 2.55089671 1.69097648 1.08655606
[5] 0.60942409 0.55188378 0.40151815 0.24695154
[9] 0.20355327 0.13284158 0.09842702
## Scree plot: eigenvalue against component index, with a reference line at 1.
## The index is derived from eigenval so it always matches its length, and no
## global named `factor` is created (that would shadow base::factor).
scree <- data.frame(factor = seq_along(eigenval), eigenval = eigenval)
plot(scree, col = "blue", xlab = "Factors", ylab = "Eigen values")
lines(scree, col = "red")
abline(h = 1, col = "green")
12
Cumulative Var 0.795946
Proportion Explained 0.124101
Cumulative Proportion 1.000000
Mean item complexity = 1.9
Test of the hypothesis that 4 components are sufficient.
The root mean square of the residuals (RMSR) is 0.059561
with the empirical chi square 39.02253 with prob < 0.00177435
Fit based upon off diagonal values = 0.968166
13
> print(factana, digits = 2)
Factor Analysis using method = pa
Call: fa(r = fact2, nfactors = 4, rotate = "varimax", fm = "pa")
Standardized loadings (pattern matrix) based upon correlation matrix
PA1 PA2 PA3 PA4 h2 u2 com
ProdQual 0.02 -0.07 0.02 0.65 0.42 0.576 1.0
Ecom 0.07 0.79 0.03 -0.11 0.64 0.362 1.1
TechSup 0.02 -0.03 0.88 0.12 0.79 0.205 1.0
CompRes 0.90 0.13 0.05 0.13 0.84 0.157 1.1
Advertising 0.17 0.53 -0.04 -0.06 0.31 0.686 1.2
ProdLine 0.53 -0.04 0.13 0.71 0.80 0.200 1.9
SalesFImage 0.12 0.97 0.06 -0.13 0.98 0.021 1.1
ComPricing -0.08 0.21 -0.21 -0.59 0.44 0.557 1.6
WartyClaim 0.10 0.06 0.89 0.13 0.81 0.186 1.1
OrdBilling 0.77 0.13 0.09 0.09 0.62 0.378 1.1
DelSpeed 0.95 0.19 0.00 0.09 0.94 0.058 1.1
The harmonic number of observations is 100 with the empirical chi square 3.19 with prob < 1
The total number of observations was 100 with Likelihood Chi Square =
30.27 with prob < 0.024
Tucker Lewis Index of factoring reliability = 0.921
RMSEA index = 0.096 and the 90 % confidence intervals are 0.032 0.139
BIC = -48.01
Fit based upon off diagonal values = 1
Measures of factor score adequacy
PA1
Correlation of (regression) scores with factors 0.98
Multiple R square of scores with factors 0.96
Minimum correlation of possible factor scores 0.93
PA2
Correlation of (regression) scores with factors 0.99
Multiple R square of scores with factors 0.97
Minimum correlation of possible factor scores 0.94
PA3
Correlation of (regression) scores with factors 0.94
Multiple R square of scores with factors 0.88
Minimum correlation of possible factor scores 0.77
PA4
Correlation of (regression) scores with factors 0.88
Multiple R square of scores with factors 0.78
Minimum correlation of possible factor scores 0.55
14
## Show the factor loadings, hiding entries below 0.3
factanared <- print(factana[["loadings"]], cutoff = 0.3)
## Component scores from the PCA model: display and save
pcascores <- as.data.frame(pcamodel2[["scores"]])
print(pcascores)
write.csv(pcascores, file = "pcascores.csv")
## Factor scores from the factor-analysis model: display and save
fascores <- as.data.frame(factana[["scores"]])
print(fascores)
write.csv(fascores, file = "fascores.csv")
##REGRESSION MODEL
##PCA
## Put column 13 of fact (Satisfaction) in front of the PCA scores
pcaregdata <- cbind(fact[[13]], pcascores)
class(pcaregdata)
names(pcaregdata) <- c(
  "Satisfaction", "Purchase", "Marketing", "Afterservice", "Productqual"
)
print(pcaregdata)
head(pcaregdata)
## Two-decimal copy used for the regression and written to disk
pcaregdata1 <- round(pcaregdata, digits = 2)
write.csv(pcaregdata1, file = "PCAregscores.csv")
##FA
## Put column 13 of fact (Satisfaction) in front of the factor scores
faregdata <- cbind(fact[[13]], fascores)
class(faregdata)
## Two-decimal copy with descriptive column names, written to disk
faregdata1 <- round(faregdata, digits = 2)
names(faregdata1) <- c(
  "CustSat", "Sales", "Brandim", "Aftersaleexp", "Prodplace"
)
write.csv(faregdata1, file = "FAregscores.csv")
## Regress Satisfaction on the four (rounded) PCA scores
pcaregmodel <- lm(
  formula = Satisfaction ~ Purchase + Marketing + Afterservice + Productqual,
  data = pcaregdata1
)
summary(pcaregmodel)
Call:
lm(formula = Satisfaction ~ Purchase + Marketing + Afterservice +
Productqual, data = pcaregdata1)
Residuals:
Min 1Q Median 3Q Max
-1.6346 -0.5021 0.1368 0.4617 1.5235
Coefficients:
Estimate Std. Error t value Pr(>|t|)
15
(Intercept) 6.91813 0.07087 97.617 < 2e-16 ***
Purchase 0.61799 0.07122 8.677 1.11e-13 ***
Marketing 0.50994 0.07123 7.159 1.71e-10 ***
Afterservice 0.06686 0.07120 0.939 0.35
Productqual 0.54014 0.07124 7.582 2.27e-11 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Fitted values from the PCA-score regression
pred.sat <- predict(pcaregmodel)
as.data.frame(pred.sat)
write.csv(pred.sat, "PCAPrediction.csv")
head(pred.sat)
## Actual and fitted values side by side
backtrack <- cbind(pcaregdata1, pred.sat)
write.csv(backtrack, "PCAbacktrack.csv")
## Actual values on their own
plot(backtrack$Satisfaction, col = "red")
lines(backtrack$Satisfaction, col = "red")
## Predicted (blue) against actual (red). Axis labels are literal strings and
## are set on plot(), since lines() does not accept xlab/ylab. The y-range
## spans both series so neither line is clipped.
y_range <- range(backtrack$Satisfaction, backtrack$pred.sat)
plot(
  backtrack$pred.sat,
  col = "blue", xlab = "Count", ylab = "Satisfaction", ylim = y_range
)
lines(backtrack$pred.sat, col = "blue")
lines(backtrack$Satisfaction, col = "red")
text(60, 9, "Actual Value", col = "red")
text(15, 9, "Predicted Value", col = "blue")
Call:
lm(formula = CustSat ~ Sales + Brandim + Aftersaleexp + Prodplace,
data = faregdata1)
Residuals:
Min 1Q Median 3Q Max
-1.70957 -0.46940 0.09918 0.41318 1.35209
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.91778 0.06693 103.358 < 2e-16 ***
Sales 0.57956 0.06856 8.453 3.32e-13 ***
Brandim 0.61964 0.06831 9.071 1.60e-14 ***
Aftersaleexp 0.05774 0.07171 0.805 0.423
Prodplace 0.61187 0.07650 7.998 3.03e-12 ***
---
Signif. codes:
16
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6693 on 95 degrees of freedom
Multiple R-squared: 0.6974, Adjusted R-squared: 0.6846
F-statistic: 54.73 on 4 and 95 DF, p-value: < 2.2e-16
## Fitted values from the factor-score regression
pred.sat1 <- predict(faregmodel)
as.data.frame(pred.sat1)
write.csv(pred.sat1, "FA prediction.csv")
head(pred.sat1)
## Actual and fitted values side by side
backtrack1 <- cbind(faregdata1, pred.sat1)
write.csv(backtrack1, "FABacktrack.csv")
## Actual values on their own
plot(backtrack1$CustSat, col = "red")
lines(backtrack1$CustSat, col = "red")
## Predicted (blue) against actual (red). Axis labels are literal strings and
## are set on plot(), since lines() does not accept xlab/ylab. The y-range
## spans both series so neither line is clipped.
y_range <- range(backtrack1$CustSat, backtrack1$pred.sat1)
plot(
  backtrack1$pred.sat1,
  col = "blue", xlab = "Count", ylab = "Satisfaction", ylim = y_range
)
lines(backtrack1$pred.sat1, col = "blue")
lines(backtrack1$CustSat, col = "red")
text(41, 9, "Actual Value", col = "red")
text(62, 5.1, "Predicted Value", col = "blue")
## Reproducible 70/30 split of the PCA regression data
set.seed(100)
n_rows <- nrow(pcaregdata1)
pcasplit <- sample(seq_len(n_rows), 0.7 * n_rows)
## Sampled rows form the training set; the remainder is the test set
pcatrain <- pcaregdata1[pcasplit, ]
pcatest <- pcaregdata1[-pcasplit, ]
class(pcatrain)
write.csv(pcatrain, file = "PCAtrain.csv")
write.csv(pcatest, file = "PCAtest.csv")
> summary(pcatrainreg)
Call:
lm(formula = Satisfaction ~ ., data = pcatrain)
Residuals:
Min 1Q Median 3Q Max
-1.62117 -0.49516 0.03112 0.44424 1.72181
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.89394 0.09064 76.055 < 2e-16 ***
Purchase 0.56130 0.08285 6.775 4.33e-09 ***
17
Marketing 0.59052 0.09065 6.514 1.24e-08 ***
Afterservice 0.09847 0.08683 1.134 0.261
Productqual 0.47035 0.08590 5.475 7.55e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7455 on 65 degrees of freedom
Multiple R-squared: 0.6536, Adjusted R-squared: 0.6323
F-statistic: 30.66 on 4 and 65 DF, p-value: 2.42e-14
> summary(pcatestreg)
Call:
lm(formula = Satisfaction ~ ., data = pcatest)
Residuals:
Min 1Q Median 3Q Max
-0.7828 -0.3017 0.0270 0.2832 0.8652
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.15569 0.08762 81.669 < 2e-16 ***
Purchase 0.93570 0.10314 9.072 2.20e-09 ***
Marketing 0.23133 0.08354 2.769 0.0104 *
Afterservice -0.01100 0.08849 -0.124 0.9021
Productqual 0.85112 0.09574 8.890 3.26e-09 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
## Fitted values of the training-set model on the training rows
pred.satttrain <- predict(pcatrainreg)
as.data.frame(pred.satttrain)
head(pred.satttrain)
write.csv(pred.satttrain, "PCAtrainpredict.csv")
## Actual and fitted values side by side
backtrack2 <- cbind(pcatrain, pred.satttrain)
write.csv(backtrack2, "pcasplitbact.csv")
## Actual values on their own
plot(backtrack2$Satisfaction, col = "red")
lines(backtrack2$Satisfaction, col = "red")
## Predicted (blue) against actual (red). Axis labels are literal strings and
## are set on plot(), since lines() does not accept xlab/ylab. The y-range
## spans both series so neither line is clipped.
y_range <- range(backtrack2$Satisfaction, backtrack2$pred.satttrain)
plot(
  backtrack2$pred.satttrain,
  col = "blue", xlab = "Count", ylab = "Satisfaction", ylim = y_range
)
lines(backtrack2$pred.satttrain, col = "blue")
lines(backtrack2$Satisfaction, col = "red")
text(40, 8, "Actual Value", col = "red")
text(40.1, 5, "Predicted Value", col = "blue")
18
## Held-out predictions: apply the model fitted on the training rows to the
## test rows. Predicting from a model that was itself fitted on the test set
## only returns in-sample fitted values and says nothing about how the
## training model generalises.
pred.satttest <- predict(pcatrainreg, newdata = pcatest)
as.data.frame(pred.satttest)
head(pred.satttest)
write.csv(pred.satttest, "PCAtestpredict.csv")
## Actual and predicted values side by side
backtrack3 <- cbind(pcatest, pred.satttest)
write.csv(backtrack3, "pcatestsplitbact.csv")
## Actual values on their own
plot(backtrack3$Satisfaction, col = "red")
lines(backtrack3$Satisfaction, col = "red")
## Predicted (blue) against actual (red). Axis labels are literal strings and
## are set on plot(), since lines() does not accept xlab/ylab. The y-range
## spans both series so neither line is clipped.
y_range <- range(backtrack3$Satisfaction, backtrack3$pred.satttest)
plot(
  backtrack3$pred.satttest,
  col = "blue", xlab = "Count", ylab = "Satisfaction", ylim = y_range
)
lines(backtrack3$pred.satttest, col = "blue")
lines(backtrack3$Satisfaction, col = "red")
text(9, 7.45, "Actual Value", col = "red")
text(24.3, 8.2, "Predicted Value", col = "blue")
> summary(fatrainreg)
Call:
lm(formula = CustSat ~ ., data = fatrain)
Residuals:
Min 1Q Median 3Q Max
-1.62091 -0.40715 -0.02284 0.48897 1.37143
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.88507 0.07720 89.183 < 2e-16 ***
Sales 0.60031 0.07416 8.095 1.98e-11 ***
Brandim 0.58485 0.08508 6.875 2.89e-09 ***
Aftersaleexp -0.01849 0.07925 -0.233 0.816
Prodplace 0.55627 0.09408 5.913 1.37e-07 ***
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6453 on 65 degrees of freedom
Multiple R-squared: 0.6747, Adjusted R-squared: 0.6547
F-statistic: 33.71 on 4 and 65 DF, p-value: 3.217e-15
## Regression of CustSat on all factor scores, fitted on the test rows
# NOTE(review): this fits a second model on the test set instead of scoring
# the training model on it; confirm that is intended.
fatestreg <- lm(formula = CustSat ~ ., data = fatest)
summary(fatestreg)
19
> summary(fatestreg)
Call:
lm(formula = CustSat ~ ., data = fatest)
Residuals:
Min 1Q Median 3Q Max
-1.88613 -0.35542 -0.07904 0.54605 1.15182
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.0102 0.1343 52.203 < 2e-16 ***
Sales 0.5169 0.1757 2.942 0.006938 **
Brandim 0.6485 0.1434 4.523 0.000128 ***
Aftersaleexp 0.3002 0.1722 1.743 0.093609 .
Prodplace 0.6142 0.1762 3.487 0.001826 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7291 on 25 degrees of freedom
Multiple R-squared: 0.7658, Adjusted R-squared: 0.7283
F-statistic: 20.43 on 4 and 25 DF, p-value: 1.396e-07
## Fitted values of the factor-score training model on the training rows
fapred.satttrain <- predict(fatrainreg)
as.data.frame(fapred.satttrain)
head(fapred.satttrain)
write.csv(fapred.satttrain, "FAtrainpredict.csv")
## Actual and fitted values side by side
fabacktrack <- cbind(fatrain, fapred.satttrain)
write.csv(fabacktrack, "fatrainbact.csv")
## Actual values on their own
plot(fabacktrack$CustSat, col = "red")
lines(fabacktrack$CustSat, col = "red")
## Predicted (blue) against actual (red). Axis labels are literal strings and
## are set on plot(), since lines() does not accept xlab/ylab. The y-range
## spans both series so neither line is clipped.
y_range <- range(fabacktrack$CustSat, fabacktrack$fapred.satttrain)
plot(
  fabacktrack$fapred.satttrain,
  col = "blue", xlab = "Count", ylab = "Satisfaction", ylim = y_range
)
lines(fabacktrack$fapred.satttrain, col = "blue")
lines(fabacktrack$CustSat, col = "red")
text(12, 8, "Actual Value", col = "red")
text(50, 5, "Predicted Value", col = "blue")
20