
#---------------------------------------------------------------------------#

Ex 1: X BAR CHART
#---------------------------------------------------------------------------#
xbar=read.csv(file.choose(),header=T)
xbar
install.packages("qcc")
library("qcc")
qcc(xbar,type="xbar",nsigmas=3)
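
# A self-contained sketch for readers without the course CSV (the simulated
# 20x5 layout is an assumption): qcc expects one row per sample and one
# column per observation within the sample.
set.seed(1)
sim = matrix(rnorm(100, mean = 10, sd = 1), nrow = 20, ncol = 5)
qcc(sim, type = "xbar", nsigmas = 3)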

#---------------------------------------------------------------------------#
Ex 2: R CHART
#---------------------------------------------------------------------------#
rchart=read.csv(file.choose(),header=T,sep=",")
head(rchart)
data=rchart[c(-1)]
data
install.packages("qcc")
library(qcc)
qcc(data,type="R",nsigma=3)

#---------------------------------------------------------------------------#
Ex 1: X Bar and R CHART
#---------------------------------------------------------------------------#
data=read.csv(file.choose(),header=T)
head(data)
data1=data[,-1]
head(data1)
qcc(data1,type="xbar",nsigma=3)
qcc(data1,type="R",nsigma=3)

#---------------------------------------------------------------------------#
Ex 2: X Bar and S CHART
#---------------------------------------------------------------------------#

data=read.csv(file.choose(),header=T)
head(data)
data1=data[,-1]
head(data1)
qcc(data1,type="xbar",nsigma=3)
qcc(data1,type="S",nsigma=3)

#---------------------------------------------------------------------------#
Ex 3: p chart for varying sample size
#---------------------------------------------------------------------------#
data=read.csv(file.choose(),header=T)
head(data)
library(qcc)
qcc(data$Non_conforming_units,data$Sample_size,type='p')
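
# A sketch with simulated counts (the binomial model and sizes below are
# assumptions; the column names above come from the course CSV). The p chart
# takes a per-sample sizes vector directly:
set.seed(2)
n = sample(80:120, 25, replace = TRUE)
d = rbinom(25, n, 0.05)
qcc(d, sizes = n, type = "p")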

#---------------------------------------------------------------------------#
Ex 4: p chart for constant sample size
#---------------------------------------------------------------------------#

data=read.csv(file.choose(),header=T)
Sample_size=rep(100,20)
data1=cbind(data,Sample_size)
head(data1)
library(qcc)
qcc(data1$Defectives,data1$Sample_size,type='p')

#---------------------------------------------------------------------------#
Ex 5: NP chart
#---------------------------------------------------------------------------#

data=read.csv(file.choose(),header=T)
data
library(qcc)
qcc(data$Defectives,data$Sample_size,type='np')

#---------------------------------------------------------------------------#
Ex 6 C chart
#---------------------------------------------------------------------------#

data=read.csv(file.choose(),header=T)
data
library(qcc)
qcc(data$Defectives,data$Sample_size,type='c')

#---------------------------------------------------------------------------#
Ex 7 U chart for constant sample size
#---------------------------------------------------------------------------#
data=read.csv(file.choose(),header=T)
head(data)
library(qcc)
sample_size=rep(100,20)
qcc(data$Defectives,sample_size,type="u")

#---------------------------------------------------------------------------#
Ex 8 U chart for varying sample size
#---------------------------------------------------------------------------#

data=read.csv(file.choose(),header=T)
head(data)
library(qcc)
qcc(data$Defects,data$Sample_Size,type="u")

#---------------------------------------------------------------
# EX No: 9 COMPARISON OF SHEWHART AND CUSUM CHART
#---------------------------------------------------------------

data=c(9.45,7.99,9.29,11.66,12.16,10.18,8.04,11.46,9.2,10.34,9.03,11.47,10.51,
9.4,10.08,9.37,10.62,10.31,8.52,10.84,10.9,9.33,12.29,
11.5,10.6,11.08,10.38,11.62,11.31,10.52)
length(data)
install.packages("qcc")
library(qcc)
qcc(data,type="xbar.one",nsigma=3)
cusum(data,decision.interval=5,center=10,std.dev=1,sizes=1)
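
# Note: the Shewhart chart above reacts mainly to large shifts in individual
# points, while the CUSUM accumulates deviations from the target (center = 10)
# and so signals small sustained shifts sooner; decision.interval = 5 places
# the decision boundaries at 5 standard errors of the cumulative sums.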

#------------------------------------------------------------------
Ex 10 EWMA Control Chart
#------------------------------------------------------------------
install.packages("qcc")
library("qcc")
data=c(9.45,7.99,9.29,11.66,12.16,10.18,8.04,11.46,9.2,10.34,
9.03,11.47,10.51,9.4,10.08,9.37,10.62,10.31,8.52,10.84,10.9,
9.33,12.29,11.5,10.6,11.08,10.38,11.62,11.31,10.52)
qc=ewma(data,lambda=0.1,nsigmas=2.7,center=10,std.dev=1)
summary(qc)
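
# The plotted EWMA statistic follows z[t] = lambda*x[t] + (1 - lambda)*z[t-1],
# starting from z[0] = center; with lambda = 0.1 the statistic carries a long
# memory, which makes the chart sensitive to small sustained shifts.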

#------------------------------------------------------------------
Ex 11 OC Curves for the X-bar Chart
#------------------------------------------------------------------
install.packages("qcc")
library("qcc")
data=read.csv(file.choose(),header=T)
data
data=data[,-1]
data
beta=oc.curves.xbar(qcc(data,type="xbar",nsigmas=3,plot=F))
print(round(beta,digits=4))
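
# beta is the operating-characteristic value: the probability of failing to
# detect a process shift of the given size on the first sample after the
# shift (Type II error).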

#------------------------------------------------------------------
Ex 12 Process Capability Analysis 1
#------------------------------------------------------------------
install.packages("qcc")
library("qcc")
x1=c(83,83,85,80,83,88,80,79,78,88,78,81,77,81,85,83,85,82,75,85)
x2=c(79,81,87,81,84,87,81,89,83,83,80,85,82,85,87,83,84,83,77,85)
x3=c(81,85,83,83,85,89,82,88,85,82,78,85,84,85,82,77,84,80,84,86)
x4=c(82,87,84,84,83,90,84,89,86,85,82,85,85,85,85,81,80,80,77,83)
x5=c(83,81,86,83,84,88,81,89,93,82,81,84,87,84,89,80,82,83,78,80)
piston=data.frame(x1,x2,x3,x4,x5)
library(qcc)
q=qcc(piston,"xbar",nsigmas=3,plot=F)
process.capability(q,spec.limits=c(80,84))
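
# Hand check of the reported indices, using the chart's own estimates:
# Cp = (USL - LSL)/(6*sigma), Cpk = min(USL - xbar, xbar - LSL)/(3*sigma).
sigma = q$std.dev; m = q$center
c(Cp = (84 - 80)/(6*sigma), Cpk = min(84 - m, m - 80)/(3*sigma))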

#------------------------------------------------------------------
Ex 13 Process Capability Analysis 2
#------------------------------------------------------------------
install.packages("qcc")
library(qcc)
data=read.csv(file.choose(),header=T)
data
data=data[,-1]
data
q=qcc(data,"xbar",nsigmas=3,plot=F)
process.capability(q,spec.limits=c(55,75))

#------------------------------------------------------------------
Ex 14-16 Pattern Analysis
#------------------------------------------------------------------
library(qcc)
data=read.csv(file.choose(),header=T)
data
data=data[,-1]
data
qcc(data,"xbar",nsigmas=3)
qcc(data,"S",nsigmas=3)
#===============================================================
#MVA
#===============================================================
# ---------------------------------------------------------------
# Exercise 1: Cluster Analysis - Hierarchical Clustering
# ---------------------------------------------------------------
# Dataset
cars2 = data.frame(mtcars)
# Step 1: Standardize the data
cars = scale(mtcars) # standardize variables
# Step 2: Find the Distance Matrix
d = dist(cars, method = "euclidean") # distance matrix
# Step 3: Perform Hierarchical Clustering procedure
# Linkage Method: Single, Complete Linkage, Average, Ward
fit = hclust(d, method="ward.D")
# Display dendrogram
plot(fit,main="Hierarchical clustering for mtcars dataset")
# cut tree into 3 clusters
groups = cutree(fit, k=3)
# draw dendrogram with red borders around the 3 clusters
rect.hclust(fit, k=3, border="red")
#Getting the dimension
dim(mtcars)
#Subsetting the dataset
cars2 = data.frame(mtcars[2:11])
#Aggregate the dataset
aggregate(cars2,by = list(groups),FUN=mean)
#INTER
From the dendrogram it is clear that three clusters have been formed from the
32 observations in the data, and the three clusters are highlighted by the
red rectangles drawn above.

#-------------------------------------------------
#Exercise 2: Cluster Analysis K Means Clustering
#--------------------------------------------------
#K-Means Cluster Analysis: kmeans(data, number of clusters)
cars=data.frame(mtcars)
#Standardize the data
cars=scale(mtcars)
#Determine the number of clusters (elbow rule)
wilks_lambda = numeric(10)
for (i in 1:10)
{
set.seed(i)
fit=kmeans(cars,i)
wilks_lambda[i]= sum (fit$withinss) /fit$totss
}
plot(1:10, wilks_lambda, type="b", xlab="Number of Clusters",
     ylab="Cluster WSS / TSS")

#4 cluster solution - get cluster means


fit = kmeans(cars, 4)
cars_vars = data.frame(cars[, 2:11])
aggregate(mtcars, by=list(fit$cluster), FUN=mean)
#append cluster assignment
out2 = data.frame(mtcars, fit$cluster)
out2
#INTER
Using the standardized data, the ratio of within-cluster to total sum of
squares (labelled wilks_lambda above) is computed by k-means for 1 to 10
clusters and plotted. From the plot we can see that four clusters would be
apt for this dataset (elbow rule). The cluster-wise means of the variables
are then found.

# ---------------------------------------------------------------
# Exercise 3: Principal Component Analysis
# ---------------------------------------------------------------
#------------------------- Variable Clustering ------------------------
#install.packages("ClustOfVar")
library("ClustOfVar")

#------------------ Principal Component Analysis ------------------------


cars_ratings = read.csv(file.choose(),header=T)
row.names(cars_ratings) = cars_ratings[,1]
head(cars_ratings)
fit = princomp(cars_ratings[,2:9],cor=TRUE)
fit
summary(fit)
plot(fit,type="lines",main="Principal Component Scree Plot")
coeff = loadings(fit)
print(coeff,digits=3,cutoff=0)
cars_pcscores = predict(fit,cars_ratings[,2:9])
biplot(fit , xlab="PC-1", ylab="PC-2", main="BI-PLOT" , cex=0.8)
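
# The eigenvalues quoted in the interpretation below are recoverable as the
# squared standard deviations of the components:
round(fit$sdev^2, 3)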
#INTER
- The first principal component contains 77.8% of the total information, with an eigenvalue of 2.49.
- The second principal component contains 15.3% of the total information, with an eigenvalue of 1.105.
- The first two principal components explain 93.1% of the total information in the given data.
With the help of the scree plot, we can see that there is not much difference
in the variation explained beyond the third component (elbow rule); hence the
first two components are the ones explaining the higher variation. Based on
the scree plot along with the minimum-eigenvalue criterion (eigenvalue greater
than one), we can retain the first two principal components, which capture
93.1% of the total variation.
The principal components suggest that Audi and BMW stand for luxury and
flawless bodywork and finish. Cadillac, Lexus and Mercedes are known for
safety and a strong brand reputation. Acura and Infiniti are known for being
fun and playful, young and youthful. The other brands are known for just
basic need and being cheap and easy to maintain.

# ---------------------------------------------------------------
# Exercise 4: Factor Analysis
# ---------------------------------------------------------------
#----------------- Factor Analysis -------------------------------------
cars_ratings = read.csv(file.choose(),header=T)
row.names(cars_ratings) = cars_ratings[,1]
head(cars_ratings)
fit_fa = factanal(cars_ratings[,2:9],2, rotation="varimax",scores="regression")
print(fit_fa,digits=2, cutoff=.001)
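
# The communalities referenced in the interpretation below are one minus the
# uniquenesses estimated by factanal:
round(1 - fit_fa$uniquenesses, 2)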
factor_scores = fit_fa$scores
factor_scores = data.frame(factor_scores)
names(factor_scores) = c("Sporty","Branded_Luxury")
plot(factor_scores)
library(ggplot2)
ggplot(data=factor_scores, aes(x=Sporty, y=Branded_Luxury)) +
  geom_text(aes(label=row.names(cars_ratings)), size=3, hjust=0, vjust=0,
            show.legend=FALSE) +
  ggtitle("Factor Analysis Biplot")
#INTER
- The communalities are high for all the variables in the study; hence the underlying factors combine to explain the information in the study variables well.
- The first two factors capture 91% of the total information in the variables; hence a biplot is a reasonable representation of the positions with respect to the multidimensional characteristics in a two-dimensional space.
- "Fun and playful" and "young and youthful" load highly positive on the first factor, whereas "cheap and easy to maintain" and "just basic need" load highly negative on it. Hence we can name the first factor "Flashy youth & high maintenance".
- "Strong brand reputation", "safe" and "flawless bodywork and finish" load high on factor 2; hence we can name the second factor "High branded quality & safety assured".
- From the bi-plot, BMW and Audi are high on sporty, whereas Toyota, Honda and Kia are considered basic transportation. Honda and Mercedes are perceived as highly branded, whereas Daewoo, Kia and Hyundai are not doing well in terms of brand image with customers.

#-------------------------------------------------
#Exercise 5: LINEAR DISCRIMINANT ANALYSIS
#--------------------------------------------------
install.packages("survival")
install.packages("MASS")
library("survival")
library("MASS")

lung_cancer_data<- cancer

lung_cancer_data$missing <- rowSums(is.na(cancer))


lung_cancer_data_v2 <- lung_cancer_data[which(lung_cancer_data$missing==0),]
dim(lung_cancer_data_v2)

linear <- lda(status ~ age + sex + ph.ecog + ph.karno +


pat.karno + meal.cal + wt.loss , lung_cancer_data_v2)
linear

p1 <- predict(linear, lung_cancer_data_v2)$class


tab <- table(Predicted = p1, Actual = lung_cancer_data_v2$status)
tab
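
# Overall classification accuracy from the confusion matrix:
sum(diag(tab))/sum(tab)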

prob <- predict(linear, lung_cancer_data_v2)$posterior


head(prob)
#INTER
#-------------------------------------------------
#Exercise 6: MANOVA
#--------------------------------------------------
head(iris)
MANOVA=manova(cbind(Sepal.Length,Petal.Length)~Species,data=iris)
summary(MANOVA)
summary.aov(MANOVA)
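
# summary.manova defaults to Pillai's trace; Wilks' lambda is often reported
# alongside it:
summary(MANOVA, test = "Wilks")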

#=================================================================
#STOCHASTIC
#=================================================================
#--------------------------------------------------------------------------#
# Ex 1 CLASSIFICATION OF STATES OF A MARKOV CHAIN
#--------------------------------------------------------------------------#

tpm=matrix(c(0,1,0,0,0,1,0.5,0.5,0),nrow=3,ncol=3,byrow=TRUE)
tpm
tpm2=tpm%*%tpm
tpm2
tpm3=tpm2%*%tpm
tpm3
tpm4=tpm3%*%tpm
tpm4
tpm5=tpm4%*%tpm
tpm5
tpm6=tpm5%*%tpm
tpm6
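
# Optional cross-check, a sketch assuming the 'markovchain' package is installed:
# install.packages("markovchain")
library(markovchain)
mc = new("markovchain", states = c("A","B","C"), transitionMatrix = tpm)
is.irreducible(mc)   # TRUE: single communicating class
period(mc)           # 1: aperiodic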
#INTER
P11(3) > 0, P13(5) > 0, P21(2) > 0, P22(2) > 0, P33(2) > 0 and all other Pij(n) >
0.
Therefore the Markov chain is irreducible.
P11(3), P11(5), P11(6) are > 0, GCD of 3,5,6,…=1
P22(2), P22(3), P22(5), P22(6) are > 0, GCD of 2,3,5,6,…=1
P33(2), P33(3), P33(5), P33(6) are > 0, GCD of 2,3,5,6,…=1
Therefore all the states A, B and C are aperiodic.
Since the chain is finite and irreducible, all its states are non-null persistent,
moreover all the states are ergodic.

#--------------------------------------------------------------------------#
# Ex 2
#--------------------------------------------------------------------------#

tpm=matrix(c(3/4,1/4,0,1/4,1/2,1/4,0,3/4,1/4),nrow=3,ncol=3,byrow=TRUE)
tpm
tpm2=tpm%*%tpm
tpm2
#INTER

#--------------------------------------------------------------------------#
# Ex 3 Determining Limiting Probability Based on Given TPM
#--------------------------------------------------------------------------#
tpm=matrix(c(0,1,0,0.5,0,0.5,0,1,0),nrow=3,ncol=3,byrow=TRUE)
tpm
tpm2=tpm%*%tpm
tpm2
tpm3=tpm2%*%tpm
tpm3
tpm4=tpm3%*%tpm
tpm4
tpm5=tpm4%*%tpm
tpm5
tpm6=tpm5%*%tpm
tpm6
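
# Numerical check of the alternating pattern of the powers (discussed in the
# interpretation below):
all.equal(tpm, tpm3)    # P = P^3
all.equal(tpm2, tpm4)   # P^2 = P^4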
#INTER
We can observe that P = P^3 = P^5 and P^2 = P^4 = P^6.
In general, P^(2n) = P^2 and P^(2n+1) = P.
We note that
P00(2) > 0, P01(1) > 0, P02(2) > 0,
P10(1) > 0, P11(2) > 0, P12(1) > 0,
P20(2) > 0, P21(1) > 0, P22(2) > 0.
Conclusion:
Pij(n) > 0 for some n, for all i and j. Therefore the Markov chain is irreducible.
Pii(2) = Pii(4) = Pii(6) > 0 for all i = 0, 1, 2, while Pii(n) = 0 for every
odd n, so GCD(2,4,6,...) = 2.
Hence all the states of the given Markov chain have period 2.

#--------------------------------------------------------------------------#
# Ex 4 Steady state distribution of a Markov Chain
#--------------------------------------------------------------------------#
tpm=matrix(c(0,1,1/2,1/2),nrow=2,ncol=2,byrow=TRUE)
tpm
int_dist=matrix(c(0.8333,0.1667),nrow=1,ncol=2,byrow=TRUE)
for(i in 1:100)
{
int_dist=int_dist%*%tpm
}
limiting_dist=int_dist
limiting_dist

# Alternatively, solve the steady-state equations pi %*% tpm = pi with
# sum(pi) = 1; here: -pi1 + 0.5*pi2 = 0 and pi1 + pi2 = 1.
coeff=matrix(c(-1,0.5,1,1),nrow=2,ncol=2,byrow=TRUE)
rhs=matrix(c(0,1),nrow=2,ncol=1,byrow=FALSE)
limiting_dist=solve(coeff,rhs)
limiting_dist
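
# Cross-check: the stationary distribution is the left eigenvector of the
# TPM for eigenvalue 1, rescaled to sum to 1.
ev = eigen(t(tpm))
i1 = which.min(abs(ev$values - 1))
pi_hat = Re(ev$vectors[, i1]) / sum(Re(ev$vectors[, i1]))
pi_hat   # approximately (1/3, 2/3)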
#INTER
In the long run, the probability that the man travels by car alone is 0.667.

You might also like