# You are on page 1 of 3

# FILENAME IS Chap4RCode.txt
# THIS CODE USES THE DATASET TermLife.csv
# THIS CODE USES THE DATASET AutoClaims.csv
# THIS CODE USES THE DATASET WiscHospCosts.csv

# Load the term life data (presumably TermLife.csv); the file is picked
# interactively. file.choose() is used instead of choose.files() because the
# latter is only available on Windows, while file.choose() works everywhere
# and always returns a single path.
Term <- read.table(file.choose(), header = TRUE, sep = ",")

# PICK THE SUBSET OF THE DATA CORRESPONDING TO TERM PURCHASE
# (rows with a positive FACE value)
Term2 <- subset(Term, subset = FACE > 0)
names(Term2)
# NOTE(review): attach() exposes a snapshot of Term2 on the search path;
# columns added or modified below are not visible through it. It is kept
# because later statements use a bare column name and call detach(Term2).
attach(Term2)

# Log-transform face amount and income; treat marital status as categorical.
Term2$LNFACE <- with(Term2, log(FACE))
Term2$LNINCOME <- with(Term2, log(INCOME))
Term2$MARSTAT <- as.factor(Term2$MARSTAT)

# Box plot of log face amount by marital status, then close the device.
boxplot(LNFACE ~ MARSTAT, ylab = "LNFACE", xlab = "MARSTAT", data = Term2)
dev.off()

# Number of observations at each MARSTAT level.
table(Term2$MARSTAT)

# MEAN AND STANDARD DEVIATION OF LNFACE: BY LEVEL OF MARSTAT, THEN OVERALL
# numSummary() is made available by loading Rcmdr.
library(Rcmdr)
library(abind)

numSummary(
  Term2[, "LNFACE"],
  groups = Term2$MARSTAT,
  statistics = c("mean", "sd")
)
numSummary(
  Term2[, "LNFACE"],
  statistics = c("mean", "sd")
)

# MAKE BINARY VARIABLES
# One 0/1 indicator per MARSTAT level (0, 1, 2).
Term2$MAR0 <- as.numeric(Term2$MARSTAT == 0)
Term2$MAR1 <- as.numeric(Term2$MARSTAT == 1)
Term2$MAR2 <- as.numeric(Term2$MARSTAT == 2)

# Put MARSTAT next to its indicators for a visual check. MARSTAT is read
# from Term2 directly rather than as a bare name, so the check no longer
# depends on a copy of the data having been attach()ed earlier. The column
# name "MARSTAT" is kept.
Check1 <- data.frame(
  MARSTAT = Term2$MARSTAT,
  Term2$MAR0,
  Term2$MAR1,
  Term2$MAR2
)

# Opens the interactive data editor on Check1.
fix(Check1)

# CHECK THE DEPENDENCIES AMONG MAR0, MAR1, MAR2
cor(Term2[, c("MAR0", "MAR1", "MAR2")], use = "complete.obs")

# SECTION 4.1 OUTPUT
# Regression of LNFACE on income, education, household size and two of the
# three marital-status indicators (MAR0 and MAR1 here).
model5 <- lm(
  LNFACE ~ LNINCOME + EDUCATION + NUMHH + MAR0 + MAR1,
  data = Term2
)
summary(model5)
anova(model5)
aov(model5)

# Same regression with the other two pairs of indicators.
summary(
  lm(LNFACE ~ LNINCOME + EDUCATION + NUMHH + MAR0 + MAR2, data = Term2)
)

summary(
  lm(LNFACE ~ LNINCOME + EDUCATION + NUMHH + MAR1 + MAR2, data = Term2)
)

# TESTING MODEL SUBSETS
# Reduced model: the same regressors without the marital-status indicators.
model4 <- lm(
  LNFACE ~ LNINCOME + EDUCATION + NUMHH,
  data = Term2
)

# COMPARE MODEL WITHOUT FACTOR TO THE ONE WITH
anova(model4, model5)

# THE F-RATIO TURNS OUT TO BE 3.236
# 95th percentile of F(2, 269): THE F-VALUE AT 5%
qf(0.95, df1 = 2, df2 = 269)
# Upper-tail probability of the observed ratio: THE p-VALUE
pf(3.236, df1 = 2, df2 = 269, lower.tail = FALSE)

# SECTION 4.2.2 GENERAL LINEAR HYPOTHESIS TEST
library(car)

# Each row of the hypothesis matrix selects one coefficient of model5:
# row 1 picks the 5th coefficient and row 2 the 6th (the two marital-status
# indicators, counting the intercept first). Both are tested against 0.
Hypothesis <- matrix(
  c(0, 0, 0, 0, 1, 0,
    0, 0, 0, 0, 0, 1),
  nrow = 2, ncol = 6, byrow = TRUE
)
RHS <- c(0, 0)

# linear.hypothesis() is defunct in current releases of car;
# linearHypothesis() is its replacement and takes the same arguments here.
linearHypothesis(model5, Hypothesis, rhs = RHS)

# VARIANCE-COVARIANCE MATRIX FOR REGRESSION PARAMETERS
vcov(model5)

# X PRIME X INVERSE MATRIX
summary(model5)[["cov.unscaled"]]

# CHECK: the unscaled matrix times the squared residual standard error
# should match vcov(model5) printed above.
with(summary(model5), cov.unscaled * sigma^2)

# CONFIDENCE INTERVALS FOR REGRESSION PARAMETERS
library(MASS)
Confint(model5, level = 0.95)

# CONFIDENCE AND PREDICTION INTERVALS FOR NEW DATA
model4 <- lm(
  LNFACE ~ LNINCOME + EDUCATION + NUMHH,
  data = Term2
)

# A single new observation at which to evaluate the fitted model.
newdata <- data.frame(
  LNINCOME = 11,
  EDUCATION = 12,
  NUMHH = 3
)

# 95% interval for the mean response, then 95% and 90% prediction intervals.
predict(model4, newdata, interval = "confidence", level = 0.95)
predict(model4, newdata, interval = "prediction", level = 0.95)
predict(model4, newdata, interval = "prediction", level = 0.90)

# CONFIDENCE INTERVALS FOR LINEAR COMBINATIONS OF REGRESSION COEFFICIENTS
# (carried out here as linear hypothesis tests on model4)
model4 <- lm(LNFACE ~ LNINCOME + EDUCATION + NUMHH, data = Term2)

# linear.hypothesis() is defunct in current releases of car;
# linearHypothesis() is its replacement and takes the same arguments here.

# Test that the 3rd coefficient (EDUCATION, counting the intercept first)
# equals 0.
Hypothesis <- matrix(c(0, 0, 1, 0), nrow = 1, ncol = 4, byrow = TRUE)
RHS <- c(0)
linearHypothesis(model4, Hypothesis, rhs = RHS)

# Test that the sum of the 3rd and 4th coefficients (EDUCATION + NUMHH)
# equals 0.
Hypothesis <- matrix(c(0, 0, 1, 1), nrow = 1, ncol = 4, byrow = TRUE)
RHS <- c(0)
linearHypothesis(model4, Hypothesis, rhs = RHS)

# CREATE THE CATEGORICAL VARIABLE - A "FACTOR"
# Let lm() build the indicator columns from the factor instead of supplying
# hand-made binary variables.
Term2[["MSFactor"]] <- as.factor(Term2[["MARSTAT"]])
str(Term2[["MSFactor"]])

model8 <- lm(
  LNFACE ~ LNINCOME + EDUCATION + NUMHH + MSFactor,
  data = Term2
)
summary(model8)
anova(model8)

# AUTO CLAIMS
# Remove the term life data from the search path before switching data sets.
detach(Term2)

# Load the auto claims data (presumably AutoClaims.csv), picked
# interactively. file.choose() replaces the Windows-only choose.files() so
# the script also runs on other platforms.
AutoC <- read.table(file.choose(), header = TRUE, sep = ",")
names(AutoC)
# Opens the interactive data editor on AutoC.
fix(AutoC)
# NOTE(review): attach() exposes a snapshot of AutoC; LNPAID, created below,
# is not visible through it. It is kept because detach(AutoC) is called later.
attach(AutoC)
str(AutoC)

# Histograms of paid claims on the original and the log scale, then a box
# plot of log paid claims by CLASS; dev.off() closes the graphics device.
Hist(AutoC$PAID, scale = "frequency")
AutoC$LNPAID <- with(AutoC, log(PAID))
Hist(AutoC$LNPAID, scale = "frequency")
boxplot(LNPAID ~ CLASS, data = AutoC)
dev.off()

# CREATE A TABLE OF MEANS AND STANDARD DEVIATIONS
# Per-CLASS mean, sd and the 0/0.5/1 quantiles (minimum, median, maximum)
# of LNPAID and PAID.
tableout <- numSummary(
  AutoC[, c("LNPAID", "PAID")],
  groups = AutoC$CLASS,
  statistics = c("mean", "sd", "quantiles"),
  quantiles = c(0, .5, 1)
)
str(tableout)

# Switch the working directory, then export the summary table as CSV there.
setwd("C://")
write.csv(tableout$table, file = "AutoClaimsClassC.csv")

# Alternatively, you can use the following code and write the output out to a
# word processor:
# library(Hmisc)
# t1 <- summarize(LNPAID, CLASS, length)
# t2 <- summarize(PAID, CLASS, median)
# t3 <- summarize(LNPAID, CLASS, median)
# t4 <- summarize(LNPAID, CLASS, mean)
# t5 <- summarize(LNPAID, CLASS, sd)
# tableout <- cbind(t1,t2[2],t3[2],t4[2],t5[2]);tableout
# setwd("c://")
# write.csv(t(tableout), file = "AutoClaimsClassC.csv")

# LNPAID regressed on CLASS alone.
model1 <- lm(LNPAID ~ CLASS, data = AutoC)
summary(model1)
anova(model1)

# LNPAID by STATE: box plot first, then the corresponding regression.
boxplot(LNPAID ~ STATE, data = AutoC)

model2 <- lm(LNPAID ~ STATE, data = AutoC)
summary(model2)
anova(model2)

# LNPAID regressed on CLASS, STATE, AGE and GENDER together.
model3 <- lm(
  LNPAID ~ CLASS + STATE + AGE + GENDER,
  data = AutoC
)
summary(model3)
anova(model3)

# Done with the auto claims data: take it off the search path.
detach(AutoC)

# WISCONSIN HOSPITAL COSTS DATA
# Load the hospital costs data (presumably WiscHospCosts.csv), picked
# interactively. file.choose() replaces the Windows-only choose.files() so
# the script also runs on other platforms.
HospitalCosts <- read.table(file.choose(), header = TRUE, sep = ",")
# NOTE(review): nothing in this section needs the attached copy (all calls
# pass data = HospitalCosts); attach() is kept in case later code relies on it.
attach(HospitalCosts)

# Natural log of CHG_NUM.
HospitalCosts$LNCHGNUM <- with(HospitalCosts, log(CHG_NUM))

# LETTER PLOT FIGURES
# Natural log of NO_DSCHG, then a scatter plot of the two logged variables
# restricted to DRG codes 209, 391 and 430; dev.off() closes the device.
HospitalCosts$LNNO_DSCHG <- with(HospitalCosts, log(NO_DSCHG))
plot(
  LNCHGNUM ~ LNNO_DSCHG,
  data = HospitalCosts,
  subset = DRG == 209 | DRG == 391 | DRG == 430
)
dev.off()