You are on page 1 of 14

>#

> ###########################

> ### Tasks A trees #########

> ###########################

>#

>#

> # Part ii

> (full.mod<-lm(MDBH~HD+Age+I(HD/N),trees))

Call:

lm(formula = MDBH ~ HD + Age + I(HD/N), data = trees)

Coefficients:

(Intercept) HD Age I(HD/N)

3.26543 0.06368 -0.13504 29.81722

>#

> # Part iii

> red.mod<-lm(MDBH~Age,trees)

> anova(red.mod,full.mod)

Analysis of Variance Table

Model 1: MDBH ~ Age

Model 2: MDBH ~ HD + Age + I(HD/N)

Res.Df RSS Df Sum of Sq F Pr(>F)

1 18 5.6467

2 16 1.4284 2 4.2183 23.626 1.676e-05 ***

---

Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>

> # Part iv

> # Use stepwise algorithm

> step(full.mod)

Start: AIC=-44.78

MDBH ~ HD + Age + I(HD/N)

Df Sum of Sq RSS AIC

<none> 1.4284 -44.784

- Age 1 0.66085 2.0892 -39.179

- HD 1 1.57782 3.0062 -31.901

- I(HD/N) 1 2.76552 4.1939 -25.242

Call:

lm(formula = MDBH ~ HD + Age + I(HD/N), data = trees)

Coefficients:

(Intercept) HD Age I(HD/N)

3.26543 0.06368 -0.13504 29.81722

>#

> # May also use add1 or drop1 if so desired

>#

> # Part v

> # If you propose a new model, you will need to verify the appropriateness

> # of your model and argue, based on output from code you develop and use here,

> # why your new model is better

>#

> # Be aware that you may need to do this.


>#

> # .... put model construction and diagnostics code here (look back

> # at code provided for earlier assignments)

> ###########################

> ### Tasks B teachers ######

> ###########################

>#

>#

> # Part i

>

> new.mod<-lm(y~x1+x2+x3+x4,teachers)

> summary(new.mod)

Call:

lm(formula = y ~ x1 + x2 + x3 + x4, data = teachers)

Residuals:

Min 1Q Median 3Q Max

-136.848 -20.382 -4.111 34.087 95.748

Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) -280.7634 191.5527 -1.466 0.15997

x1 0.6677 0.4402 1.517 0.14664

x2 2.5932 0.9651 2.687 0.01506 *

x3 -0.8426 0.6682 -1.261 0.22336

x4 6.8913 1.7737 3.885 0.00108 **

---

Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 55.92 on 18 degrees of freedom

Multiple R-squared: 0.6522, Adjusted R-squared: 0.5749

F-statistic: 8.439 on 4 and 18 DF, p-value: 0.0005115

>

> e<-residuals(new.mod)

> h<-hatvalues(new.mod)

> y.hat<-fitted.values(new.mod)

> s<-summary(new.mod)$sigma

> stud<-e/(s*sqrt(1-h))

>

> win.graph(width = 8, height = 11)

> par(mfrow=c(3,2),cex=.75, ps=10)

>

> qqnorm(stud, main=NULL, sub="(a)",ylim=c(-3,3),xlim=c(-3,3))

> abline(a=0, b=1, lty=2)

>

> plot(stud~y.hat, main=NULL, xlab=expression(hat(y)),

+ ylab="Stud. Residuals", ylim=c(-3,3), sub="(b)")

> abline(h=c(-2,0,2), lty=c(2,1,2))

>

> with(teachers, plot(stud~x1, main=NULL,

+ ylab="Stud. Residuals", xlab=expression(x[1]),

+ ylim=c(-3,3), sub="(c)"))

> abline(h=c(-2,0,2), lty=c(2,1,2))

>

> with(teachers, plot(stud~x2, main=NULL,

+ ylab="Stud. Residuals", xlab=expression(x[2]),


+ ylim=c(-3,3), sub="(d)"))

> abline(h=c(-2,0,2), lty=c(2,1,2))

>

> with(teachers, plot(stud~x3, main=NULL,

+ ylab="Stud. Residuals", xlab=expression(x[3]),

+ ylim=c(-3,3), sub="(e)"))

> abline(h=c(-2,0,2), lty=c(2,1,2))

>

> with(teachers, plot(stud~x4, main=NULL,

+ ylab="Stud. Residuals", xlab=expression(x[4]),

+ ylim=c(-3,3), sub="(f)"))

> abline(h=c(-2,0,2), lty=c(2,1,2))

>#

>#

> # Part ii

>

> old.mod<-lm(y~x1+x2+x3+x4+gender,teachers)

> anova(new.mod,old.mod)

Analysis of Variance Table

Model 1: y ~ x1 + x2 + x3 + x4

Model 2: y ~ x1 + x2 + x3 + x4 + gender

Res.Df RSS Df Sum of Sq F Pr(>F)

1 18 56290

2 17 55459 1 830.95 0.2547 0.6203

>

> # Part iii

>

> PRESS.new<-sum((e/(1-h))^2)
> PRESS.old<-sum((residuals(old.mod)/(1-hatvalues(old.mod)))^2)

>

> data.frame(PRESS.new,PRESS.old,row.names="")

PRESS.new PRESS.old

84378.64 94980.15

>

> summary(old.mod)

Call:

lm(formula = y ~ x1 + x2 + x3 + x4 + gender, data = teachers)

Residuals:

Min 1Q Median 3Q Max

-127.964 -22.347 -4.978 30.319 100.647

Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) -279.7006 195.6572 -1.430 0.1710

x1 0.5309 0.5250 1.011 0.3261

x2 2.6229 0.9874 2.656 0.0166 *

x3 -0.8291 0.6830 -1.214 0.2414

x4 7.1288 1.8717 3.809 0.0014 **

genderF -14.3063 28.3467 -0.505 0.6203

---

Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 57.12 on 17 degrees of freedom

Multiple R-squared: 0.6573, Adjusted R-squared: 0.5566

F-statistic: 6.522 on 5 and 17 DF, p-value: 0.001471


> summary(new.mod)

Call:

lm(formula = y ~ x1 + x2 + x3 + x4, data = teachers)

Residuals:

Min 1Q Median 3Q Max

-136.848 -20.382 -4.111 34.087 95.748

Coefficients:

Estimate Std. Error t value Pr(>|t|)

(Intercept) -280.7634 191.5527 -1.466 0.15997

x1 0.6677 0.4402 1.517 0.14664

x2 2.5932 0.9651 2.687 0.01506 *

x3 -0.8426 0.6682 -1.261 0.22336

x4 6.8913 1.7737 3.885 0.00108 **

---

Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 55.92 on 18 degrees of freedom

Multiple R-squared: 0.6522, Adjusted R-squared: 0.5749

F-statistic: 8.439 on 4 and 18 DF, p-value: 0.0005115

> ########################

> ### Tasks C bread ######

> ########################

>#

>#
> # Part i

> full.mod<-lm(y~x1+x2+x3+I(x1*x2)+I(x1*x3)

+ +I(x2*x3)+I(x1^2)+I(x2^2)+I(x3^2),bread)

>#

> no.int<-lm(y~x1+x2+x3+I(x1^2)+I(x2^2)+I(x3^2),bread)

>#

> no.sq<-lm(y~x1+x2+x3+I(x1*x2)+I(x1*x3)+I(x2*x3),bread)

>#

> no.quad<-lm(y~x1+x2+x3,bread)

>

> anova(no.int,full.mod)

Analysis of Variance Table

Model 1: y ~ x1 + x2 + x3 + I(x1^2) + I(x2^2) + I(x3^2)

Model 2: y ~ x1 + x2 + x3 + I(x1 * x2) + I(x1 * x3) + I(x2 * x3) + I(x1^2) +

I(x2^2) + I(x3^2)

Res.Df RSS Df Sum of Sq F Pr(>F)

1 13 27.427

2 10 24.267 3 3.16 0.4341 0.7333

> anova(no.sq,full.mod)

Analysis of Variance Table

Model 1: y ~ x1 + x2 + x3 + I(x1 * x2) + I(x1 * x3) + I(x2 * x3)

Model 2: y ~ x1 + x2 + x3 + I(x1 * x2) + I(x1 * x3) + I(x2 * x3) + I(x1^2) +

I(x2^2) + I(x3^2)

Res.Df RSS Df Sum of Sq F Pr(>F)

1 13 56.045

2 10 24.267 3 31.779 4.3652 0.0329 *

---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> anova(no.quad,full.mod)

Analysis of Variance Table

Model 1: y ~ x1 + x2 + x3

Model 2: y ~ x1 + x2 + x3 + I(x1 * x2) + I(x1 * x3) + I(x2 * x3) + I(x1^2) +

I(x2^2) + I(x3^2)

Res.Df RSS Df Sum of Sq F Pr(>F)

1 16 59.205

2 10 24.267 6 34.939 2.3996 0.1061

>#

> # Part ii

> step(full.mod,trace=2)

Start: AIC=23.87

y ~ x1 + x2 + x3 + I(x1 * x2) + I(x1 * x3) + I(x2 * x3) + I(x1^2) +

I(x2^2) + I(x3^2)

Df Sum of Sq RSS AIC

- I(x2 * x3) 1 0.1800 24.447 22.015

- I(x1 * x2) 1 0.9800 25.247 22.659

- I(x1 * x3) 1 2.0000 26.267 23.451

<none> 24.267 23.867

- x3 1 3.4161 27.683 24.502

- I(x3^2) 1 11.1974 35.464 29.456

- I(x1^2) 1 11.7078 35.974 29.742

- x1 1 11.8821 36.149 29.838

- x2 1 15.1090 39.376 31.548

- I(x2^2) 1 20.0036 44.270 33.892


Step: AIC=22.02

y ~ x1 + x2 + x3 + I(x1 * x2) + I(x1 * x3) + I(x1^2) + I(x2^2) +

I(x3^2)

Df Sum of Sq RSS AIC

- I(x1 * x2) 1 0.9800 25.427 20.801

- I(x1 * x3) 1 2.0000 26.447 21.588

<none> 24.447 22.015

- x3 1 6.4277 30.874 24.684

- I(x3^2) 1 11.1974 35.644 27.557

- I(x1^2) 1 11.7078 36.154 27.841

- x1 1 11.8821 36.329 27.938

- x2 1 15.9115 40.358 30.041

- I(x2^2) 1 20.0036 44.450 31.973

Step: AIC=20.8

y ~ x1 + x2 + x3 + I(x1 * x3) + I(x1^2) + I(x2^2) + I(x3^2)

Df Sum of Sq RSS AIC

- I(x1 * x3) 1 2.0000 27.427 20.316

<none> 25.427 20.801

- x3 1 6.4277 31.854 23.309

- x1 1 11.1677 36.594 26.083

- I(x3^2) 1 11.1974 36.624 26.099

- I(x1^2) 1 11.7078 37.134 26.376

- I(x2^2) 1 20.0036 45.430 30.409

- x2 1 20.1062 45.533 30.454

Step: AIC=20.32
y ~ x1 + x2 + x3 + I(x1^2) + I(x2^2) + I(x3^2)

Df Sum of Sq RSS AIC

<none> 27.427 20.316

- x1 1 10.082 37.508 24.577

- I(x3^2) 1 11.197 38.624 25.163

- I(x1^2) 1 11.708 39.134 25.425

- x3 1 16.557 43.984 27.762

- I(x2^2) 1 20.004 47.430 29.270

- x2 1 20.106 47.533 29.314

Call:

lm(formula = y ~ x1 + x2 + x3 + I(x1^2) + I(x2^2) + I(x3^2),

data = bread)

Coefficients:

(Intercept) x1 x2 x3 I(x1^2) I(x2^2)

-96.623354 0.485477 1.654445 6.187358 -0.001024 -0.014952

I(x3^2)

-2.307924

>

> # A fancy-pants comprehensive approach to model/variable selection

> # May need to install package leaps

>#

> require(leaps)

Loading required package: leaps

Warning message:

In library(package, lib.loc = lib.loc, character.only = TRUE, logical.return = TRUE, :


there is no package called ‘leaps’

> search.results<-regsubsets(y~x1+x2+x3+I(x1*x2)+I(x1*x3)

+ +I(x2*x3)+I(x1^2)+I(x2^2)+I(x3^2),

+ data=bread, nvmax=10, nbest=2,method="exhaustive")

Error in regsubsets(y ~ x1 + x2 + x3 + I(x1 * x2) + I(x1 * x3) + I(x2 * :

could not find function "regsubsets"

> selection.criteria<-summary(search.results)

Error in summary(search.results) : object 'search.results' not found

> detach(package:leaps)

Error in detach(package:leaps) : invalid 'name' argument

>

> #Obtain sample size

> n<-dim(bread)[1]

> #Extract the number of variables included in the test model

> q<-as.integer(row.names(selection.criteria$which))

Error in row.names(selection.criteria$which) :

object 'selection.criteria' not found

> #Extract the mean square error for the test model

> mse<-selection.criteria$rss/(n-q-1)

Error: object 'selection.criteria' not found

> #Extract R square and adjusted R square

> R.sq<-selection.criteria$rsq

Error: object 'selection.criteria' not found

> AdjR.sq<-selection.criteria$adjr2

Error: object 'selection.criteria' not found

> #Extract Mallow's statistic

> Cp<-selection.criteria$cp

Error: object 'selection.criteria' not found

> #Compute the AIC for the test model by formula


> aic.f<-n*log(selection.criteria$rss)-n*log(n)+2*(q+1)

Error: object 'selection.criteria' not found

> #Compute the BIC for the test model by formula

> bic.f<-n*log(selection.criteria$rss)-n*log(n)+(q+1)*log(n)

Error: object 'selection.criteria' not found

> #Extract variable information

> var<-as.matrix(selection.criteria$which[,2:10])

Error in as.matrix(selection.criteria$which[, 2:10]) :

object 'selection.criteria' not found

> #Create the criteria table

> criteria.table<-data.frame(cbind(q,mse,R.sq,AdjR.sq,Cp,aic.f,bic.f,

+ var[,1],var[,2],var[,3],var[,4],var[,5],var[,6],var[,7],var[,8],var[,9]),

+ row.names=NULL)

Error in cbind(q, mse, R.sq, AdjR.sq, Cp, aic.f, bic.f, var[, 1], var[, :

object 'mse' not found

> #Name the columns

> names(criteria.table)<-c("q","MSE","Rsq","aRsq","Cp","AIC","BIC","x1","x2","x3",

+ "x1*x2","x1*x3","x2*x3","x1^2","x2^2","x3^2")

Error in names(criteria.table) <- c("q", "MSE", "Rsq", "aRsq", "Cp", "AIC", :

object 'criteria.table' not found

> #Clean up the workspace

> rm(n,q,mse,R.sq,AdjR.sq,Cp,aic.f,bic.f,var)

Warning messages:

1: In rm(n, q, mse, R.sq, AdjR.sq, Cp, aic.f, bic.f, var) :

object 'q' not found

2: In rm(n, q, mse, R.sq, AdjR.sq, Cp, aic.f, bic.f, var) :

object 'mse' not found

3: In rm(n, q, mse, R.sq, AdjR.sq, Cp, aic.f, bic.f, var) :

object 'R.sq' not found


4: In rm(n, q, mse, R.sq, AdjR.sq, Cp, aic.f, bic.f, var) :

object 'AdjR.sq' not found

5: In rm(n, q, mse, R.sq, AdjR.sq, Cp, aic.f, bic.f, var) :

object 'Cp' not found

6: In rm(n, q, mse, R.sq, AdjR.sq, Cp, aic.f, bic.f, var) :

object 'aic.f' not found

7: In rm(n, q, mse, R.sq, AdjR.sq, Cp, aic.f, bic.f, var) :

object 'bic.f' not found

8: In rm(n, q, mse, R.sq, AdjR.sq, Cp, aic.f, bic.f, var) :

object 'var' not found

> #Take a look at the contents of the criteria table

> round(criteria.table,2)

Error: object 'criteria.table' not found

> # Part iii and iv

> # You will have to develop code for your new proposed model

> # See code for assignments 5 and 6.

You might also like