You are on page 1 of 38

Question 3

Venkata Yadati

May 27, 2020

Warning
Warning: You must have the “PPforest” package and “effects” package installed for this
assignment. You can install these packages by running the following code in base R (not
RStudio or RMarkdown).
install.packages("PPforest")
install.packages("effects")

Individual Variable Data Analysis


Variable: Private
private <- college$Private

table(private)

## private
## No Yes
## 212 565

There are 212 public universities and 565 private universities in the data set, for a
total of 777 colleges.

Variable: Apps
apps <- college$Apps
quantile(apps)

##    0%   25%   50%   75%  100%
##    81   776  1558  3624 48094

mean(apps)

## [1] 3001.638

var(apps)

## [1] 14978460

sd(apps)

## [1] 3870.201

boxplot(apps)
hist(apps)
Variable: Accept
accept <- college$Accept
quantile(accept)

##    0%   25%   50%   75%  100%
##    72   604  1110  2424 26330

mean(accept)

## [1] 2018.804

var(accept)

## [1] 6007960

sd(accept)

## [1] 2451.114

boxplot(accept)
hist(accept)
Variable: Enroll
enroll <- college$Enroll
quantile(enroll)

##   0%  25%  50%  75% 100%
##   35  242  434  902 6392

mean(enroll)

## [1] 779.973

var(enroll)

## [1] 863368.4

sd(enroll)

## [1] 929.1762

boxplot(enroll)
hist(enroll)
Variable: Top10perc
top10perc <- college$Top10perc
quantile(top10perc)

##   0%  25%  50%  75% 100%
##    1   15   23   35   96

mean(top10perc)

## [1] 27.55856

var(top10perc)

## [1] 311.1825

sd(top10perc)

## [1] 17.64036

boxplot(top10perc)
hist(top10perc)
Variable: Top25perc
top25perc <- college$Top25perc
quantile(top25perc)

##   0%  25%  50%  75% 100%
##    9   41   54   69  100

mean(top25perc)

## [1] 55.79665

var(top25perc)

## [1] 392.2292

sd(top25perc)

## [1] 19.80478

boxplot(top25perc)
hist(top25perc)
Variable: F.Undergrad
fundergrad <- college$F.Undergrad
quantile(fundergrad)

##    0%   25%   50%   75%  100%
##   139   992  1707  4005 31643

mean(fundergrad)

## [1] 3699.907

var(fundergrad)

## [1] 23526579

sd(fundergrad)

## [1] 4850.421

boxplot(fundergrad)
hist(fundergrad)
Variable: P.Undergrad
pundergrad <- college$P.Undergrad
quantile(pundergrad)

##    0%   25%   50%   75%  100%
##     1    95   353   967 21836

mean(pundergrad)

## [1] 855.2986

var(pundergrad)

## [1] 2317799

sd(pundergrad)

## [1] 1522.432

boxplot(pundergrad)
hist(pundergrad)
Variable: Outstate
outstate <- college$Outstate
quantile(outstate)

##    0%   25%   50%   75%  100%
##  2340  7320  9990 12925 21700

mean(outstate)

## [1] 10440.67

var(outstate)

## [1] 16184662

sd(outstate)

## [1] 4023.016

boxplot(outstate)
hist(outstate)
Variable: Room.Board
roomboard <- college$Room.Board
quantile(roomboard)

##   0%  25%  50%  75% 100%
## 1780 3597 4200 5050 8124

mean(roomboard)

## [1] 4357.526

var(roomboard)

## [1] 1202743

sd(roomboard)

## [1] 1096.696

boxplot(roomboard)
hist(roomboard)
Variable: Books
books <- college$Books
quantile(books)

##   0%  25%  50%  75% 100%
##   96  470  500  600 2340

mean(books)

## [1] 549.381

var(books)

## [1] 27259.78

sd(books)

## [1] 165.1054

boxplot(books)
hist(books)
Variable: Personal
personal <- college$Personal
quantile(personal)

##   0%  25%  50%  75% 100%
##  250  850 1200 1700 6800

mean(personal)

## [1] 1340.642

var(personal)

## [1] 458425.8

sd(personal)

## [1] 677.0715

boxplot(personal)
hist(personal)
Variable: PhD
phd <- college$PhD
quantile(phd)

##   0%  25%  50%  75% 100%
##    8   62   75   85  103

mean(phd)

## [1] 72.66023

var(phd)

## [1] 266.6086

sd(phd)

## [1] 16.32815

boxplot(phd)
hist(phd)
Variable: Terminal
terminal <- college$Terminal
quantile(terminal)

##   0%  25%  50%  75% 100%
##   24   71   82   92  100

mean(terminal)

## [1] 79.7027

var(terminal)

## [1] 216.7478

sd(terminal)

## [1] 14.72236

boxplot(terminal)
hist(terminal)
Variable: S.F.Ratio
sfratio <- college$S.F.Ratio
quantile(sfratio)

##   0%  25%  50%  75% 100%
##  2.5 11.5 13.6 16.5 39.8

mean(sfratio)

## [1] 14.0897

var(sfratio)

## [1] 15.66853

sd(sfratio)

## [1] 3.958349

boxplot(sfratio)
hist(sfratio)
Variable: perc.alumni
percalumni <- college$perc.alumni
quantile(percalumni)

##   0%  25%  50%  75% 100%
##    0   13   21   31   64

mean(percalumni)

## [1] 22.74389

var(percalumni)

## [1] 153.5567

sd(percalumni)

## [1] 12.3918

boxplot(percalumni)
hist(percalumni)
Variable: Expend
expend <- college$Expend
quantile(expend)

##    0%   25%   50%   75%  100%
##  3186  6751  8377 10830 56233

mean(expend)

## [1] 9660.171

var(expend)

## [1] 27266866

sd(expend)

## [1] 5221.768

boxplot(expend)
hist(expend)
Variable: Grad.Rate
gradrate <- college$Grad.Rate
quantile(gradrate)

##   0%  25%  50%  75% 100%
##   10   53   65   78  118

mean(gradrate)

## [1] 65.46332

var(gradrate)

## [1] 295.0737

sd(gradrate)

## [1] 17.17771

boxplot(gradrate)
hist(gradrate)
Multivariate Data Analysis
Acceptance Rate vs. GradRate
college$acceptrate <- college$Accept/college$Apps
mean(college$acceptrate)

## [1] 0.7469277

model1 <- lm(college$Grad.Rate ~ college$acceptrate, data = college)


summary(model1)

##
## Call:
## lm(formula = college$Grad.Rate ~ college$acceptrate, data = college)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58.491 -10.806 0.968 12.496 57.411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 90.493 3.059 29.58 < 2e-16 ***
## college$acceptrate -33.510 4.018 -8.34 3.39e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.47 on 775 degrees of freedom
## Multiple R-squared: 0.08235, Adjusted R-squared: 0.08117
## F-statistic: 69.55 on 1 and 775 DF, p-value: 3.39e-16

plot(college$acceptrate,college$Grad.Rate)

Room.Board vs. Books
model2 <- lm(college$Books ~ college$Room.Board, data = college)
summary(model2)

##
## Call:
## lm(formula = college$Books ~ college$Room.Board, data = college)
##
## Residuals:
## Min 1Q Median 3Q Max
## -418.37 -84.38 -23.92 63.17 1746.59
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.654e+02 2.410e+01 19.313 < 2e-16 ***
## college$Room.Board 1.927e-02 5.363e-03 3.592 0.000349 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 163.9 on 775 degrees of freedom
## Multiple R-squared: 0.01637, Adjusted R-squared: 0.01511
## F-statistic: 12.9 on 1 and 775 DF, p-value: 0.000349

plot(college$Books,college$Room.Board)

perc.alumni vs. Room.Board
model3 <- lm(college$Room.Board ~ college$perc.alumni, data = college)
summary(model3)

##
## Call:
## lm(formula = college$Room.Board ~ college$perc.alumni, data = college)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2386.8 -768.5 -169.1 728.3 3519.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3809.293 79.215 48.09 < 2e-16 ***
## college$perc.alumni 24.105 3.059 7.88 1.11e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1056 on 775 degrees of freedom
## Multiple R-squared: 0.07418, Adjusted R-squared: 0.07299
## F-statistic: 62.1 on 1 and 775 DF, p-value: 1.107e-14

plot(college$Room.Board,college$perc.alumni)

You might also like