Professional Documents
Culture Documents
Assignment 1 (MM19B053)
Assignment 1 (MM19B053)
2023-02-16
Import Libraries
library('MPV')
library(ggplot2)
library(ggthemes)
Plot theme
Q 2.14
Data
## ratio visc
## 1 1.0 0.45
## 2 0.9 0.20
## 3 0.8 0.34
## 4 0.7 0.58
## 5 0.6 0.70
## 6 0.5 0.57
## 7 0.4 0.55
## 8 0.3 0.44
1
a) Scatter-Plot
## Scatter plot of viscosity (y) against ratio (x) for the Q 2.14 data.
## Both axes use explicit limits; x-axis breaks outside the limits are not drawn.
ggplot(data = q1_data) +
  geom_point(mapping = aes(x = ratio, y = visc), size = 2.5, color = "blue") +
  scale_x_continuous(limits = c(0.2, 1), breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
  scale_y_continuous(limits = c(0, 0.8)) +
  labs(title = "Scatterplot", x = "Ratio", y = "Viscosity") +
  my_theme
Scatterplot
0.8
0.6
Viscosity
0.4
0.2
0.0
0.2 0.4 0.6 0.8 1.0
Ratio
b) Prediction Equation
##
## Call:
## lm(formula = visc ~ ., data = q1_data)
##
## Coefficients:
## (Intercept) ratio
## 0.6714 -0.2964
2
The prediction equation is:
$\hat{y}_i = 0.6714 - 0.2964\,x_i$
c) Analysis
##
## Call:
## lm(formula = visc ~ ., data = q1_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20464 -0.10634 0.02196 0.08527 0.20643
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6714 0.1595 4.209 0.00563 **
## ratio -0.2964 0.2314 -1.281 0.24754
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.15 on 6 degrees of freedom
## Multiple R-squared: 0.2147, Adjusted R-squared: 0.08382
## F-statistic: 1.64 on 1 and 6 DF, p-value: 0.2475
confint(q1_model) ## 95% confidence intervals (2.5% and 97.5% limits) for the intercept and the ratio coefficient
## 2.5 % 97.5 %
## (Intercept) 0.2811246 1.061733
## ratio -0.8627412 0.269884
3
anova(q1_model) ## ANOVA table for the fitted regression of visc on ratio
The F-statistic is close to 1, so the null hypothesis is not rejected: ratio does not explain a
significant share of the variability in viscosity.
Here $F_0 = 1.6405$ and $F_{0.05,1,6} = 5.9874$; since $F_0 < F_{\alpha,1,n-2}$, the null hypothesis is not rejected at the 5% significance level.
## 95% confidence interval for the mean response at every ratio present in the data.
## predict() returns the fitted value with its lower and upper limits; the result is
## converted to a data frame and the x variable (ratio) is attached for plotting.
ci_band <- cbind(
  as.data.frame(
    predict(q1_model, newdata = q1_data, interval = "confidence", level = 0.95)
  ),
  ratio = q1_data$ratio
)

## 95% prediction interval at the same ratio values, i.e. the range expected for a
## single new observation at each x, built and labelled the same way.
pe_band <- cbind(
  as.data.frame(
    predict(q1_model, newdata = q1_data, interval = "prediction", level = 0.95)
  ),
  ratio = q1_data$ratio
)
4
CI & PI Band
95% Confidence Band 95% Prediction Band Fit
1.00
0.75
Viscosity
0.50
0.25
0.00
Q-2.15
## temp visc
## 1 24.9 1.1330
## 2 35.0 0.9772
## 3 44.9 0.8532
## 4 55.1 0.7550
## 5 65.2 0.6723
## 6 75.2 0.6021
## 7 85.2 0.5420
## 8 95.2 0.5074
Prediction Equation
5
##
## Call:
## lm(formula = visc ~ ., data = q2_data)
##
## Coefficients:
## (Intercept) temp
## 1.281511 -0.008758
Analysis
## Coefficient estimates with standard errors and t-tests, residual standard error,
## R-squared and the overall F-test for the temperature model.
summary(q2_model)
##
## Call:
## lm(formula = visc ~ ., data = q2_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.043955 -0.035863 -0.009305 0.019900 0.069559
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2815107 0.0468683 27.34 1.58e-07 ***
## temp -0.0087578 0.0007284 -12.02 2.01e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04743 on 6 degrees of freedom
## Multiple R-squared: 0.9602, Adjusted R-squared: 0.9535
## F-statistic: 144.6 on 1 and 6 DF, p-value: 2.007e-05
6
## 95% confidence intervals (2.5% and 97.5% limits) for the intercept and the temp coefficient.
confint(q2_model)
## 2.5 % 97.5 %
## (Intercept) 1.16682797 1.396193340
## temp -0.01054005 -0.006975593
The F-statistic is very far from 1, i.e. the null hypothesis is rejected. Here $F_0 = 144.6$ and $F_{0.05,1,6} = 5.9874$;
since $F_0 > F_{\alpha,1,n-2}$, the null hypothesis is rejected at the 5% significance level.
The regression is significant: temperature is a good variable for explaining the variability of the data.
## ANOVA table for the temperature model.
anova(q2_model)

## 95% confidence interval for the mean response at every temperature in the data:
## fitted value plus lower/upper limits, stored as a data frame together with temp.
ci_band2 <- as.data.frame(
  predict(q2_model, newdata = q2_data, interval = "confidence", level = 0.95)
)
ci_band2$temp <- q2_data$temp

## 95% prediction interval at the same temperatures, i.e. the range expected for a
## single new observation at each x, stored the same way.
pe_band2 <- as.data.frame(
  predict(q2_model, newdata = q2_data, interval = "prediction", level = 0.95)
)
pe_band2$temp <- q2_data$temp
## Observed points, the fitted line, and the outlines of the 95% confidence and
## prediction bands for the temperature model.
## alpha = 0 makes each ribbon's fill fully transparent, so only its outline is drawn.
## The legend labels given inside aes() are mapped to colours by scale_color_manual();
## `colors` is presumably a named vector defined earlier in the document - confirm.
ggplot(q2_data) +
  geom_point(aes(temp, visc), size = 2.5, color = 'black') +
  geom_ribbon(data = ci_band2,
              aes(x = temp, ymin = lwr, ymax = upr, color = '95% Confidence Band'),
              alpha = 0, size = 1) +
  geom_ribbon(data = pe_band2,
              aes(x = temp, ymin = lwr, ymax = upr, color = '95% Prediction Band'),
              alpha = 0, size = 1) +
  geom_line(data = pe_band2, aes(x = temp, y = fit, color = 'Fit'), size = 1) +
  labs(x = 'Temperature', y = 'Viscosity', title = 'CI & PI Band', color = "Legend") +
  scale_color_manual(values = colors) +
  my_theme
CI & PI Band
95% Confidence Band 95% Prediction Band Fit
1.00
Viscosity
0.75
0.50
40 60 80
Temperature
Q-2.16
## Question 2.16: work on a copy of the p2.16 data set (columns volume and pressure);
## p2.16 is presumably supplied by the MPV package loaded at the top - confirm.
q3_data <- p2.16
q3_data
## volume pressure
## 1 2084 4599
## 2 2084 4600
## 3 2273 5044
## 4 2273 5043
## 5 2273 5044
## 6 2463 5488
8
## 7 2463 5487
## 8 2651 5931
## 9 2652 5932
## 10 2652 5932
## 11 2842 6380
## 12 2842 6380
## 13 3030 6818
## 14 3031 6817
## 15 3031 6818
## 16 3221 7266
## 17 3221 7268
## 18 3409 7709
## 19 3410 7710
## 20 3600 8156
## 21 3600 8158
## 22 3788 8597
## 23 3789 8599
## 24 3789 8600
## 25 3979 9048
## 26 3979 9048
## 27 4167 9484
## 28 4168 9487
## 29 4168 9487
## 30 4358 9936
## 31 4358 9938
## 32 4546 10377
## 33 4547 10379
Analysis
##
## Call:
## lm(formula = pressure ~ ., data = q3_data)
##
## Coefficients:
## (Intercept) volume
## -290.707 2.346
##
## Call:
## lm(formula = pressure ~ ., data = q3_data)
##
## Residuals:
## Min 1Q Median 3Q Max
9
## -4.3276 -0.9227 0.0773 1.2676 2.9577
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.907e+02 1.355e+00 -214.6 <2e-16 ***
## volume 2.346e+00 4.007e-04 5855.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.741 on 31 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 3.429e+07 on 1 and 31 DF, p-value: < 2.2e-16
## 95% confidence intervals (2.5% and 97.5% limits) for the intercept and the volume coefficient.
confint(q3_model)
## 2.5 % 97.5 %
## (Intercept) -293.469711 -287.943397
## volume 2.345614 2.347249
## ANOVA table for the regression of pressure on volume.
anova(q3_model)
10
Plot of 95% Confidence and Prediction interval band
## 95% confidence interval for the mean response at every volume in the data:
## fitted value plus lower/upper limits, converted to a data frame with the
## x variable (volume) attached for plotting.
ci_band3 <- cbind(
  as.data.frame(
    predict(q3_model, newdata = q3_data, interval = "confidence", level = 0.95)
  ),
  volume = q3_data$volume
)

## 95% prediction interval at the same volumes, i.e. the range expected for a
## single new observation at each x, built the same way.
pe_band3 <- cbind(
  as.data.frame(
    predict(q3_model, newdata = q3_data, interval = "prediction", level = 0.95)
  ),
  volume = q3_data$volume
)
## Observed points, the fitted line, and the outlines of the 95% confidence and
## prediction bands for the pressure ~ volume model.
## alpha = 0 leaves each ribbon unfilled so that only its coloured outline shows.
ggplot(data = q3_data) +
  geom_point(mapping = aes(x = volume, y = pressure), color = "black", size = 2.5) +
  geom_ribbon(
    data = ci_band3,
    mapping = aes(x = volume, ymin = lwr, ymax = upr, color = "95% Confidence Band"),
    size = 1, alpha = 0
  ) +
  geom_ribbon(
    data = pe_band3,
    mapping = aes(x = volume, ymin = lwr, ymax = upr, color = "95% Prediction Band"),
    size = 1, alpha = 0
  ) +
  geom_line(data = pe_band3, mapping = aes(x = volume, y = fit, color = "Fit"), size = 1) +
  labs(title = "CI & PI Band", x = "Volume", y = "Pressure", color = "Legend") +
  scale_color_manual(values = colors) +
  my_theme
11
CI & PI Band
95% Confidence Band 95% Prediction Band Fit
10000
Pressure
8000
6000
Q-2.17
Data Setup
## Data reading
## Question 2.17 data, entered by hand: boiling point (bp) and pressure (press),
## 17 paired observations. The two vectors stay in the workspace alongside the
## data frame built from them.
bp <- c(
  199.5, 199.3, 197.9, 198.4, 199.4, 199.9, 200.9, 201.1, 201.9,
  201.3, 203.6, 204.6, 209.5, 208.6, 210.7, 211.9, 212.2
)
press <- c(
  20.79, 20.79, 22.4, 22.67, 23.15, 23.35, 23.89, 23.99, 24.02,
  24.01, 25.14, 26.57, 28.49, 27.76, 29.04, 29.88, 30.06
)
q4_data <- data.frame(bp, press)
q4_data
## bp press
## 1 199.5 20.79
## 2 199.3 20.79
## 3 197.9 22.40
## 4 198.4 22.67
## 5 199.4 23.15
## 6 199.9 23.35
## 7 200.9 23.89
12
## 8 201.1 23.99
## 9 201.9 24.02
## 10 201.3 24.01
## 11 203.6 25.14
## 12 204.6 26.57
## 13 209.5 28.49
## 14 208.6 27.76
## 15 210.7 29.04
## 16 211.9 29.88
## 17 212.2 30.06
Prediction equation
## Simple linear regression of boiling point on every other column of q4_data
## (press is the only one), then print the fitted coefficients.
q4_model <- lm(formula = bp ~ ., data = q4_data)
q4_model
##
## Call:
## lm(formula = bp ~ ., data = q4_data)
##
## Coefficients:
## (Intercept) press
## 163.333 1.606
Analysis
## Coefficient estimates with standard errors and t-tests, residual standard error,
## R-squared and the overall F-test for the boiling-point model.
summary(q4_model)
##
## Call:
## lm(formula = bp ~ ., data = q4_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4013 -0.9267 -0.1009 0.5989 2.7839
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 163.3333 2.7316 59.79 < 2e-16 ***
## press 1.6057 0.1083 14.83 2.28e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.308 on 15 degrees of freedom
## Multiple R-squared: 0.9362, Adjusted R-squared: 0.9319
## F-statistic: 219.9 on 1 and 15 DF, p-value: 2.279e-10
13
Hypothesis testing of the intercept and pressure coefficients at the 5% significance level
## 95% confidence intervals (2.5% and 97.5% limits) for the intercept and the press coefficient.
confint(q4_model)
## 2.5 % 97.5 %
## (Intercept) 157.510998 169.155543
## press 1.374942 1.836487
## ANOVA table for the regression of bp on press.
anova(q4_model)
The F-statistic is very far from 1, i.e. the null hypothesis is rejected.
Here $F_0 = 219.95$ and $F_{0.05,1,15} = 4.5431$; since $F_0 > F_{\alpha,1,n-2}$, the null hypothesis is rejected at the 5% significance level.
The regression is significant: pressure is a good variable for predicting boiling point.
## 95% confidence interval for the mean response at every pressure in the data:
## predict() returns the fitted value with its lower and upper limits, and the
## resulting table is converted to a data frame.
ci_band4 <- as.data.frame(
  predict(q4_model, newdata = q4_data, interval = "confidence", level = 0.95)
)
## Same call with interval = "prediction" for the 95% prediction interval.
pe_band4 <- as.data.frame(
  predict(q4_model, newdata = q4_data, interval = "prediction", level = 0.95)
)
14
## The code here first predicts y_hat for every x present in the data using the
## model we fitted, then finds the 95% prediction interval at each of them, i.e. the
## range in which a single new observation at that x is expected to fall, and then
## that table is converted to a data frame.
## Attach the x variable (pressure) to both interval tables under the name `press`.
## The layers below map x = press; storing the values in a column called `bp` left
## the tables without a `press` column, so the plot only worked through the
## workspace vector of the same name.
ci_band4$press <- q4_data$press
pe_band4$press <- q4_data$press

## Observed points, the fitted line, and the outlines of the 95% confidence and
## prediction bands. alpha = 0 leaves each ribbon unfilled so only its outline shows.
ggplot(q4_data) +
  geom_point(aes(press, bp), size = 2.5, color = 'black') +
  geom_ribbon(data = ci_band4,
              aes(x = press, ymin = lwr, ymax = upr, color = '95% Confidence Band'),
              alpha = 0, size = 1) +
  geom_ribbon(data = pe_band4,
              aes(x = press, ymin = lwr, ymax = upr, color = '95% Prediction Band'),
              alpha = 0, size = 1) +
  geom_line(data = pe_band4, aes(x = press, y = fit, color = 'Fit'), size = 1) +
  labs(x = 'Pressure', y = 'Boiling Point', title = 'CI & PI Band', color = "Legend") +
  scale_color_manual(values = colors) +
  my_theme
CI & PI Band
95% Confidence Band 95% Prediction Band Fit
215
210
Boiling Point
205
200
195
15
Q 2.18
Data Setup
## Question 2.18 data: amount spent and retained impressions per firm.
## NOTE(review): both c(...) calls below end in a trailing comma with no closing
## parenthesis, so they appear to be cut off at the page margin; the remaining
## values must be restored from the original source before this chunk can run.
## NOTE(review): `firms` is used in data.frame() but its definition is not visible
## here - confirm it is created before this point.
spent<-c(50.1,74.1,19.3,22.9,82.4,40.1,185.9,26.9,20.4,166.2,27,45.6,154.9,5,49.7,26.9,5.7,7.6,9.2,32.4,
impressions<-c(32.1,99.6,11.7,21.9,60.8,78.6,92.4,50.7,21.4,40.1,40.8,10.4,88.9,12,29.2,38,10,12.3,23.4,
q5_data<-data.frame(firms=firms,spent=spent,impressions=impressions)
q5_data$firms<-as.character(q5_data$firms) ## keep the firm names as plain character strings
q5_data
Prediction equation
## Simple linear regression of retained impressions on the amount spent,
## followed by a print of the fitted coefficients.
q5_model <- lm(formula = impressions ~ spent, data = q5_data)
q5_model
##
## Call:
## lm(formula = impressions ~ spent, data = q5_data)
##
## Coefficients:
## (Intercept) spent
## 22.1627 0.3632
16
The prediction equation is: $\hat{y}_i = 22.1627 + 0.3632\,x_i$
Analysis
## Coefficient estimates with standard errors and t-tests, residual standard error,
## R-squared and the overall F-test for the impressions model.
summary(q5_model)
##
## Call:
## lm(formula = impressions ~ spent, data = q5_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.422 -12.623 -8.171 8.832 50.526
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.16269 7.08948 3.126 0.00556 **
## spent 0.36317 0.09712 3.739 0.00139 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.5 on 19 degrees of freedom
## Multiple R-squared: 0.424, Adjusted R-squared: 0.3936
## F-statistic: 13.98 on 1 and 19 DF, p-value: 0.001389
## 95% confidence intervals (2.5% and 97.5% limits) for the intercept and the spent coefficient.
confint(q5_model)
## 2.5 % 97.5 %
## (Intercept) 7.324244 37.0011425
## spent 0.159899 0.5664492
17
## ANOVA table for the regression of impressions on spent.
anova(q5_model)

## 95% confidence interval for the mean response at every amount spent in the data:
## fitted value plus lower/upper limits, stored as a data frame together with spent.
ci_band5 <- as.data.frame(
  predict(q5_model, newdata = q5_data, interval = "confidence", level = 0.95)
)
ci_band5$spent <- q5_data$spent

## 95% prediction interval at the same x values, i.e. the range expected for a
## single new observation at each amount spent, stored the same way.
pe_band5 <- as.data.frame(
  predict(q5_model, newdata = q5_data, interval = "prediction", level = 0.95)
)
pe_band5$spent <- q5_data$spent
## Observed points, the fitted line, and the outlines of the 95% confidence and
## prediction bands for the impressions ~ spent model.
## alpha = 0 leaves each ribbon unfilled so only its coloured outline shows.
ggplot(q5_data) +
  geom_point(aes(spent, impressions), size = 2.5, color = 'black') +
  geom_ribbon(data = ci_band5,
              aes(x = spent, ymin = lwr, ymax = upr, color = '95% Confidence Band'),
              alpha = 0, size = 1) +
  geom_ribbon(data = pe_band5,
              aes(x = spent, ymin = lwr, ymax = upr, color = '95% Prediction Band'),
              alpha = 0, size = 1) +
  geom_line(data = pe_band5, aes(x = spent, y = fit, color = 'Fit'), size = 1) +
  ## The labs() call was missing its closing parenthesis and the `+` that chains the next layer.
  labs(x = 'Amount Spent(Millions)', y = 'Retained Impressions(Millions)',
       title = 'CI & PI Band', color = "Legend") +
  scale_color_manual(values = colors) +
  my_theme
18
CI & PI Band
95% Confidence Band 95% Prediction Band Fit
150
Retained Impressions(Millions)
100
50
0 50 100 150
Amount Spent(Millions)
## Pick out the 8th row of the data as the single point of interest
## (presumably the MCI firm - confirm against q5_data$firms).
MCI_data <- q5_data[8, , drop = FALSE]

## 95% prediction interval first, then the 95% confidence interval, at that row.
MCI_PI <- predict(q5_model, newdata = MCI_data, level = 0.95, interval = "prediction")
MCI_CI <- predict(q5_model, newdata = MCI_data, level = 0.95, interval = "confidence")
Prediction interval
MCI_PI ## result of predict() with interval = 'prediction' at level 0.95 for the selected row
Confidence interval
19
MCI_CI ## result of predict() with interval = 'confidence' at level 0.95 for the selected row
20