Professional Documents
Culture Documents
Yoshita Lakka
07/11/2022
##### LAB 1: simple random sampling #####
# Sample 4 mpg values (with replacement) from the built-in mtcars data set.
x <- mtcars$mpg
sample1 <- sample(x, size = 4, replace = TRUE)
print(sample1)

# `Fish_1_` is not created anywhere in this script; it has to exist in the
# workspace already (presumably an imported fish data set -- TODO confirm).
data <- Fish_1_
# View() opens a data viewer and only makes sense interactively; guard it so
# the script also runs under Rscript or when knitted.
if (interactive()) {
  View(data)
}

# One sample of 35 weights, 100 such samples, and the means of 100 samples.
x <- data$Weight
samplea <- sample(x, size = 35, replace = TRUE)
sampleas <- replicate(100, sample(x, size = 35, replace = TRUE))
samplea_mean <- replicate(100, mean(sample(x, size = 35, replace = TRUE)))

# Compare the mean of all weights with the average of the 100 sample means.
popumeean <- mean(data$Weight)
print(popumeean)
## [1] 398.3264
sammean <- mean(samplea_mean)
print(sammean)
## [1] 394.5347

# Repeat the sampling for the rows whose Species is "Perch".
data_perch <- data[data$Species == "Perch", ]
y <- data_perch$Weight
sampleb <- sample(y, size = 6, replace = TRUE)
samplesb <- replicate(25, sample(y, size = 6, replace = TRUE))
#### LAB 2: sampling distribution of the sample mean ####
# Mean of all weights in `data`.
a <- data$Weight
popmean <- mean(a)
print(popmean)
## [1] 398.3264
# Number of replications: the count of 5-element subsets of 50 items.
all_possible_values <- choose(50, 5)
print(all_possible_values)
## [1] 2118760
n <- 5
set.seed(12)
# Mean of a size-n sample drawn with replacement, repeated that many times.
samplemean <- replicate(
  all_possible_values,
  mean(sample(a, n, replace = TRUE))
)
print(mean(samplemean))
## [1] 398.2036
# Variance of all weights, for comparison with the variance of the means.
popvar <- var(a)
print(popvar)
## [1] 128148.5
all_possible_values <- choose(50, 5)
print(all_possible_values)
## [1] 2118760
n <- 5
set.seed(12)
print(var(samplemean))
## [1] 25464.69
#### LAB 3 ####
# Unbiasedness, consistency and efficiency of the sample mean
data1 <- rnorm(100, 50, 2)
popmean <- mean(data1)
print(popmean)
## [1] 49.93766
# Means of 100 samples of size 5 drawn without replacement from data1
n <- 5
sampmean <- replicate(100, mean(sample(data1, size = n)))
print(mean(sampmean))
## [1] 50.04891
print(var(sampmean))
## [1] 0.5148904
# With n = 100 every "sample" is a permutation of the whole of data1
n <- 100
sampmean1 <- replicate(100, mean(sample(data1, size = n)))
print(mean(sampmean1))
## [1] 49.93766
print(var(sampmean1))
## [1] 0
## [1] 2.993648
# Same experiment for the sample variance
n <- 100
samplevar <- replicate(100, var(sample(data1, size = n)))
print(mean(samplevar))
## [1] 2.993648
print(var(samplevar))
## [1] 0
# Compare mu^2 + sigma^2 (= 50^2 + 1) with the average of x^2 over 150 draws
x <- rnorm(150, 50, 1)
p <- 50^2 + 1
q <- x^2 / 150
print(p)
## [1] 2501
print(sum(q))
## [1] 2512.815
### LAB 9 ###
# View() opens a data viewer and only makes sense interactively; guard it so
# the script also runs under Rscript or when knitted.
if (interactive()) {
  View(PlantGrowth)
}
print(PlantGrowth)
## weight group
## 1 4.17 ctrl
## 2 5.58 ctrl
## 3 5.18 ctrl
## 4 6.11 ctrl
## 5 4.50 ctrl
## 6 4.61 ctrl
## 7 5.17 ctrl
## 8 4.53 ctrl
## 9 5.33 ctrl
## 10 5.14 ctrl
## 11 4.81 trt1
## 12 4.17 trt1
## 13 4.41 trt1
## 14 3.59 trt1
## 15 5.87 trt1
## 16 3.83 trt1
## 17 6.03 trt1
## 18 4.89 trt1
## 19 4.32 trt1
## 20 4.69 trt1
## 21 6.31 trt2
## 22 5.12 trt2
## 23 5.54 trt2
## 24 5.50 trt2
## 25 5.37 trt2
## 26 5.29 trt2
## 27 4.92 trt2
## 28 6.15 trt2
## 29 5.80 trt2
## 30 5.26 trt2
# Weights of the two groups, nine observations each
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4)

# Long-format data frame: one row per person, women first, then men
group_labels <- rep(c("Woman", "Man"), each = 9)
all_weights <- c(women_weight, men_weight)
my_data <- data.frame(group = group_labels, weight = all_weights)
print(my_data)
## group weight
## 1 Woman 38.9
## 2 Woman 61.2
## 3 Woman 73.3
## 4 Woman 21.8
## 5 Woman 63.4
## 6 Woman 64.6
## 7 Woman 48.4
## 8 Woman 48.8
## 9 Woman 48.5
## 10 Man 67.8
## 11 Man 60.0
## 12 Man 63.4
## 13 Man 76.0
## 14 Man 89.4
## 15 Man 73.3
## 16 Man 67.3
## 17 Man 61.3
## 18 Man 62.4
library("ggpubr")
# Shapiro-Wilk normality test on the weights of each group.
# The call for the "Man" group was missing from this listing even though its
# output is recorded directly below; it is restored here.
with(my_data, shapiro.test(weight[group == "Man"]))
##
## Shapiro-Wilk normality test
##
## data: weight[group == "Man"]
## W = 0.86425, p-value = 0.1066
with(my_data, shapiro.test(weight[group == "Woman"]))
##
## Shapiro-Wilk normality test
##
## data: weight[group == "Woman"]
## W = 0.94266, p-value = 0.6101
##
## F test to compare two variances
##
## data: women_weight and men_weight
## F = 2.7675, num df = 8, denom df = 8, p-value = 0.1714
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.6242536 12.2689506
## sample estimates:
## ratio of variances
## 2.767478
##
## Two Sample t-test
##
## data: women_weight and men_weight
## t = -2.7842, df = 16, p-value = 0.01327
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -29.748019 -4.029759
## sample estimates:
## mean of x mean of y
## 52.10000 68.98889
# Weights (column 1) of the two treatment groups of PlantGrowth
t1 <- PlantGrowth[PlantGrowth$group == "trt1", 1]
t1
## [1] 4.81 4.17 4.41 3.59 5.87 3.83 6.03 4.89 4.32 4.69
# `t2` is used below but its definition was missing from this listing; the
# recorded values that follow are the trt2 weights, so it is restored
# analogously to `t1`.
t2 <- PlantGrowth[PlantGrowth$group == "trt2", 1]
t2
## [1] 6.31 5.12 5.54 5.50 5.37 5.29 4.92 6.15 5.80 5.26
# Assumption 1: Normality
shapiro.test(t2)
##
## Shapiro-Wilk normality test
##
## data: t2
## W = 0.94101, p-value = 0.5643
# Kernel density estimate of all plant weights
plot(density(PlantGrowth$weight))
# Assumption 2: Independence
# Treatments 1 and 2 are not related
boxplot(
  weight ~ group,
  data = PlantGrowth,
  main = "Boxplots of different treatments",
  xlab = "Group",
  ylab = "weight"
)
# Assumption 3: Equal variance (F test on the two treatment groups)
res.ftest <- var.test(t1, t2)
print(res.ftest)
##
## F test to compare two variances
##
## data: t1 and t2
## F = 3.216, num df = 9, denom df = 9, p-value = 0.0968
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7988084 12.9475894
## sample estimates:
## ratio of variances
## 3.215998
##
## Two Sample t-test
##
## data: t1 and t2
## t = -3.0101, df = 18, p-value = 0.007518
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.4687336 -0.2612664
## sample estimates:
## mean of x mean of y
## 4.661 5.526
# `res` is never assigned in the visible listing. The "Two Sample t-test"
# output recorded just above (data: t1 and t2, df = 18) is what a
# pooled-variance t-test of t1 against t2 prints, so that call is restored
# here before its confidence interval is read.
res <- t.test(t1, t2, var.equal = TRUE)
res$conf.int
# Normality
shapiro.test(iris$Sepal.Width)
##
## Shapiro-Wilk normality test
##
## data: iris$Sepal.Width
## W = 0.98492, p-value = 0.1012
plot(density(iris$Sepal.Width))
# Independence
# The y-axis label had been split across two source lines by line wrapping,
# which put a line break inside the label; it is a single-line label again.
boxplot(Sepal.Width ~ Species, data = iris, xlab = "species",
        ylab = "sepal width", main = "boxplot of iris")
# Equal variance
# Sepal widths (column 2) of the setosa and versicolor flowers.
# The recorded console output below had been wrapped so that its continuation
# lines were bare numbers (a parse error); each output row is one comment again.
s1 <- iris[iris$Species == "setosa", 2]
s1
## [1] 3.5 3.0 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 3.7 3.4 3.0 3.0 4.0 4.4 3.9 3.5 3.8
## [20] 3.8 3.4 3.7 3.6 3.3 3.4 3.0 3.4 3.5 3.4 3.2 3.1 3.4 4.1 4.2 3.1 3.2 3.5 3.6
## [39] 3.0 3.4 3.5 2.3 3.2 3.5 3.8 3.0 3.8 3.2 3.7 3.3
s2 <- iris[iris$Species == "versicolor", 2]
s2
## [1] 3.2 3.2 3.1 2.3 2.8 2.8 3.3 2.4 2.9 2.7 2.0 3.0 2.2 2.9 2.9 3.1 3.0 2.7 2.2
## [20] 2.5 3.2 2.8 2.5 2.8 2.9 3.0 2.8 3.0 2.9 2.6 2.4 2.4 2.7 2.7 3.0 3.4 3.1 2.3
## [39] 3.0 2.5 2.6 3.0 2.6 2.3 2.7 3.0 2.9 2.9 2.5 2.8
# F test comparing the variances of the two species
res.ftest <- var.test(s1, s2)
res.ftest
##
## F test to compare two variances
##
## data: s1 and s2
## F = 1.4592, num df = 49, denom df = 49, p-value = 0.1895
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.828080 2.571444
## sample estimates:
## ratio of variances
## 1.459233
##
## Welch Two Sample t-test
##
## data: s1 and s2
## t = 9.455, df = 94.698, p-value = 2.484e-15
## alternative hypothesis: true difference in means is not equal to 0
## 99 percent confidence interval:
## 0.4750576 0.8409424
## sample estimates:
## mean of x mean of y
## 3.428 2.770
#### LAB 5 ####
# 100 draws from a Poisson distribution with lambda = 5.
# The recorded console output below had been wrapped so that its continuation
# lines were bare numbers (a parse error); each output row is one comment again.
data <- rpois(n = 100, lambda = 5)
data
## [1] 4 7 4 8 9 2 5 8 5 5 9 5 6 5 2 8 3 2 4 9 8 6 6 11 6
## [26] 6 5 5 4 3 9 8 6 7 1 5 6 3 4 3 3 4 4 4 3 3 3 5 4 7
## [51] 2 5 7 2 5 3 3 6 8 4 6 2 4 4 7 5 7 7 7 4 6 6 6 0 5
## [76] 3 4 5 4 2 3 6 4 7 2 4 10 8 8 3 3 6 4 6 4 3 7 2 5 5
##
## Attaching package: 'Hmisc'
##
## Attaching package: 'UsingR'
## The following object is masked from 'package:survival':
##
## cancer
plot(density(data))
# Compare the likelihood of a single data point (the first observation)
# across the candidate parameter values lambda = 1, ..., 25
likelihood <- dpois(data[1], lambda = seq_len(25))
print(likelihood)
dflike <- data.frame(data = likelihood)
print(dflike)
## data
## 1 1.532831e-02
## 2 9.022352e-02
## 3 1.680314e-01
## 4 1.953668e-01
## 5 1.754674e-01
## 6 1.338526e-01
## 7 9.122619e-02
## 8 5.725229e-02
## 9 3.373716e-02
## 10 1.891664e-02
## 11 1.018873e-02
## 12 5.308599e-03
## 13 2.689886e-03
## 14 1.331000e-03
## 15 6.452627e-04
## 16 3.072961e-04
## 17 1.440716e-04
## 18 6.661593e-05
## 19 3.042342e-05
## 20 1.374102e-05
## 21 6.144433e-06
## 22 2.722707e-06
## 23 1.196539e-06
## 24 5.218746e-07
## 25 2.260408e-07
# Pair each candidate lambda with its likelihood and plot one against the other
tab <- data.frame(lambda = seq_len(25), data = likelihood)
plot(tab, main = "lambda vs likelihood")
# 100 draws from a Binomial(size = 50, prob = 0.5) distribution.
# The recorded console output below had been wrapped so that its continuation
# lines were bare numbers (a parse error); each output row is one comment again.
data <- rbinom(100, 50, 0.5)
data
## [1] 26 23 25 31 25 29 30 26 24 21 30 23 20 31 27 21 25 31 26 24 26 23 23 22 24
## [26] 33 21 20 21 27 26 29 27 27 25 26 28 28 32 24 23 24 17 22 29 22 22 20 23 27
## [51] 29 25 24 23 21 24 26 22 25 22 25 24 26 24 24 25 27 22 24 23 26 22 29 27 27
## [76] 26 24 25 29 26 29 23 27 23 26 25 23 26 30 30 23 23 33 26 30 25 24 26 21 26
# Density plot of the likelihood values, then a table pairing them with `prob`.
# NOTE(review): the recorded output below has 100 rows, whereas the last
# visible assignment to `likelihood` has 25 values -- presumably `likelihood`
# was recomputed from the binomial sample in a step not shown here; confirm.
plot(density(likelihood))
# NOTE(review): seq(0.01, 0.1) uses the default step of 1, so it yields the
# single value 0.01 (the prob column in the recorded output is 0.01 on every
# row). Confirm whether a finer grid (e.g. by = 0.01) was intended.
tab=data.frame(prob=seq(0.01, 0.1), likelihood)
tab
## prob likelihood
## 1 0.01 2.875940e-06
## 2 0.01 7.451890e-05
## 3 0.01 8.972934e-06
## 4 0.01 4.617512e-09
## 5 0.01 8.972934e-06
## 6 0.01 6.998736e-08
## 7 0.01 1.840408e-08
## 8 0.01 2.875940e-06
## 9 0.01 2.656461e-05
## 10 0.01 4.956559e-04
## 11 0.01 1.840408e-08
## 12 0.01 7.451890e-05
## 13 0.01 1.170987e-03
## 14 0.01 4.617512e-09
## 15 0.01 8.758007e-07
## 16 0.01 4.956559e-04
## 17 0.01 8.972934e-06
## 18 0.01 4.617512e-09
## 19 0.01 2.875940e-06
## 20 0.01 2.656461e-05
## 21 0.01 2.875940e-06
## 22 0.01 7.451890e-05
## 23 0.01 7.451890e-05
## 24 0.01 1.977617e-04
## 25 0.01 2.656461e-05
## 26 0.01 2.532895e-10
## 27 0.01 4.956559e-04
## 28 0.01 1.170987e-03
## 29 0.01 4.956559e-04
## 30 0.01 8.758007e-07
## 31 0.01 2.875940e-06
## 32 0.01 6.998736e-08
## 33 0.01 8.758007e-07
## 34 0.01 8.758007e-07
## 35 0.01 8.972934e-06
## 36 0.01 2.875940e-06
## 37 0.01 2.537042e-07
## 38 0.01 2.537042e-07
## 39 0.01 1.106279e-09
## 40 0.01 2.656461e-05
## 41 0.01 7.451890e-05
## 42 0.01 2.656461e-05
## 43 0.01 1.059153e-02
## 44 0.01 1.977617e-04
## 45 0.01 6.998736e-08
## 46 0.01 1.977617e-04
## 47 0.01 1.977617e-04
## 48 0.01 1.170987e-03
## 49 0.01 7.451890e-05
## 50 0.01 8.758007e-07
## 51 0.01 6.998736e-08
## 52 0.01 8.972934e-06
## 53 0.01 2.656461e-05
## 54 0.01 7.451890e-05
## 55 0.01 4.956559e-04
## 56 0.01 2.656461e-05
## 57 0.01 2.875940e-06
## 58 0.01 1.977617e-04
## 59 0.01 8.972934e-06
## 60 0.01 1.977617e-04
## 61 0.01 8.972934e-06
## 62 0.01 2.656461e-05
## 63 0.01 2.875940e-06
## 64 0.01 2.656461e-05
## 65 0.01 2.656461e-05
## 66 0.01 8.972934e-06
## 67 0.01 8.758007e-07
## 68 0.01 1.977617e-04
## 69 0.01 2.656461e-05
## 70 0.01 7.451890e-05
## 71 0.01 2.875940e-06
## 72 0.01 1.977617e-04
## 73 0.01 6.998736e-08
## 74 0.01 8.758007e-07
## 75 0.01 8.758007e-07
## 76 0.01 2.875940e-06
## 77 0.01 2.656461e-05
## 78 0.01 8.972934e-06
## 79 0.01 6.998736e-08
## 80 0.01 2.875940e-06
## 81 0.01 6.998736e-08
## 82 0.01 7.451890e-05
## 83 0.01 8.758007e-07
## 84 0.01 7.451890e-05
## 85 0.01 2.875940e-06
## 86 0.01 8.972934e-06
## 87 0.01 7.451890e-05
## 88 0.01 2.875940e-06
## 89 0.01 1.840408e-08
## 90 0.01 1.840408e-08
## 91 0.01 7.451890e-05
## 92 0.01 7.451890e-05
## 93 0.01 2.532895e-10
## 94 0.01 2.875940e-06
## 95 0.01 1.840408e-08
## 96 0.01 8.972934e-06
## 97 0.01 2.656461e-05
## 98 0.01 2.875940e-06
## 99 0.01 4.956559e-04
## 100 0.01 2.875940e-06
## $maximum
## [1] 0.6022232
##
## $objective
## [1] 1.23443e-37
# 50 draws from a Binomial(size = 9, prob = 0.6) distribution
x <- rbinom(50, size = 9, prob = 0.6)

# Likelihood function of the binomial.
#   p: candidate success probability
#   x: vector of observed counts (each out of 9 trials)
# Returns the product of the binomial densities of all observations.
likbin <- function(p, x) {
  prod(dbinom(x, size = 9, prob = p))
}

# Obtain the value of p that maximises the likelihood
optimize(likbin, interval = c(0, 1), maximum = TRUE, x = x)
## $maximum
## [1] 0.6222213
##
## $objective
## [1] 2.54981e-41
# 50 draws from a Poisson distribution with lambda = 5
x <- rpois(50, lambda = 5)

# Likelihood function of the Poisson.
#   p: candidate rate (lambda)
#   x: vector of observed counts
# Returns the product of the Poisson densities of all observations.
likepois <- function(p, x) {
  prod(dpois(x, lambda = p))
}

# The maximiser of this likelihood is mean(x), which never exceeds max(x), so
# the search interval is taken from the data. The former interval c(0, 1)
# could not contain a rate near 5 and pinned the result at its upper edge
# (the recorded $maximum of 0.9999339 below predates this fix).
fit_pois <- optimize(
  likepois,
  interval = c(0, max(x) + 1),
  maximum = TRUE,
  x = x
)
fit_pois
## $maximum
## [1] 0.9999339
##
## $objective
## [1] 6.016742e-141
# 100 draws from an exponential distribution with rate 1
x <- rexp(100, 1)

# Likelihood function of the exponential.
#   p: candidate rate
#   x: vector of observations
# Returns the product of the exponential densities of all observations.
# The former body, prod(dexp(100, 1)), ignored both p and x and was therefore
# constant in p, so optimize() had nothing to maximise.
likexp <- function(p, x) {
  prod(dexp(x, rate = p))
}

# The maximiser is 1 / mean(x), which can exceed 1 for data simulated with
# rate 1, so the search interval extends well beyond 1. The recorded output
# below ($maximum 0.9999339) predates this fix.
fit_exp <- optimize(likexp, interval = c(0, 10), maximum = TRUE, x = x)
fit_exp
## $maximum
## [1] 0.9999339
##
## $objective
## [1] 3.720076e-44
#maxlogL function
library(EstimationTools)
##
## DEoptim package
## Differential Evolution algorithm in R
## Authors: D. Ardia, K. Mullen, B. Peterson and J. Ulrich
##
## Attaching package: 'BBmisc'
## The following object is masked from 'package:Hmisc':
##
## %nin%
set.seed(1000)
z <- rnorm(n = 1000, mean = 10, sd = 15)
# Maximum-likelihood fit of a normal distribution; the bounds are given in
# the order (mean, sd). The sample is simulated with sd = 15, so the upper
# bound for sd has to lie above 15: the former bound of 10 clipped the
# estimate at exactly 10 (see the recorded output below, which predates this
# fix).
fit1 <- maxlogL(
  x = z, dist = "dnorm", start = c(2, 3),
  lower = c(-15, 0), upper = c(15, 30)
)
fit1
## Call:
## maxlogL(x = z, dist = "dnorm", start = c(2, 3), lower = c(-15,
## 0), upper = c(15, 10))
##
## Results:
##
## Estimated parameters:
## mean sd
## 9.812852 10.000000
library(EstimationTools)
set.seed(1000)
z <- rnorm(n = 1000, mean = 10, sd = 15)
# Maximum-likelihood fit of a normal distribution; the bounds are given in
# the order (mean, sd). The sample is simulated with sd = 15, so the upper
# bound for sd has to lie above 15: the former bound of 10 clipped the
# estimate at exactly 10 (see the recorded output below, which predates this
# fix).
fit1 <- maxlogL(
  x = z, dist = "dnorm", start = c(2, 3),
  lower = c(-15, 0), upper = c(15, 30)
)
fit1
## Call:
## maxlogL(x = z, dist = "dnorm", start = c(2, 3), lower = c(-15,
## 0), upper = c(15, 10))
##
## Results:
##
## Estimated parameters:
## mean sd
## 9.812852 10.000000
library(EstimationTools)
set.seed(1000)
z <- rpois(1000, 5)
# dpois has a single parameter (lambda), so `start`, `lower` and `upper` each
# take exactly one value. The former two-element bounds c(-15, 0) / c(10, 15)
# did not match `start` and allowed a negative rate; lambda is now searched
# over a strictly positive range that contains the simulated value 5.
fit1 <- maxlogL(x = z, dist = "dpois", start = 2, lower = 0.01, upper = 15)
fit1
## Call:
## maxlogL(x = z, dist = "dpois", start = 2, lower = c(-15, 0),
## upper = c(10, 15))
##
## Results:
##
## Estimated parameters:
## lambda
## 4.99
library(EstimationTools)
set.seed(1000)
z <- rexp(1000, 1)
# dexp has a single parameter (rate), so `start`, `lower` and `upper` each
# take exactly one value. The former two-element bounds c(-15, 0) / c(10, 15)
# did not match `start` and allowed a negative rate; the rate is now searched
# over a strictly positive range that contains the simulated value 1.
fit <- maxlogL(x = z, dist = "dexp", start = 2, lower = 0.01, upper = 15)
fit
## Call:
## maxlogL(x = z, dist = "dexp", start = 2, lower = c(-15, 0), upper = c(10,
## 15))
##
## Results:
##
## Estimated parameters:
## rate
## 0.9968858
#### LAB 6 ####
# H0: p = 0.5
# H1: p != 0.5
# One-sample proportion test (65 successes in 160 trials), no continuity
# correction, 99% confidence interval
prop.test(
  x = 65,
  n = 160,
  p = 0.5,
  conf.level = 0.99,
  correct = FALSE
)
##
## 1-sample proportions test without continuity correction
##
## data: 65 out of 160, null probability 0.5
## X-squared = 5.625, df = 1, p-value = 0.01771
## alternative hypothesis: true p is not equal to 0.5
## 99 percent confidence interval:
## 0.3119103 0.5080554
## sample estimates:
## p
## 0.40625
# H0: p = 0.095
# H1: p < 0.095
# One-sided one-sample proportion test (7 successes in 100 trials) with a
# 99% confidence interval
prop.test(
  x = 7,
  n = 100,
  p = 0.095,
  conf.level = 0.99,
  alternative = "less"
)
##
## 1-sample proportions test with continuity correction
##
## data: 7 out of 100, null probability 0.095
## X-squared = 0.46525, df = 1, p-value = 0.2476
## alternative hypothesis: true p is less than 0.095
## 99 percent confidence interval:
## 0.0000000 0.1603637
## sample estimates:
## p
## 0.07
##
## 1-sample proportions test without continuity correction
##
## data: 540 out of 1000, null probability 0.5
## X-squared = 6.4, df = 1, p-value = 0.01141
## alternative hypothesis: true p is not equal to 0.5
## 99 percent confidence interval:
## 0.4992726 0.5802001
## sample estimates:
## p
## 0.54
#### LAB 7 ####
# H0: p1 = p2
# H1: p1 != p2
# Two-sample test for equality of proportions (310/500 vs 236/400) without
# continuity correction
prop.test(
  x = c(310, 236),
  n = c(500, 400),
  conf.level = 0.95,
  correct = FALSE
)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: c(310, 236) out of c(500, 400)
## X-squared = 0.83814, df = 1, p-value = 0.3599
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.03429011 0.09429011
## sample estimates:
## prop 1 prop 2
## 0.62 0.59
# 22 observations to be tested against a hypothesised mean of 0.5
data <- c(
  -0.92, -3.87, 2.36, -0.51, -1.35, -0.33, -3.59, -1.02, 8.06, 4.17,
  0.98, 1.65, -0.67, -3.21, 1.44, -1.64, 0.38, -0.21, 5.18, 5.92, 3.95, -4.74
)
# Graphical checks of the distribution
hist(data)
# simple.freqpoly() is not base R; presumably it comes from the UsingR
# package that is attached earlier in this notebook -- confirm.
simple.freqpoly(data)
qqnorm(data)
qqline(data)
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
boxplot(data, horizontal = TRUE)
plot(density(data))
# Formal normality check before the t-test
shapiro.test(data)
##
## Shapiro-Wilk normality test
##
## data: data
## W = 0.96236, p-value = 0.5385
t.test(data, mu = 0.5)
##
## One Sample t-test
##
## data: data
## t = 0.065979, df = 21, p-value = 0.948
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
## -0.9288499 2.0224862
## sample estimates:
## mean of x
## 0.5468182
# Normality check and one-sample t-test of the mtcars mpg values against 20
x <- mtcars[["mpg"]]
shapiro.test(x)
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.94756, p-value = 0.1229
t.test(x, mu = 20)
##
## One Sample t-test
##
## data: x
## t = 0.08506, df = 31, p-value = 0.9328
## alternative hypothesis: true mean is not equal to 20
## 95 percent confidence interval:
## 17.91768 22.26357
## sample estimates:
## mean of x
## 20.09062
### LAB 8 ###
# Sepal widths of all iris flowers: normality check, then their mean
x <- iris[["Sepal.Width"]]
shapiro.test(x)
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.98492, p-value = 0.1012
popmean <- mean(x)
print(popmean)
## [1] 3.057333
## [1] 3.064
# Variance of `sample1`.
# NOTE(review): the only visible assignment to `sample1` is the 4-value mpg
# sample in LAB 1; the recorded outputs around this line (3.064, 0.1937796)
# suggest it was redrawn from the sepal widths in a step not shown -- confirm.
var(sample1)
## [1] 0.1937796
library(BSDA)
##
## Attaching package: 'BSDA'
##
## One-sample z-Test
##
## data: x
## z = 2.9677, p-value = 0.003
## alternative hypothesis: true mean is not equal to 3.026
## 95 percent confidence interval:
## 3.036640 3.078027
## sample estimates:
## mean of x
## 3.057333
# One-sample t-test of x against a hypothesised mean of 3.026
t.test(x = x, mu = 3.026)
##
## One Sample t-test
##
## data: x
## t = 0.88044, df = 149, p-value = 0.38
## alternative hypothesis: true mean is not equal to 3.026
## 95 percent confidence interval:
## 2.987010 3.127656
## sample estimates:
## mean of x
## 3.057333
summarise(
count = n(),
We'll use the F-test to test for homogeneity of variances. This can be performed with the
function var.test() as follows:
res.ftest
##
##
## 0.08150656 1.60191315
## sample estimates:
## ratio of variances
## 0.3613398
The p-value of the F-test is p = 0.1713596. It is greater than the significance level alpha = 0.05.
In conclusion, there is no significant difference between the variances of the two sets of data.
Therefore, we can use the classic t-test, which assumes equality of the two variances.
Question : Is there any significant difference between women and men weights?
1) Compute independent t-test - Method 1: The data are saved in two different numeric
vectors.
Compute t-test
res
##
##
## -29.748019 -4.029759
## sample estimates:
## mean of x mean of y
## 52.10000 68.98889
2) Compute independent t-test - Method 2: The data are saved in a data frame.
Compute t-test
res
##
##
## data: weight by group
## 4.029759 29.748019
## sample estimates:
## 68.98889 52.10000
If you want to test whether the average men's weight is less than the average women's
weight, type this:
##
##
## -Inf 27.47924
## sample estimates:
## 68.98889 52.10000
Or, if you want to test whether the average men's weight is greater than the average
women's weight, type this:
##
##
## 6.298536 Inf
## sample estimates:
## 68.98889 52.10000
The p-value of the test is 0.01327, which is less than the significance level alpha = 0.05. We
can conclude that men's average weight is significantly different from women's average weight
with a p-value = 0.01327.