You are on page 1 of 38

R Practice

Yoshita Lakka

07/11/2022
#####LAB 1#####
# Sampling distributions built from the mtcars mpg column.
x <- mtcars$mpg

# One bootstrap sample of size 4 (with replacement).
sample1 <- sample(x, size = 4, replace = TRUE)
print(sample1)

## [1] 30.4 21.4 22.8 10.4

# 100 resamples of size 4 (one resample per matrix column).
samples <- replicate(100, sample(x, size = 4, replace = TRUE))

# Sampling distributions of three statistics over 100 resamples.
sample_mean <- replicate(100, mean(sample(x, size = 4, replace = TRUE)))
sample_var <- replicate(100, var(sample(x, size = 4, replace = TRUE)))
sample_range <- replicate(100, diff(range(sample(x, size = 4, replace = TRUE))))

# Histograms of the three sampling distributions.
# (The label strings were split across lines by PDF extraction; rejoined.)
histogram1 <- hist(sample_mean, main = "Histogram of sample mean",
                   xlab = "sample mean", ylab = " frequency", col = "sky blue")
histogram2 <- hist(sample_var, main = "Histogram of sample variance",
                   xlab = "sample variance", col = "pink")
histogram3 <- hist(sample_range, main = "Histogram of sample range")
# Load the Fish dataset from a local Excel file (path is machine-specific).
library(readxl)
Fish_1_ <- read_excel("C:/Users/lvive/Downloads/Fish (1).xlsx")
View(Fish_1_)

data= Fish_1_
View(data)

# Bootstrap sampling of fish weights: 100 resamples of size 35.
x= data$Weight
samplea= sample(x,size=35, replace = TRUE )
sampleas= replicate(100, sample(x, size=35, replace= TRUE))
samplea_mean= replicate(100, mean(sample(x, size=35, replace= TRUE)))

# Population mean vs the mean of the 100 sample means — they should be
# close (unbiasedness of the sample mean).
popumeean= mean(data$Weight)
print(popumeean)

## [1] 398.3264

sammean= mean(samplea_mean)
print(sammean)

## [1] 394.5347

# Subset to the Perch species and resample its weights.
data_perch= data[data$Species=="Perch",]
y= data_perch$Weight
sampleb= sample(y, size=6, replace= TRUE)
samplesb= replicate(25, sample(y, size=6, replace=TRUE))

####LAB2####
# Sampling distribution of the mean of the fish weights (`data` from LAB 1).
a= data$Weight
popmean= mean(a)
popmean

## [1] 398.3264

# choose(50, 5): number of distinct size-5 subsets of 50 items, used here
# as the replication count. NOTE(review): presumably the data were treated
# as having 50 rows — verify against the actual dataset size.
all_possible_values= choose(50,5)
all_possible_values

## [1] 2118760

n=5
set.seed(12)
# Mean of ~2.1 million sample means approximates the population mean.
samplemean= replicate(all_possible_values, mean(sample(a, n, replace=TRUE)))
mean(samplemean)

## [1] 398.2036

popvar= var(a)
popvar

## [1] 128148.5

# NOTE(review): the lines below recompute unchanged values; `samplemean`
# is NOT regenerated, so this second set.seed(12) has no effect.
all_possible_values= choose(50,5)
all_possible_values
## [1] 2118760

n=5
set.seed(12)
# Variance of the sample means (roughly popvar / n for resampling).
var(samplemean)

## [1] 25464.69

####LAB3####
#Unbiasedness, consistency and efficiency of sample mean
data1= rnorm(100,50,2)
popmean= mean(data1)
popmean

## [1] 49.93766

# Means of 100 samples of size 5 (sample() here is WITHOUT replacement).
n=5
sampmean= replicate(100, mean(sample(data1,n)))
mean(sampmean)

## [1] 50.04891

var(sampmean)

## [1] 0.5148904

# With n = 100 each sample is the whole data set (sampling without
# replacement), so every sample mean is identical and the variance is 0.
n=100
sampmean1= replicate(100, mean(sample(data1,n)))
mean(sampmean1)

## [1] 49.93766

var(sampmean1)

## [1] 0

#Unbiasedness, consistency and efficiency of sample variance


popvar= var(data1)
popvar

## [1] 2.993648

# Same degenerate case: n = 100 of 100 observations, so var is constant.
n=100
samplevar= replicate(100, var(sample(data1, n)))
mean(samplevar)

## [1] 2.993648

var(samplevar)

## [1] 0

# Demonstration that mean(x^2) estimates E[X^2] = mu^2 + sigma^2 for
# X ~ N(50, 1): p is the theoretical value, sum(q) the sample estimate.
x= rnorm(150,50,1)
p= (50**2) +1
q= (x**2)/150
p

## [1] 2501

sum(q)

## [1] 2512.815

###LAB 9###
# PlantGrowth is a built-in R dataset: plant dry weight under a control
# ("ctrl") and two treatment conditions ("trt1", "trt2").
View(PlantGrowth)
print(PlantGrowth)

## weight group
## 1 4.17 ctrl
## 2 5.58 ctrl
## 3 5.18 ctrl
## 4 6.11 ctrl
## 5 4.50 ctrl
## 6 4.61 ctrl
## 7 5.17 ctrl
## 8 4.53 ctrl
## 9 5.33 ctrl
## 10 5.14 ctrl
## 11 4.81 trt1
## 12 4.17 trt1
## 13 4.41 trt1
## 14 3.59 trt1
## 15 5.87 trt1
## 16 3.83 trt1
## 17 6.03 trt1
## 18 4.89 trt1
## 19 4.32 trt1
## 20 4.69 trt1
## 21 6.31 trt2
## 22 5.12 trt2
## 23 5.54 trt2
## 24 5.50 trt2
## 25 5.37 trt2
## 26 5.29 trt2
## 27 4.92 trt2
## 28 6.15 trt2
## 29 5.80 trt2
## 30 5.26 trt2

# Independent two-sample t-test example: women's vs men's weights (kg).
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4)

# Long-format data frame: one row per subject with a group label.
my_data= data.frame(
group= rep(c("Woman", "Man"), each=9),
weight= c(women_weight, men_weight)
)
print(my_data)

## group weight
## 1 Woman 38.9
## 2 Woman 61.2
## 3 Woman 73.3
## 4 Woman 21.8
## 5 Woman 63.4
## 6 Woman 64.6
## 7 Woman 48.4
## 8 Woman 48.8
## 9 Woman 48.5
## 10 Man 67.8
## 11 Man 60.0
## 12 Man 63.4
## 13 Man 76.0
## 14 Man 89.4
## 15 Man 73.3
## 16 Man 67.3
## 17 Man 61.3
## 18 Man 62.4

library("ggpubr")

## Warning: package 'ggpubr' was built under R version 4.1.3

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.1.3

# Boxplot of the two groups (ggpubr); the call continues across lines.
ggboxplot(my_data,main="boxplot of male and female weights" , x= "group",


y="weight", xlab="Groups", ylab="Weights")
# Normality assumption: Shapiro-Wilk per group (both p > 0.05).
with(my_data, shapiro.test(weight[group == "Man"]))

##
## Shapiro-Wilk normality test
##
## data: weight[group == "Man"]
## W = 0.86425, p-value = 0.1066

with(my_data, shapiro.test(weight[group=="Woman"]))

##
## Shapiro-Wilk normality test
##
## data: weight[group == "Woman"]
## W = 0.94266, p-value = 0.6101

# Equal-variance assumption: F-test (p = 0.17, no significant difference).
res.ftest= var.test(women_weight, men_weight)


res.ftest

##
## F test to compare two variances
##
## data: women_weight and men_weight
## F = 2.7675, num df = 8, denom df = 8, p-value = 0.1714
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.6242536 12.2689506
## sample estimates:
## ratio of variances
## 2.767478

# Pooled two-sample t-test (variances judged equal by the F-test above).
result=t.test(women_weight, men_weight, var.equal = TRUE)


result

##
## Two Sample t-test
##
## data: women_weight and men_weight
## t = -2.7842, df = 16, p-value = 0.01327
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -29.748019 -4.029759
## sample estimates:
## mean of x mean of y
## 52.10000 68.98889

# Extract dry weight (column 1) for the two treatment groups.
t1= PlantGrowth[PlantGrowth$group=="trt1", 1]
t1

## [1] 4.81 4.17 4.41 3.59 5.87 3.83 6.03 4.89 4.32 4.69

t2= PlantGrowth[PlantGrowth$group== "trt2", 1]


t2

## [1] 6.31 5.12 5.54 5.50 5.37 5.29 4.92 6.15 5.80 5.26

#Assumption 1: Normality
# NOTE(review): only t2 is tested here; t1 should get the same check.
shapiro.test(t2)

##
## Shapiro-Wilk normality test
##
## data: t2
## W = 0.94101, p-value = 0.5643

plot(density(PlantGrowth$weight))
#Assumption 2: Independence
#Treatment 1 and two are not related
boxplot(weight~group, data= PlantGrowth, xlab="Group", ylab= "weight",
main="Boxplots of different treatments")
#Assumption 3: Equal variance
# F-test p = 0.097 > 0.05, so equal variances are plausible.
res.ftest= var.test(t1,t2)
res.ftest

##
## F test to compare two variances
##
## data: t1 and t2
## F = 3.216, num df = 9, denom df = 9, p-value = 0.0968
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7988084 12.9475894
## sample estimates:
## ratio of variances
## 3.215998

# Pooled t-test comparing treatment 1 vs treatment 2 mean weights.
res= t.test(t1,t2, var.equal = TRUE)


res

##
## Two Sample t-test
##
## data: t1 and t2
## t = -3.0101, df = 18, p-value = 0.007518
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.4687336 -0.2612664
## sample estimates:
## mean of x mean of y
## 4.661 5.526

# 95% confidence interval for the difference in means from the test above.
res$conf.int

## [1] -1.4687336 -0.2612664


## attr(,"conf.level")
## [1] 0.95

#Normality
shapiro.test(iris$Sepal.Width)

##
## Shapiro-Wilk normality test
##
## data: iris$Sepal.Width
## W = 0.98492, p-value = 0.1012

plot(density(iris$Sepal.Width))

#Independence
# NOTE: the ylab string below contains a line break introduced by PDF
# extraction; R parses it as one string with an embedded newline.
boxplot(Sepal.Width ~ Species, data=iris, xlab= "species", ylab="sepal
width", main="boxplot of iris")
#Equal variance
# Equal-variance check and two-sample t-test on iris sepal width
# (setosa vs versicolor). Sepal.Width is column 2 of iris.
s1 <- iris[iris$Species == "setosa", 2]
s1

## [1] 3.5 3.0 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 3.7 3.4 3.0 3.0 4.0 4.4 3.9
## 3.5 3.8
## [20] 3.8 3.4 3.7 3.6 3.3 3.4 3.0 3.4 3.5 3.4 3.2 3.1 3.4 4.1 4.2 3.1 3.2
## 3.5 3.6
## [39] 3.0 3.4 3.5 2.3 3.2 3.5 3.8 3.0 3.8 3.2 3.7 3.3

s2 <- iris[iris$Species == "versicolor", 2]
s2

## [1] 3.2 3.2 3.1 2.3 2.8 2.8 3.3 2.4 2.9 2.7 2.0 3.0 2.2 2.9 2.9 3.1 3.0
## 2.7 2.2
## [20] 2.5 3.2 2.8 2.5 2.8 2.9 3.0 2.8 3.0 2.9 2.6 2.4 2.4 2.7 2.7 3.0 3.4
## 3.1 2.3
## [39] 3.0 2.5 2.6 3.0 2.6 2.3 2.7 3.0 2.9 2.9 2.5 2.8

# F-test for equality of variances: p = 0.19, so no evidence of unequal
# variances and a pooled t-test is justified.
res.ftest <- var.test(s1, s2)
res.ftest

##
## F test to compare two variances
##
## data: s1 and s2
## F = 1.4592, num df = 49, denom df = 49, p-value = 0.1895
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.828080 2.571444
## sample estimates:
## ratio of variances
## 1.459233

# BUG FIX: the original call was t.test(s1, s2, var.test = TRUE, ...).
# t.test() has no `var.test` argument, so it was silently absorbed by `...`
# and R ran a Welch test (note the recorded "Welch Two Sample t-test"
# heading below). The intended argument is var.equal = TRUE.
res <- t.test(s1, s2, var.equal = TRUE, conf.level = 0.99)


res

## (recorded output from the original Welch run)
##
## Welch Two Sample t-test
##
## data: s1 and s2
## t = 9.455, df = 94.698, p-value = 2.484e-15
## alternative hypothesis: true difference in means is not equal to 0
## 99 percent confidence interval:
## 0.4750576 0.8409424
## sample estimates:
## mean of x mean of y
## 3.428 2.770

##
## Welch Two Sample t-test
##
## data: s1 and s2
## t = 9.455, df = 94.698, p-value = 2.484e-15
## alternative hypothesis: true difference in means is not equal to 0
## 99 percent confidence interval:
## 0.4750576 0.8409424
## sample estimates:
## mean of x mean of y
## 3.428 2.770

####LAB 5####
# Draw 100 Poisson(lambda = 5) variates and plot their distribution.
data= rpois(n=100, lambda = 5)
data

## (recorded output; wrapped continuation lines re-prefixed with ##)
## [1] 4 7 4 8 9 2 5 8 5 5 9 5 6 5 2 8 3 2 4 9 8 6 6
## 11 6
## [26] 6 5 5 4 3 9 8 6 7 1 5 6 3 4 3 3 4 4 4 3 3 3 5
## 4 7
## [51] 2 5 7 2 5 3 3 6 8 4 6 2 4 4 7 5 7 7 7 4 6 6 6
## 0 5
## [76] 3 4 5 4 2 3 6 4 7 2 4 10 8 8 3 3 6 4 6 4 3 7 2
## 5 5

# Histogram (the xlab string was split across lines by PDF extraction;
# rejoined here).
hist(data, main = "hist of rv in poisson dist", xlab = "random variables",
     ylab = "frequency", col = rainbow(6))
library(UsingR)

## Warning: package 'UsingR' was built under R version 4.1.3

## Loading required package: MASS

## Loading required package: HistData

## Loading required package: Hmisc

## Warning: package 'Hmisc' was built under R version 4.1.3

## Loading required package: lattice

## Warning: package 'lattice' was built under R version 4.1.3

## Loading required package: survival

## Loading required package: Formula

##
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':


##
## format.pval, units

##
## Attaching package: 'UsingR'
## The following object is masked from 'package:survival':
##
## cancer

# Frequency polygon of the Poisson draws (UsingR::simple.freqpoly).
# The xlab string was split across lines by PDF extraction; rejoined here.
simple.freqpoly(data, main = "freq poly of rv in poisson dist",
                xlab = "random variables", ylab = "frequency",
                col = rainbow(6))

plot(density(data))
#comparing likelihood of single datapoint with multiple parameter values
# dpois is vectorized over lambda: this evaluates the likelihood of the
# first observation at each lambda in 1..25 (seq(25) == 1:25).
likelihood= dpois(data[1], lambda = seq(25))
likelihood

## [1] 1.532831e-02 9.022352e-02 1.680314e-01 1.953668e-01 1.754674e-01


## [6] 1.338526e-01 9.122619e-02 5.725229e-02 3.373716e-02 1.891664e-02
## [11] 1.018873e-02 5.308599e-03 2.689886e-03 1.331000e-03 6.452627e-04
## [16] 3.072961e-04 1.440716e-04 6.661593e-05 3.042342e-05 1.374102e-05
## [21] 6.144433e-06 2.722707e-06 1.196539e-06 5.218746e-07 2.260408e-07

# Put the likelihood-vs-lambda values (lambda = 1..25) in a data frame.
dflike= data.frame(
data=likelihood
)
dflike

## data
## 1 1.532831e-02
## 2 9.022352e-02
## 3 1.680314e-01
## 4 1.953668e-01
## 5 1.754674e-01
## 6 1.338526e-01
## 7 9.122619e-02
## 8 5.725229e-02
## 9 3.373716e-02
## 10 1.891664e-02
## 11 1.018873e-02
## 12 5.308599e-03
## 13 2.689886e-03
## 14 1.331000e-03
## 15 6.452627e-04
## 16 3.072961e-04
## 17 1.440716e-04
## 18 6.661593e-05
## 19 3.042342e-05
## 20 1.374102e-05
## 21 6.144433e-06
## 22 2.722707e-06
## 23 1.196539e-06
## 24 5.218746e-07
## 25 2.260408e-07

# Frequency polygon of the likelihood values (UsingR::simple.freqpoly);
# the call continues across lines.
simple.freqpoly(likelihood, main="fp", xlab="likelihood", ylab="freq",


col=rainbow(6))

# Tabulate lambda (1..25) against its likelihood and plot the curve;
# the maximum marks the most plausible lambda for the first observation.
tab= data.frame(
lambda= seq(25),
data= likelihood
)
plot(tab, main="lambda vs likelihood")
# Draw 100 Binomial(size = 50, p = 0.5) variates.
data= rbinom(100, 50, 0.5)
data

## (recorded output; wrapped continuation lines re-prefixed with ##)
## [1] 26 23 25 31 25 29 30 26 24 21 30 23 20 31 27 21 25 31 26 24 26 23 23
## 22 24
## [26] 33 21 20 21 27 26 29 27 27 25 26 28 28 32 24 23 24 17 22 29 22 22 20
## 23 27
## [51] 29 25 24 23 21 24 26 22 25 22 25 24 26 24 24 25 27 22 24 23 26 22 29
## 27 27
## [76] 26 24 25 29 26 29 23 27 23 26 25 23 26 30 30 23 23 33 26 30 25 24 26
## 21 26

# Plots. BUG FIX: the original swapped the titles (hist was labelled
# "freq poly" and the frequency polygon "histogram"); corrected here, and
# the argument strings rejoined after PDF extraction split them.
hist(data, main = "histogram", xlab = "rv in binomial", ylab = "freq",
     col = rainbow(6))
simple.freqpoly(data, main = "freq poly", xlab = "rv in binomial",
                ylab = "freq", col = rainbow(6))
plot(density(data))

# Likelihood of each observation under its generating distribution.
# BUG FIX: the original used dbinom(data, size = 100, prob = seq(0.1, 0.01)).
# size must match the generator (50, not 100), and seq(0.1, 0.01) is just
# the single value 0.1, not a grid, so one explicit prob is used instead.
likelihood <- dbinom(data, size = 50, prob = 0.5)


likelihood

## [1] 2.875940e-06 7.451890e-05 8.972934e-06 4.617512e-09 8.972934e-06


## [6] 6.998736e-08 1.840408e-08 2.875940e-06 2.656461e-05 4.956559e-04
## [11] 1.840408e-08 7.451890e-05 1.170987e-03 4.617512e-09 8.758007e-07
## [16] 4.956559e-04 8.972934e-06 4.617512e-09 2.875940e-06 2.656461e-05
## [21] 2.875940e-06 7.451890e-05 7.451890e-05 1.977617e-04 2.656461e-05
## [26] 2.532895e-10 4.956559e-04 1.170987e-03 4.956559e-04 8.758007e-07
## [31] 2.875940e-06 6.998736e-08 8.758007e-07 8.758007e-07 8.972934e-06
## [36] 2.875940e-06 2.537042e-07 2.537042e-07 1.106279e-09 2.656461e-05
## [41] 7.451890e-05 2.656461e-05 1.059153e-02 1.977617e-04 6.998736e-08
## [46] 1.977617e-04 1.977617e-04 1.170987e-03 7.451890e-05 8.758007e-07
## [51] 6.998736e-08 8.972934e-06 2.656461e-05 7.451890e-05 4.956559e-04
## [56] 2.656461e-05 2.875940e-06 1.977617e-04 8.972934e-06 1.977617e-04
## [61] 8.972934e-06 2.656461e-05 2.875940e-06 2.656461e-05 2.656461e-05
## [66] 8.972934e-06 8.758007e-07 1.977617e-04 2.656461e-05 7.451890e-05
## [71] 2.875940e-06 1.977617e-04 6.998736e-08 8.758007e-07 8.758007e-07
## [76] 2.875940e-06 2.656461e-05 8.972934e-06 6.998736e-08 2.875940e-06
## [81] 6.998736e-08 7.451890e-05 8.758007e-07 7.451890e-05 2.875940e-06
## [86] 8.972934e-06 7.451890e-05 2.875940e-06 1.840408e-08 1.840408e-08
## [91] 7.451890e-05 7.451890e-05 2.532895e-10 2.875940e-06 1.840408e-08
## [96] 8.972934e-06 2.656461e-05 2.875940e-06 4.956559e-04 2.875940e-06
# Frequency polygon and density of the per-observation likelihoods.
simple.freqpoly(likelihood, main="freq poly", xlab= "likelihood",
ylab="freq")

plot(density(likelihood))
# NOTE(review): seq(0.01, 0.1) evaluates to the single value 0.01 (default
# step is 1), so the prob column is constant — see the recorded table below.
tab=data.frame(prob=seq(0.01, 0.1), likelihood)
tab

## prob likelihood
## 1 0.01 2.875940e-06
## 2 0.01 7.451890e-05
## 3 0.01 8.972934e-06
## 4 0.01 4.617512e-09
## 5 0.01 8.972934e-06
## 6 0.01 6.998736e-08
## 7 0.01 1.840408e-08
## 8 0.01 2.875940e-06
## 9 0.01 2.656461e-05
## 10 0.01 4.956559e-04
## 11 0.01 1.840408e-08
## 12 0.01 7.451890e-05
## 13 0.01 1.170987e-03
## 14 0.01 4.617512e-09
## 15 0.01 8.758007e-07
## 16 0.01 4.956559e-04
## 17 0.01 8.972934e-06
## 18 0.01 4.617512e-09
## 19 0.01 2.875940e-06
## 20 0.01 2.656461e-05
## 21 0.01 2.875940e-06
## 22 0.01 7.451890e-05
## 23 0.01 7.451890e-05
## 24 0.01 1.977617e-04
## 25 0.01 2.656461e-05
## 26 0.01 2.532895e-10
## 27 0.01 4.956559e-04
## 28 0.01 1.170987e-03
## 29 0.01 4.956559e-04
## 30 0.01 8.758007e-07
## 31 0.01 2.875940e-06
## 32 0.01 6.998736e-08
## 33 0.01 8.758007e-07
## 34 0.01 8.758007e-07
## 35 0.01 8.972934e-06
## 36 0.01 2.875940e-06
## 37 0.01 2.537042e-07
## 38 0.01 2.537042e-07
## 39 0.01 1.106279e-09
## 40 0.01 2.656461e-05
## 41 0.01 7.451890e-05
## 42 0.01 2.656461e-05
## 43 0.01 1.059153e-02
## 44 0.01 1.977617e-04
## 45 0.01 6.998736e-08
## 46 0.01 1.977617e-04
## 47 0.01 1.977617e-04
## 48 0.01 1.170987e-03
## 49 0.01 7.451890e-05
## 50 0.01 8.758007e-07
## 51 0.01 6.998736e-08
## 52 0.01 8.972934e-06
## 53 0.01 2.656461e-05
## 54 0.01 7.451890e-05
## 55 0.01 4.956559e-04
## 56 0.01 2.656461e-05
## 57 0.01 2.875940e-06
## 58 0.01 1.977617e-04
## 59 0.01 8.972934e-06
## 60 0.01 1.977617e-04
## 61 0.01 8.972934e-06
## 62 0.01 2.656461e-05
## 63 0.01 2.875940e-06
## 64 0.01 2.656461e-05
## 65 0.01 2.656461e-05
## 66 0.01 8.972934e-06
## 67 0.01 8.758007e-07
## 68 0.01 1.977617e-04
## 69 0.01 2.656461e-05
## 70 0.01 7.451890e-05
## 71 0.01 2.875940e-06
## 72 0.01 1.977617e-04
## 73 0.01 6.998736e-08
## 74 0.01 8.758007e-07
## 75 0.01 8.758007e-07
## 76 0.01 2.875940e-06
## 77 0.01 2.656461e-05
## 78 0.01 8.972934e-06
## 79 0.01 6.998736e-08
## 80 0.01 2.875940e-06
## 81 0.01 6.998736e-08
## 82 0.01 7.451890e-05
## 83 0.01 8.758007e-07
## 84 0.01 7.451890e-05
## 85 0.01 2.875940e-06
## 86 0.01 8.972934e-06
## 87 0.01 7.451890e-05
## 88 0.01 2.875940e-06
## 89 0.01 1.840408e-08
## 90 0.01 1.840408e-08
## 91 0.01 7.451890e-05
## 92 0.01 7.451890e-05
## 93 0.01 2.532895e-10
## 94 0.01 2.875940e-06
## 95 0.01 1.840408e-08
## 96 0.01 8.972934e-06
## 97 0.01 2.656461e-05
## 98 0.01 2.875940e-06
## 99 0.01 4.956559e-04
## 100 0.01 2.875940e-06

# Scatterplot of the (constant) prob column against the likelihoods.
plot(tab, main="p v/s likelihood")


####LAB 5####
# Obtaining MLEs numerically with optimize().

# Binomial(size = 9, p = 0.6): the likelihood is maximised near
# p-hat = mean(x) / 9.
x <- rbinom(50, size = 9, prob = 0.6)
likbin <- function(p, x) { prod(dbinom(x, size = 9, prob = p)) }
optimize(likbin, interval = c(0, 1), maximum = TRUE, x = x)

## $maximum
## [1] 0.6022232
##
## $objective
## [1] 1.23443e-37

# Same experiment repeated. (The trailing comments had been split across
# lines by PDF extraction, leaving bare words that were syntax errors.)
x <- rbinom(50, size = 9, prob = 0.6)
likbin <- function(p, x) { prod(dbinom(x, size = 9, prob = p)) } # likelihood function of binomial
optimize(likbin, interval = c(0, 1), maximum = TRUE, x = x) # obtaining the maximum value of p

## $maximum
## [1] 0.6222213
##
## $objective
## [1] 2.54981e-41

# Third repetition.
x <- rbinom(50, size = 9, prob = 0.6)
likbin <- function(p, x) { prod(dbinom(x, size = 9, prob = p)) }
optimize(likbin, interval = c(0, 1), maximum = TRUE, x = x)
## $maximum
## [1] 0.6066697
##
## $objective
## [1] 1.808841e-42

# Poisson(lambda = 5).
# BUG FIX: the original search interval c(0, 1) excluded the true lambda,
# so the recorded maximum (0.9999339) was pinned at the boundary. The
# interval must contain the MLE, mean(x) (about 5).
x <- rpois(50, lambda = 5)
likepois <- function(p, x) { prod(dpois(x, lambda = p)) }
optimize(likepois, interval = c(0, 20), maximum = TRUE, x = x)

## (recorded output from the original c(0, 1) run)
## $maximum
## [1] 0.9999339
##
## $objective
## [1] 6.016742e-141

# Exponential(rate = 1); the MLE of the rate is 1 / mean(x).
# BUG FIX: the original likelihood, prod(dexp(100, 1)), ignored both p and
# x, so the objective was constant and the "maximum" meaningless. The
# interval is also widened so it contains the MLE.
x <- rexp(100, 1)
likexp <- function(p, x) { prod(dexp(x, rate = p)) }
optimize(likexp, interval = c(0, 2), maximum = TRUE, x = x)

## (recorded output from the original constant-objective run)
## $maximum
## [1] 0.9999339
##
## $objective
## [1] 3.720076e-44

#maxlogL function
library(EstimationTools)

## Warning: package 'EstimationTools' was built under R version 4.1.3

## Loading required package: DEoptim

## Warning: package 'DEoptim' was built under R version 4.1.3

## Loading required package: parallel

##
## DEoptim package
## Differential Evolution algorithm in R
## Authors: D. Ardia, K. Mullen, B. Peterson and J. Ulrich

## Loading required package: stringr

## Loading required package: BBmisc

## Warning: package 'BBmisc' was built under R version 4.1.3

##
## Attaching package: 'BBmisc'
## The following object is masked from 'package:Hmisc':
##
## %nin%

## The following object is masked from 'package:base':


##
## isFALSE

## ><<<<<<<<<<<<<<<<<<<<<<<< EstimationTools Version 2.1.0 >>>>>>>>>>>>>>>>>>>>>>>><
## Feel free to report bugs in
## https://github.com/Jaimemosg/EstimationTools/issues

# MLE of Normal(mean, sd) via EstimationTools::maxlogL.
set.seed(1000)
z <- rnorm(n = 1000, mean = 10, sd = 15)
# BUG FIX: the original bounds upper = c(15, 10) cap sd at 10, below the
# true sd of 15 — the recorded estimate (sd = 10.000000) sits exactly on
# that bound. Widen the box so the optimum is interior.
fit1 <- maxlogL(x = z, dist = "dnorm", start = c(2, 3),
                lower = c(-15, 0), upper = c(15, 30))
fit1

## Call:
## maxlogL(x = z, dist = "dnorm", start = c(2, 3), lower = c(-15,
## 0), upper = c(15, 10))
##
## Results:
##
## Estimated parameters:
## mean sd
## 9.812852 10.000000

# Repeat of the Normal MLE fit.
library(EstimationTools)
set.seed(1000)
z <- rnorm(n = 1000, mean = 10, sd = 15)
# BUG FIX: upper = c(15, 10) capped sd at 10 while the true sd is 15 (the
# recorded estimate sat exactly on the bound). Widened to c(15, 30).
fit1 <- maxlogL(x = z, dist = "dnorm", start = c(2, 3),
                lower = c(-15, 0), upper = c(15, 30))
fit1

## Call:
## maxlogL(x = z, dist = "dnorm", start = c(2, 3), lower = c(-15,
## 0), upper = c(15, 10))
##
## Results:
##
## Estimated parameters:
## mean sd
## 9.812852 10.000000

library(EstimationTools)
set.seed(1000)
# MLE of the Poisson rate from 1000 draws with true lambda = 5.
z= rpois(1000, 5)
# NOTE(review): dpois has one parameter, but lower/upper are length 2
# here; presumably only the first element is used — verify against the
# EstimationTools::maxlogL documentation. A negative lower bound is also
# meaningless for lambda.
fit1= maxlogL(x=z, dist = 'dpois', start= 2, lower = c(-15,0), upper=
c(10,15))
fit1

## Call:
## maxlogL(x = z, dist = "dpois", start = 2, lower = c(-15, 0),
## upper = c(10, 15))
##
## Results:
##
## Estimated parameters:
## lambda
## 4.99

library(EstimationTools)
set.seed(1000)
# MLE of the exponential rate from 1000 draws with true rate = 1.
z= rexp(1000, 1)
# NOTE(review): length-2 bounds for a one-parameter model; confirm how
# maxlogL interprets them (see the Poisson fit note pattern). A negative
# lower bound is meaningless for a rate.
fit= maxlogL(x=z, dist="dexp", start=2, lower= c(-15,0), upper=c(10,15))
fit

## Call:
## maxlogL(x = z, dist = "dexp", start = 2, lower = c(-15, 0), upper = c(10,
## 15))
##
## Results:
##
## Estimated parameters:
## rate
## 0.9968858

####LAB6####
# One-sample proportion test: 65 successes of 160 vs H0 p = 0.5,
# two-sided, 99% CI, no continuity correction.
#H0: p=0.5
#H1: p not equal to 0.5
prop.test(x=65, n=160, p=0.5, correct = FALSE, conf.level = 0.99)

##
## 1-sample proportions test without continuity correction
##
## data: 65 out of 160, null probability 0.5
## X-squared = 5.625, df = 1, p-value = 0.01771
## alternative hypothesis: true p is not equal to 0.5
## 99 percent confidence interval:
## 0.3119103 0.5080554
## sample estimates:
## p
## 0.40625

# One-sided (less) proportion test: 7/100 vs H0 p = 0.095, 99% CI.
#H0: p= 0.095
#H1: p<0.095
prop.test(x=7, n=100, p=0.095, alternative = "less", conf.level = 0.99)
##
## 1-sample proportions test with continuity correction
##
## data: 7 out of 100, null probability 0.095
## X-squared = 0.46525, df = 1, p-value = 0.2476
## alternative hypothesis: true p is less than 0.095
## 99 percent confidence interval:
## 0.0000000 0.1603637
## sample estimates:
## p
## 0.07

#accept null hypothesis

# Proportion test with the default null p = 0.5: 540 successes of 1000.
prop.test(x=540, n=1000, correct = FALSE, conf.level = 0.99)

##
## 1-sample proportions test without continuity correction
##
## data: 540 out of 1000, null probability 0.5
## X-squared = 6.4, df = 1, p-value = 0.01141
## alternative hypothesis: true p is not equal to 0.5
## 99 percent confidence interval:
## 0.4992726 0.5802001
## sample estimates:
## p
## 0.54

####LAB7###
# Two-sample proportion test: 310/500 vs 236/400, no continuity correction.
#H0: p1=p2
#H1: p1 not equal to p2
prop.test(n= c(500, 400), x= c(310, 236), correct = FALSE, conf.level =
0.95 )

##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: c(310, 236) out of c(500, 400)
## X-squared = 0.83814, df = 1, p-value = 0.3599
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.03429011 0.09429011
## sample estimates:
## prop 1 prop 2
## 0.62 0.59

# One-sample location example: visual and formal normality checks.
data= c(-0.92, -3.87, 2.36, -0.51, -1.35, -0.33, -3.59, -1.02, 8.06, 4.17,
0.98, 1.65, -0.67,-3.21, 1.44, -1.64, 0.38, -0.21, 5.18, 5.92, 3.95, -4.74)
hist(data)
simple.freqpoly(data)
qqnorm(data)
qqline(data)

boxplot(data, horizontal = T)
plot(density(data))

# Shapiro-Wilk: p = 0.54, so normality is not rejected.
shapiro.test(data)
##
## Shapiro-Wilk normality test
##
## data: data
## W = 0.96236, p-value = 0.5385

# One-sample t-test of H0: mu = 0.5 (two-sided).
t.test(data, mu=0.5)

##
## One Sample t-test
##
## data: data
## t = 0.065979, df = 21, p-value = 0.948
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
## -0.9288499 2.0224862
## sample estimates:
## mean of x
## 0.5468182

# Normality check of mtcars mpg before the one-sample t-test below.
x= mtcars$mpg
shapiro.test(x)

##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.94756, p-value = 0.1229

# One-sample t-test of H0: mu = 20 for mtcars mpg.
t.test(x, mu=20)

##
## One Sample t-test
##
## data: x
## t = 0.08506, df = 31, p-value = 0.9328
## alternative hypothesis: true mean is not equal to 20
## 95 percent confidence interval:
## 17.91768 22.26357
## sample estimates:
## mean of x
## 20.09062

###LAB 8###
# z-test vs t-test on iris sepal width; first check normality.
x= iris$Sepal.Width
shapiro.test(x)

##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.98492, p-value = 0.1012
# Population mean of sepal width.
popmean= mean(x)
popmean

## [1] 3.057333

# One random sample of 50 sepal widths (without replacement).
sample1= sample(x, 50)


mean(sample1)

## [1] 3.064

var(sample1)

## [1] 0.1937796

library(BSDA)

## Warning: package 'BSDA' was built under R version 4.1.3

##
## Attaching package: 'BSDA'

## The following object is masked from 'package:HistData':


##
## Wheat

## The following object is masked from 'package:datasets':


##
## Orange

# One-sample z-test (BSDA) of H0: mu = 3.026.
# NOTE(review): sigma.x should be the population sd; sd(iris$Sepal.Width)
# is about 0.44, so 0.1293102 looks wrong here — confirm its origin.
z.test(x, mu=3.026, sigma.x=0.1293102)

##
## One-sample z-Test
##
## data: x
## z = 2.9677, p-value = 0.003
## alternative hypothesis: true mean is not equal to 3.026
## 95 percent confidence interval:
## 3.036640 3.078027
## sample estimates:
## mean of x
## 3.057333

# One-sample t-test of the same hypothesis, for comparison with the z-test.
t.test(x, mu=3.026)

##
## One Sample t-test
##
## data: x
## t = 0.88044, df = 149, p-value = 0.38
## alternative hypothesis: true mean is not equal to 3.026
## 95 percent confidence interval:
## 2.987010 3.127656
## sample estimates:
## mean of x
## 3.057333

# Per-group summary of my_data (dplyr): count, mean and sd of weight.
# BUG FIX (extraction damage): the closing parenthesis of summarise() was
# lost when this document was converted; restored here.
group_by(my_data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )

We'll use an F-test to test for homogeneity in variances. This can be performed with the function var.test() as follows:

res.ftest <- var.test(weight ~ group, data = my_data)

res.ftest

##

## F test to compare two variances

##

## data: weight by group

## F = 0.36134, num df = 8, denom df = 8, p-value = 0.1714

## alternative hypothesis: true ratio of variances is not equal to 1

## 95 percent confidence interval:

## 0.08150656 1.60191315

## sample estimates:

## ratio of variances

## 0.3613398

The p-value of the F-test is p = 0.1713596. It's greater than the significance level alpha = 0.05. In conclusion, there is no significant difference between the variances of the two sets of data. Therefore, we can use the classic t-test, which assumes equality of the two variances.

Question : Is there any significant difference between women and men weights?

1) Compute independent t-test - Method 1: The data are saved in two different numeric
vectors.

Compute t-test

res <- t.test(women_weight, men_weight, var.equal = TRUE)

res

##

## Two Sample t-test

##

## data: women_weight and men_weight

## t = -2.7842, df = 16, p-value = 0.01327

## alternative hypothesis: true difference in means is not equal to 0

## 95 percent confidence interval:

## -29.748019 -4.029759

## sample estimates:

## mean of x mean of y

## 52.10000 68.98889

2) Compute independent t-test - Method 2: The data are saved in a data frame.

Compute t-test

res <- t.test(weight ~ group, data = my_data, var.equal = TRUE)

res

##

## Two Sample t-test

##
## data: weight by group

## t = 2.7842, df = 16, p-value = 0.01327

## alternative hypothesis: true difference in means is not equal to 0

## 95 percent confidence interval:

## 4.029759 29.748019

## sample estimates:

## mean in group Man mean in group Woman

## 68.98889 52.10000

If you want to test whether the average men's weight is less than the average women's weight, type this:

t.test(weight ~ group, data = my_data,

var.equal = TRUE, alternative = "less")

##

## Two Sample t-test

##

## data: weight by group

## t = 2.7842, df = 16, p-value = 0.9934

## alternative hypothesis: true difference in means is less than 0

## 95 percent confidence interval:

## -Inf 27.47924

## sample estimates:

## mean in group Man mean in group Woman

## 68.98889 52.10000

Or, if you want to test whether the average men's weight is greater than the average women's weight, type this:

t.test(weight ~ group, data = my_data,


var.equal = TRUE, alternative = "greater")

##

## Two Sample t-test

##

## data: weight by group

## t = 2.7842, df = 16, p-value = 0.006633

## alternative hypothesis: true difference in means is greater than 0

## 95 percent confidence interval:

## 6.298536 Inf

## sample estimates:

## mean in group Man mean in group Woman

## 68.98889 52.10000

The p-value of the test is 0.01327, which is less than the significance level alpha = 0.05. We can conclude that men's average weight is significantly different from women's average weight, with a p-value of 0.01327.

You might also like