You are on page 1of 7

Sampling Distributions

Mohaimen Mansur

Sampling distribution of sample mean

# Parameters of the simulated IQ population (normally distributed)
# and of the sampling scheme used throughout.
popMean <- 100 # population mean
popSD <- 15 # population standard deviation
sampSize <- 50 # number of observations in each sample drawn

# Draw one sample of size sampSize from the (infinite) normal population
samp <- rnorm(n = sampSize, mean = popMean, sd = popSD)

# Sample mean: the point estimate of popMean from this one sample
mean(samp)

## [1] 99.92817

# Sampling error: distance between this sample's mean and the population mean
mean(samp) - popMean

## [1] -0.0718306

# Sample standard deviation (n - 1 in the denominator)
sd(samp)

## [1] 14.50241

# Simulate the sampling distribution of the sample mean:
# draw 100000 independent samples and keep the mean of each one
sampDist01 <- replicate(
  n = 100000,
  expr = mean(rnorm(n = sampSize, mean = popMean, sd = popSD))
)

hist(sampDist01) # frequency distribution

1
Histogram of sampDist01
15000
Frequency

10000
5000
0

95 100 105

sampDist01

# Same histogram on the density scale (total bar area is one).
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
hist(sampDist01, freq = FALSE) # density

Histogram of sampDist01
0.15
0.10
Density

0.05
0.00

95 100 105

sampDist01

2
# Centre of the simulated sampling distribution; compare with popMean
mean(sampDist01)

## [1] 100.0011

# Spread of the simulated sampling distribution (empirical standard error)
sd(sampDist01)

## [1] 2.12362

# Theoretical standard error of the mean, sigma / sqrt(n), for comparison
popSD/sqrt(sampSize)

## [1] 2.12132

# Build a finite population of 10000 values drawn from the same normal
# distribution, then study the sampling distribution of the sample mean
population <- rnorm(n = 10000, mean = popMean, sd = popSD)

hist(population) # not quite normal? (why?)

Histogram of population
2000
Frequency

1000
500
0

40 60 80 100 120 140 160

population

# Mean of the finite population; close to, but not exactly, popMean
mean(population)

## [1] 100.2107

3
# Standard deviation of the finite population; close to popSD
sd(population)

## [1] 14.83644

# Draw one sample of size sampSize from the finite population, with
# replacement, and look at its mean and standard deviation
sampChamp <- sample(x = population, size = sampSize, replace = TRUE)

mean(sampChamp)

## [1] 100.966

sd(sampChamp)

## [1] 15.28001

# Sampling distribution of the sample mean for the finite population:
# 10000 resamples of size sampSize (with replacement), one mean per resample
sampDist <- replicate(
  n = 10000,
  expr = mean(sample(x = population, size = sampSize, replace = TRUE))
)

hist(sampDist)

Histogram of sampDist
1500
Frequency

1000
500
0

95 100 105

sampDist

# Centre of the simulated sampling distribution; compare with mean(population)
mean(sampDist)

## [1] 100.227

4
# Spread of the simulated sampling distribution (empirical standard error);
# compare with sd(population) / sqrt(sampSize)
sd(sampDist)

## [1] 2.099929

Sampling distribution of sample variance

# Simulate the sampling distribution of the sample variance:
# 100000 normal samples of size sampSize02, one variance per sample
sampSize02 <- 100
sampDist02 <- replicate(
  n = 100000,
  expr = var(rnorm(n = sampSize02, mean = popMean, sd = popSD))
)
hist(sampDist02) # frequency distribution

Histogram of sampDist02
15000
Frequency

5000
0

100 150 200 250 300 350 400

sampDist02

# Same histogram on the density scale (total bar area is one).
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
hist(sampDist02, freq = FALSE) # density

5
Histogram of sampDist02
0.012
0.008
Density

0.004
0.000

100 150 200 250 300 350 400

sampDist02

The Central Limit Theorem


The sampling distribution of the sample mean will be approximately normal when the sample size is large
(a common rule of thumb is n >= 30), irrespective of the shape of the population distribution, provided
the population has a finite variance.

sampSize03 <- 100 # Play with the sample size

# Means of 100000 samples drawn from a skewed exponential population (rate 0.3)
sampDist03 <- replicate(
  n = 100000,
  expr = mean(rexp(n = sampSize03, rate = 0.3))
)
# FALSE is spelled out because F is an ordinary variable that can be reassigned
hist(sampDist03, freq = FALSE) # density

6
Histogram of sampDist03
1.2
1.0
0.8
Density

0.6
0.4
0.2
0.0

2.0 2.5 3.0 3.5 4.0 4.5 5.0

sampDist03

# Centre of the simulated distribution; the exponential population mean is
# 1 / rate = 1 / 0.3, about 3.33
mean(sampDist03)

## [1] 3.332133

# Empirical standard error; theory gives (1 / rate) / sqrt(sampSize03),
# i.e. (1 / 0.3) / sqrt(100), about 0.333
sd(sampDist03)

## [1] 0.3329996

Exercise

Derive the sampling distributions of the sample mean and the sample standard deviation
(with n and with (n-1) in the denominator) when sampling from different population
distributions (e.g., exponential, gamma, uniform, Poisson, etc.).

You might also like