You are on page 1 of 20

Statistics / Test # 2 / Solutions

Michael King
Fall 2019

Question 1
X is a binomial random variable with parameters p = 0.5 and n = 30. Plot the probability function of X
# Question 1: plot the probability function of a Binomial(n, p) variable.

# Define the distribution parameters
n <- 30
p <- 0.5

# Define the probability function P[X = x].
# n and p are looked up in the enclosing environment when f() is called.
f <- function(x) {
  dbinom(x, size = n, prob = p)
}

# Build a tibble for plotting: one row per support point 0, 1, ..., n.
# tibble() is used because data_frame() is deprecated.
M <-
  tibble(
    x = seq(from = 0, to = n, by = 1),
    y = f(x))

# Bar chart of the probability function; bars are shaded by their height.
G <-
  ggplot(
    data = M) +
  geom_col(
    mapping = aes(x = x, y = y, fill = y),
    color = 'black',
    size = 0.2,
    width = 1) +
  scale_fill_gradientn(
    colors = rev(heat.colors(6))) +
  scale_x_continuous(
    # Roughly five axis breaks; max() keeps the step positive when n < 5.
    breaks = seq(from = 0, to = n, by = max(1, floor(n / 5)))) +
  labs(
    x = 'random variable',
    y = NULL,
    # Build the title from n and p so it cannot drift from the parameters.
    title = paste0('A Binomial Probability Function / n = ', n, ', p = ', p),
    subtitle = NULL,
    caption = 'Michael King / Statistics I / Test #2') +
  theme_bw() +
  theme(
    legend.position = 'none',
    axis.title.x = element_text(vjust = -3),
    axis.title.y = element_text(vjust = +3),
    plot.margin = margin(unit = 'cm', b = 1, l = 1),
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.ticks = element_blank(),
    text = element_text(
      size = 18,
      color = 'black',
      face = 'italic',
      family = 'serif'))

G
A Binomial Probability Function / n = 10, p = 0.5
0.15

0.10

0.05

0.00

0 6 12 18 24 30
random variable Michael King / Statistics I / Test #2
Question 2
Generate a random sample of size 1,000,000 from a binomial distribution with parameters n = 30 and p = 0.439 and graph the
cumulative empirical distribution.
# Question 2: empirical cumulative distribution of a large binomial sample.

# Set the distribution parameters
n <- 30
p <- 0.439

# Generate random numbers (no seed is set, so each run draws a new sample)
rb <- rbinom(n = 1e6, size = n, prob = p)

# Create a table of absolute frequencies, one cell per distinct value
tb <- table(rb)

# Sort the distinct observed values; this matches the ordering of table(rb)
nb <- sort(unique(rb))

# Compute relative cumulative frequencies
cb <- cumsum(tb) / length(rb)

# Create a tibble.
# as.vector() drops the table class / names so every column is a plain
# vector, and tibble() replaces the deprecated data_frame().
M <-
  tibble(
    nb = nb,
    tb = as.vector(tb),
    cb = as.vector(cb))

# Bar chart of the cumulative relative frequencies, shaded by height.
G <-
  ggplot(
    data = M) +
  geom_col(
    mapping = aes(x = nb, y = cb, fill = cb),
    color = 'black',
    width = 1,
    size = 0.2) +
  scale_fill_gradientn(
    colors = rev(heat.colors(6))) +
  scale_x_continuous(
    # Label every second value across the observed range
    breaks = seq(from = min(rb), to = max(rb), by = 2)) +
  labs(
    x = NULL,
    y = 'Relative Cumulative Frequency',
    title = 'Empirical Cumulative Distribution Function',
    subtitle = NULL,
    caption = 'Michael King / Statistics I / Fall 2019') +
  theme_bw() +
  theme(
    legend.position = 'none',
    axis.title.x = element_text(vjust = -3),
    axis.title.y = element_text(vjust = +3),
    plot.margin = margin(unit = 'cm', b = 1, l = 1),
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.ticks = element_blank(),
    text = element_text(
      size = 18,
      color = 'black',
      face = 'italic',
      family = 'serif'))

G
Empirical Cumulative Distribution Function
1.00

0.75
Relative Cumulative Frequency

0.50

0.25

0.00

1 3 5 7 9 11 13 15 17 19 21 23 25
Michael King / Statistics I / Fall 2019
Question 3
• Let X be a geometric random variable with parameter p = 0.25.

• Generate 1000 random numbers and create a histogram


# Question 3: relative frequency histogram of geometric random numbers.

# Set the parameter
p <- 0.25

# Set the sample size
n <- 1000

# Generate a random sample of size n from X
rn <- rgeom(n = n, prob = p)

# Define the histogram breaks: unit-width bins centred on the integers
br <- seq(from = min(rn) - 0.5, to = max(rn) + 0.5, by = 1)

# Create a tibble (tibble() replaces the deprecated data_frame())
M <- tibble(x = rn)

# Create the histogram.
# The x aesthetic is mapped to column x of M rather than to the global
# vector rn, so the plot really is driven by its `data` argument.
G <-
  ggplot(
    data = M) +
  geom_histogram(
    mapping = aes(x = x, y = ..density.., fill = ..density..),
    breaks = br,
    color = 'black',
    size = 0.3) +
  scale_fill_gradientn(
    colors = rev(heat.colors(20))) +
  labs(
    title = 'Relative Frequency Histogram of Geometric Data',
    subtitle = NULL,
    x = NULL,
    y = NULL,
    caption = 'Michael King / Statistics I / Fall 2019') +
  # Axis limits coincide with the outermost histogram breaks
  xlim(
    min(rn) - 0.5,
    max(rn) + 0.5) +
  theme_bw() +
  theme(
    legend.position = 'none',
    axis.title.x = element_text(vjust = -3),
    axis.title.y = element_text(vjust = +3),
    plot.margin = margin(unit = 'cm', b = 1, l = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA),
    axis.ticks = element_blank(),
    text = element_text(
      size = 18,
      color = 'black',
      face = 'italic',
      family = 'serif'))

G
Relative Frequency Histogram of Geometric Data

0.2

0.1

0.0

0 10 20
Michael King / Statistics I / Fall 2019
Question 4
• Let Y be a negative binomial random variable with parameters r = 6 and p = 0.5

• Plot the probability function for Y from Y = 0 to Y = 20


# Question 4: probability function of a negative binomial variable, 0..20.

# Define the distribution parameters
r <- 6
p <- 0.5

# Create the data: one row per value 0, 1, ..., 20 with its probability.
# tibble() replaces the deprecated data_frame().
M <-
  tibble(
    x = seq(from = 0, to = 20, by = 1),
    y = dnbinom(x = x, size = r, prob = p))

# Bar chart of the probability function; bars are shaded by their height.
G <-
  ggplot(
    data = M) +
  geom_col(
    mapping = aes(x = x, y = y, fill = y),
    color = 'black',
    size = 0.2,
    width = 1) +
  scale_fill_gradientn(
    colors = rev(heat.colors(10))) +
  labs(
    x = NULL,
    y = NULL,
    title = 'A Negative Binomial Probability Function',
    subtitle = NULL,
    caption = 'Michael King / Statistics I / Fall 2019') +
  theme_bw() +
  theme(
    legend.position = 'none',
    axis.title.x = element_text(vjust = -3),
    axis.title.y = element_text(vjust = +3),
    plot.margin = margin(unit = 'cm', b = 1, l = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA),
    axis.ticks = element_blank(),
    text = element_text(
      size = 18,
      color = 'black',
      face = 'italic',
      family = 'serif'))

G
A Negative Binomial Probability Function
0.125

0.100

0.075

0.050

0.025

0.000

0 5 10 15 20
Michael King / Statistics I / Fall 2019
Question 5
• Let X be a Poisson random variable with mean 5.
• Plot the probability function of X for 0 ≤ X ≤ 20
# Question 5: probability function of a Poisson(5) variable for 0 <= X <= 20.

# Create the data: one row per value 0, 1, ..., 20 with its probability.
# tibble() replaces the deprecated data_frame().
M <-
  tibble(
    x = seq(from = 0, to = 20, by = 1),
    y = dpois(x, lambda = 5))

# Build the plot: slightly transparent bars shaded by their height.
G <-
  ggplot(
    data = M) +
  geom_col(
    mapping = aes(x = x, y = y, fill = y),
    color = 'white',
    size = 0.2,
    width = .97,
    alpha = .8) +
  scale_fill_continuous(
    type = 'gradient') +
  labs(
    x = 'Random Variable',
    y = NULL,
    title = 'Poisson Probability Function',
    subtitle = 'Michael King') +
  theme_bw() +
  theme(
    legend.position = 'none',
    axis.title.x = element_text(vjust = -3),
    axis.title.y = element_text(vjust = +3),
    plot.margin = margin(unit = 'cm', b = 1, l = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA),
    axis.ticks = element_blank(),
    text = element_text(
      size = 18,
      color = 'black',
      face = 'italic',
      family = 'serif'))

G
Poisson Probability Function
Michael King

0.15

0.10

0.05

0.00

0 5 10 15 20
Random Variable
Question 6
• Let X be a Poisson random variable with mean 4

• Plot the survival function of X over 0 ≤ X ≤ 15


# Question 6: survival function of a Poisson(4) variable over 0 <= X <= 15.

# Define the data.
# ppois(..., lower.tail = FALSE) returns the upper tail P[X > x].
# tibble() replaces the deprecated data_frame().
M <-
  tibble(
    x = seq(from = 0, to = 15, by = 1),
    s_x = ppois(x, lambda = 4, lower.tail = FALSE))

# Build the plot: both the fill and the transparency follow s_x.
G <-
  ggplot(
    data = M) +
  geom_col(
    mapping = aes(x = x, y = s_x, fill = s_x, alpha = s_x),
    color = 'white',
    size = 0.2,
    width = .95) +
  scale_fill_continuous(
    type = 'gradient') +
  labs(
    title = 'The Survival Function of a Poisson Random Variable',
    subtitle = NULL,
    x = 'x-values',
    y = 'Survival Probability') +
  theme_bw() +
  theme(
    legend.position = 'none',
    axis.title.x = element_text(vjust = -3),
    axis.title.y = element_text(vjust = +3),
    plot.margin = margin(unit = 'cm', b = 1, l = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.title = element_text(hjust = 0.5),
    panel.border = element_rect(fill = NA),
    axis.ticks = element_blank(),
    text = element_text(
      size = 18,
      color = 'black',
      face = 'italic',
      family = 'serif'))

G
The Survival Function of a Poisson Random Variable
1.00

0.75
Survival Probability

0.50

0.25

0.00

0 5 10 15
x−values
Question 7

# Question 7: probability that a gamma random variable lies in [a, b].

# Define parameters: gamma shape and scale, and the interval bounds a, b
shape <- 2
scale <- 3
a <- 2
b <- 7

# Calculate P[a <= X <= b] as the difference of the CDF at b and at a.
# The bounds come from a and b (previously hard-coded as 7 and 2) so the
# probability stays in step with the interval defined above.
prob <-
  pgamma(q = b, shape = shape, scale = scale) -
  pgamma(q = a, shape = shape, scale = scale)
prob

[1] 0.5324553
# Plot the area under the gamma density between a and b.
# Uses shape, scale, a, b and prob defined earlier in the script.
#   x,  y  : density on [a, b]  (the shaded region)
#   xp, yp : density on [0, 15] (the full curve)
# tibble() replaces the deprecated data_frame(); length.out is spelled out
# instead of relying on partial matching of `length`.
M <-
  tibble(
    x = seq(from = a, to = b, length.out = 1e3),
    y = dgamma(x, shape = shape, scale = scale),
    xp = seq(from = 0, to = 15, length.out = 1e3),
    yp = dgamma(xp, shape = shape, scale = scale))

G <-
  ggplot(
    data = M) +
  geom_area(
    mapping = aes(x = x, y = y),
    fill = 'darkorange',
    alpha = 0.3) +
  geom_line(
    mapping = aes(x = xp, y = yp),
    color = 'darkorange',
    size = 1.2,
    linetype = 'solid') +
  # annotate() draws the label once; a geom_text() layer would inherit M
  # and draw the same label once per row (1000 overlapping copies).
  annotate(
    geom = 'text',
    x = (a + b) / 2,
    y = 0.05,
    # The label is built from a and b so it always matches the shaded area
    label = paste0('P[', a, ' < X < ', b, '] = ', round(prob, digits = 2)),
    size = 5,
    color = 'black',
    family = 'serif',
    fontface = 'italic',
    angle = 0) +
  labs(
    title = 'A Gamma Area Plot',
    subtitle = NULL,
    x = 'x-values',
    y = NULL,
    caption = 'Michael King / Statistics I / Fall 2019') +
  theme_bw() +
  theme(
    axis.ticks = element_blank(),
    text = element_text(
      size = 18,
      face = 'italic',
      family = 'serif',
      color = 'black'),
    axis.title.x = element_text(
      vjust = 6))

G
A Gamma Area Plot
0.125

0.100

0.075

0.050 P[2 < X < 7] = 0.53

0.025

0.000

0 5 x−values 10 15

Michael King / Statistics I / Fall 2019


Question 8
• Create a vector of 1000 random numbers from an exponential distribution with mean 5 and plot a histogram
# Question 8: histogram of 1000 exponential random numbers with mean 5.

# Create the data: rate = 0.2 corresponds to mean 1 / 0.2 = 5.
# tibble() replaces the deprecated data_frame().
M <- tibble(x = rexp(n = 1000, rate = 0.2))

# Build the plot
G <-
  ggplot(
    data = M) +
  geom_histogram(
    mapping = aes(x = x, y = ..density..),
    color = 'white',
    fill = 'darkred',
    alpha = 0.6,
    # 31 break points -> 30 unit-width bins on [0, 30].
    # (seq(..., length = 30) gave only 29 bins of width 30 / 29.)
    # The breaks stop at 30, so larger draws fall outside every bin.
    breaks = seq(from = 0, to = 30, by = 1)) +
  labs(
    x = 'Random Exponential Numbers',
    y = NULL,
    title = 'Relative Frequency Histogram / Exponential Numbers',
    subtitle = NULL,
    caption = 'Michael King / Statistics I / Section 4.8') +
  theme(
    axis.ticks = element_blank(),
    text = element_text(
      size = 16,
      color = 'black',
      face = 'italic',
      family = 'serif'))

G
Relative Frequency Histogram / Exponential Numbers

0.15

0.10

0.05

0.00

0 10 20 30
Random Exponential Numbers
Michael King / Statistics I / Section 4.8
Question 9

# Question 9: Beta density curves with alpha fixed at 3 and beta = 1, 2, 3.

# Grid of every (x, alpha, beta) combination, with the density attached as y.
M <-
  mutate(
    expand.grid(
      x = seq(from = 0, to = 1, by = 0.01),
      alpha = 3,
      beta = c(1, 2, 3)
    ),
    y = dbeta(x = x, shape1 = alpha, shape2 = beta)
  )

# One line per value of beta, told apart by colour.
G <-
  ggplot(data = M) +
  geom_line(
    mapping = aes(x = x, y = y, color = as.factor(beta)),
    linetype = "solid",
    size = 1.2
  ) +
  scale_color_manual(
    values = topo.colors(3),
    name = "Beta"
  ) +
  labs(
    x = "x-values",
    y = NULL,
    title = "Beta Density Functions, alpha = 3",
    subtitle = NULL,
    caption = "Michael King / Statistics I / Fall 2019"
  ) +
  theme(
    text = element_text(
      family = "serif",
      face = "italic",
      color = "black",
      size = 18
    ),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5),
    # Horizontal legend placed inside the panel, near the top right
    legend.direction = "horizontal",
    legend.position = c(0.75, 0.95)
  )
G
Beta Density Functions, alpha = 3
3 Beta 1 2 3

0.00 0.25 0.50 0.75 1.00


x−values
Michael King / Statistics I / Fall 2019
Question 10

# Question 10: normal densities for mean = 0 and sd = 1, 1.5, 2.

# Grid of every (x, mu, sigma) combination.
# length.out is spelled out instead of relying on partial matching.
M <-
  expand.grid(
    x = seq(from = -5, to = 5, length.out = 1e3),
    mu = 0,
    sigma = c(1.0, 1.5, 2.0))

# Attach the density values.
# An explicit assignment replaces the `%<>%` compound-assignment pipe, so
# the reassignment of M is visible.
M <- mutate(M, y = dnorm(x, mean = mu, sd = sigma))

# One line per standard deviation, told apart by colour.
G <-
  ggplot(
    data = M) +
  geom_line(
    mapping = aes(x = x, y = y, color = as.factor(sigma)),
    size = 1.3,
    linetype = 'solid') +
  scale_color_manual(
    # Labels follow the order of the sigma values in the grid above
    labels = c('1.0', '1.5', '2.0'),
    values = topo.colors(3),
    name = 'Standard Deviation') +
  labs(
    title = 'Normal Density Functions',
    caption = 'Michael King / Statistics I / Section 4.10',
    x = NULL,
    y = NULL) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = c(0.8, 0.9),
    legend.key.width = unit(1, units = 'inches'),
    axis.ticks = element_blank(),
    text = element_text(
      size = 16,
      color = 'black',
      face = 'italic',
      family = 'serif'))

G
Normal Density Functions
0.4 Standard Deviation
1.0
1.5
2.0

0.3

0.2

0.1

0.0

−5.0 −2.5 0.0 2.5 5.0


Michael King / Statistics I / Section 4.10

You might also like