You are on page 1 of 2

# 1.

# Paired observations: 18 ages and the matching fat measurements, stored as
# the two columns of `data`. The standalone vectors `age` and `fat` are kept
# because the scatter plot below refers to them by name.
data <- data.frame(
  age = c(
    23, 23, 27, 27, 39, 41, 47, 49, 50,
    52, 54, 54, 56, 57, 58, 58, 60, 61
  ),
  fat = c(
    9.5, 26.5, 7.8, 17.8, 31.4, 25.9, 27.4, 27.2, 31.2,
    34.6, 42.5, 28.8, 33.4, 30.2, 34.1, 32.9, 41.2, 35.7
  )
)
age <- data[["age"]]
fat <- data[["fat"]]

# (a) Mean, median and (sample) standard deviation of fat.
mean(data[["fat"]])
# [1] 28.78333
median(data[["fat"]])
# [1] 30.7
sd(data[["fat"]])
# [1] 9.254395

# (b) Side-by-side boxplots of both columns.
boxplot(data)
# age appears to be skewed to the left, while fat looks roughly
# symmetrically distributed.

# (c) Scatter plot of fat against age.
plot(x = age, y = fat)
# age and fat appear to be positively correlated.

# (d) One histogram per variable.
hist(data$age)
hist(data$fat)

# (e) Pearson correlation matrix (Pearson is the default method of cor()).
cor(data, method = "pearson")
#           age       fat
# age 1.0000000 0.8176188
# fat 0.8176188 1.0000000

# The correlation is about +0.82, which is fairly high.

# (f) Covariance matrix.
cov(data, method = "pearson")
#          age       fat
# age 174.7320 100.01961
# fat 100.0196  85.64382

# The main diagonal holds the variance of each variable, and the
# off-diagonal entries hold the covariance between them.

# 2
# Values to normalize in parts (a)-(d). Each part is a bare top-level
# expression so that its result is printed when the script runs.
x <- c(200, 300, 400, 600, 1000)

# (a) Min-max normalization onto [0, 1].
(x - min(x)) / (max(x) - min(x))

# (b) Min-max normalization onto [-1, 1]: stretch the [0, 1] result to
# width 2, then shift it down by 1.
(x - min(x)) / (max(x) - min(x)) * 2 - 1

# (c) Z-score normalization. Note that sd() is the sample standard
# deviation (denominator n - 1).
(x - mean(x)) / sd(x)

# (d) Decimal scaling: divide by 10^j, where j is the smallest integer that
# brings every scaled magnitude strictly below 1. Using log10(max(x))
# directly is not enough: it is fractional unless the maximum is an exact
# power of 10, and when it is one (as with 1000 here) the scaled maximum
# comes out as exactly 1 instead of below 1.
j <- floor(log10(max(abs(x)))) + 1
x / 10^j

# (e)
# First, normalization preserves the association between variables: for
# example, the correlation coefficient is the same whether it is computed on
# the normalized or the unnormalized variables. Second, numerical computation
# becomes more stable when the numbers are similarly scaled. Arithmetic that
# mixes very large and very small numbers can give inaccurate results because
# of round-off error, which is inherent in the binary representation of numbers.

You might also like