You are on page 1 of 8

# 1) i) ----
# X: the total weight of eight people chosen at random
# X ~ N(560, 57^2)
# Y: the total weight of nine people chosen at random
# Y ~ N(630, 61^2)

# P(X > 650)
# The mean of X is 560; 650 is the threshold being tested, not the mean.
mux <- 560
sdx <- 57
p_x <- pnorm(650, mean = mux, sd = sdx, lower.tail = FALSE)
p_x
# approximately 0.057

# ii) ----
# P(Y > 650)
muy <- 630
sdy <- 61
p_y <- pnorm(650, mean = muy, sd = sdy, lower.tail = FALSE)
p_y
# [1] 0.3715054

# iii) ----
# The probability that the total weight of eight people exceeds 650 kg is
# about 0.06: 650 lies roughly 1.58 standard deviations above the mean of 560.
# The probability that the total weight of nine people exceeds 650 kg is about
# 0.37, because the mean total for nine people (630) is much closer to 650.
# Adding a ninth person therefore makes exceeding 650 kg far more likely.

# iv) ----
# Interval for the central region containing 80% of the distribution of the
# total weight of eight people, X ~ N(560, 57^2).
# The central 80% region runs from the 10th to the 90th percentile of the
# distribution itself, so it is read off the quantile function. A t-test
# confidence interval on a simulated sample would instead describe the
# uncertainty in the sample mean, which is a different quantity.
central_probs <- c(0.1, 0.9)
interval_normal <- qnorm(central_probs, mean = 560, sd = 57)
interval_normal
# approximately 486.95 633.05

# v) ----
# X ~ Gamma(alpha, lambda), with alpha the shape and lambda the rate
alpha <- 96.5220
lambda <- 0.1724
interval_gamma <- qgamma(central_probs, shape = alpha, rate = lambda)
interval_gamma
# NOTE(review): rerun to record the printed values here.

# vii) ----
# The gamma distribution has mean alpha / lambda (about 559.9) and standard
# deviation sqrt(alpha) / lambda (about 57.0), almost the same as the normal
# distribution above, so the two central 80% intervals should be close. The
# gamma interval is expected to sit slightly to the right of the normal one
# because the gamma distribution is positively skewed.
# 2) i) ----
set.seed(12345)
x <- rbeta(100, 3, 1)
hist(x)
# Most of the mass sits towards the right with a long tail to the left,
# i.e. the data are negatively skewed.

# ii) ----
set.seed(12345)
# Each of the 1000 entries is the mean of its own sample; taking mean(x) of
# the same vector on every pass would just repeat one number 1000 times.
# NOTE(review): assumes part ii asks for means of fresh Beta(3, 1) samples of
# size 100. If a resample of x is intended, use
# mean(sample(x, replace = TRUE)) instead.
y <- replicate(1000, mean(rbeta(100, 3, 1)))
# Print once, after all the means have been generated.
print(y)

# iii) ----
y <- c(4.9, 3.3, 2.2, 2.3, 1.6, 2.4, 4.7, 1.4, 1.7, 5.1)

# 90% confidence interval for the mean of y
t <- t.test(y, conf.level = 0.9)
t$conf.int
# [1] 2.124776 3.795224

# The interval assumes the data follow a normal distribution; because the
# variance is unknown and estimated from the sample, the pivotal quantity
# follows a t distribution.

# iv) ----
# Standard error of the sample mean
sigma <- sd(y)
n <- length(y)
se <- sigma / sqrt(n)
se
# [1] 0.4556314

# v) ----
# Using bootstrap
set.seed(12345)
# `se` above is a number, not a function, so it cannot be called on each
# resample. The bootstrap standard error of the mean is the standard
# deviation of the means of the resamples.
# NOTE(review): assumes part v asks for the bootstrap standard error of the
# mean; confirm against the question.
estimate <- replicate(10000, mean(sample(y, replace = TRUE)))
sd(estimate)

# 3) ----
# Fitting a Poisson GLM (log link) of claims on all three covariates.
# NOTE(review): claims, age, LY and NCD are not created in this section; they
# are assumed to already exist in the workspace.
model <- glm(claims ~ age + LY + NCD, family = poisson(link = "log"))
model$aic
# 23104.33

# Method 1: correlation of each covariate with claims ----
cor(claims, age)
# -0.1509437

cor(claims, LY)
# -0.2291188

cor(claims, NCD)
# -0.2240621

# Since the correlations between claims and LY and between claims and NCD are
# the strongest (largest in absolute value), we choose those two as the
# explanatory variables.

# Method 2: compare the AIC of the candidate models ----
model1 <- glm(claims ~ LY + NCD, family = poisson(link = "log"))
model1$aic
# 23102.41

model2 <- glm(claims ~ LY + age, family = poisson(link = "log"))
model2$aic
# 23125.08

model3 <- glm(claims ~ NCD + age, family = poisson(link = "log"))
model3$aic
# 23131.97

# LY and NCD together with their interaction
model5 <- glm(claims ~ LY * NCD, family = poisson(link = "log"))
model5$aic
# 23104.38

# The AIC of the model with all three explanatory variables (23104.33) is
# greater than the AIC of the LY + NCD model (23102.41). The difference is
# small, but the model with the lower AIC is preferred.
# 4) ----
# i) ----
# NOTE(review): `data` (the numerator of X) is taken from policies_matrix and
# `volume` (the weights) from claims_matrix. The row totals of `volume` below
# are non-integer (e.g. 307.6) while the year-6 volumes are whole numbers in
# the thousands, which suggests the two inputs may be swapped. Confirm against
# the data before relying on the outputs recorded here.
data <- policies_matrix
volume <- claims_matrix

# calculating n (number of columns)
n <- ncol(data)
# Output: 5

# calculating N (number of rows)
N <- nrow(data)
# Output: 4

# calculating Xij
X <- data / volume
# Output:
#          [,1]     [,2]     [,3]     [,4]     [,5]
# [1,] 19.19929 20.11532 27.87097 25.60058 30.40268
# [2,] 38.08743 33.36898 31.61290 32.53968 27.08333
# [3,] 22.20859 23.58114 22.37825 22.55401 22.84566
# [4,] 20.90190 11.36701 16.68981 15.80851 17.33967

# calculating Xibar (one value per row)
Xibar <- rowSums(data) / rowSums(volume)
# Output: 24.73667 32.48666 22.68814 16.28615

# calculating Pibar (row totals of volume)
Pi <- rowSums(volume)
# Output: 307.6 93.7 617.9 262.8

# calculating Pbar (grand total of volume)
P <- sum(Pi)
# Output: 1282

# calculating P*
Pstar <- sum(Pi * (1 - Pi / P)) / (N * n - 1)
# Output: 44.71887

# calculating E[m(theta)]
m <- sum(data) / P
# Output: 22.58346

# calculating E[s^2(theta)]
# Xibar has one entry per row, so X - Xibar subtracts each row's own mean.
s <- mean(rowSums(volume * (X - Xibar)^2) / (n - 1))
# Output: 620.7337

# calculating var[m(theta)]
v <- (sum(rowSums(volume * (X - m)^2)) / (n * N - 1) - s) / Pstar
# Output: 22.5759

# ii) credibility factors ----
# calculating Zi
Zi <- Pi / (Pi + s / v)
# Output: 0.9179475 0.7731316 0.9573976 0.9052847

# calculating premiums
# Assigned with `<-` so that `cred` exists for part iii.
cred <- Zi * Xibar + (1 - Zi) * m
cred
# Output: 24.55999 30.23994 22.68368 16.88260

# iii) the expected claim amounts for each of the four insurers in year 6 ----
new.volume <- c(1920, 575, 2820, 798)
# the credibility premium for each of the four insurers in year 6
cred * new.volume
# Output: 47155.19 17387.96 63967.97 13472.32

You might also like