You are on page 1 of 12

Machine Learning in Finance Homework 1

109208095 經濟三 柯龍
1.
程式碼

# ---- Question 1: Frisch-Waugh-Lovell (FWL) check for the x1 slope ----

# Fix the RNG state so the simulated sample is reproducible.
set.seed(333)

# Simulate two predictors that load on a common factor f.
# The draw order (f, x1 noise, x2 noise, y noise) is kept exactly as-is,
# because under a fixed seed it determines every simulated value.
n <- 300
f <- rnorm(n)
x1 <- 0.9 * f + rnorm(n)
x2 <- -1.1 * f + rnorm(n)

# Response: linear in x1 and x2 plus standard normal noise.
y <- 0.5 + 0.8 * x1 + 0.7 * x2 + rnorm(n)

# Reference fit: multiple regression of y on both predictors.
fit <- lm(y ~ x1 + x2)
summary(fit)

# FWL for beta_1: partial x2 out of both x1 and y, then regress the
# residual of y on the residual of x1.
par_x2 <- lm(x1 ~ x2)
fit_x2 <- lm(y ~ x2)
resid_par_x2 <- resid(par_x2)
resid_x2 <- resid(fit_x2)
fit_fwl <- lm(resid_x2 ~ resid_par_x2)

# The x1 slope from the full model and the FWL slope should coincide.
coef(fit)[[2]]
coef(fit_fwl)[[2]]

1/12
# ---- FWL check for the x2 slope (beta_2) ----

# Partial x1 out of both x2 and y.
par_x1 <- lm(x2 ~ x1)
fit_x1 <- lm(y ~ x1)
resid_par_x1 <- resid(par_x1)
resid_x1 <- resid(fit_x1)

# Regress the residual of y on the residual of x2.
fit_fwl2 <- lm(resid_x1 ~ resid_par_x1)

# The x2 slope from the full model and the FWL slope should coincide.
coef(fit)[[3]]
coef(fit_fwl2)[[2]]

程式結果(結尾)

……………(略)
> # Compare the coefficients to those from the multiple regression
> summary(fit)$coefficients[2]
[1] 0.8192845
> summary(fit_fwl)$coefficients[2]
[1] 0.8192845

……………(略)
> summary(fit)$coefficients[3]
[1] 0.7078249
> summary(fit_fwl2)$coefficients[2]
[1] 0.7078249

2. 下面程式會因為題目分步驟而切開,造成助教難以複製到 R。所以我在最下面有附完整第
二題的程式碼,供助教或老師使用
1. Download the TWSE index (the symbol is ^TWII) from “2015-01-01” to “2022-12-31” with
`tq_get`.
install.packages("tidyquant")
install.packages("caret")

# Load packages
library(tidyquant)
library(caret)

2/12
library(dplyr)
library(lubridate)
library(data.table)

# Download daily ^TWII prices for 2015-01-01 .. 2022-12-31 with tq_get(),
# convert the result to a data.table and drop every row containing an NA.
ticker <- "^TWII"
twii_data <- na.omit(
  as.data.table(
    tidyquant::tq_get(
      ticker,
      get = "stock.prices",
      from = "2015-01-01",
      to = "2022-12-31"
    )
  )
)

2. Create a dataset that has weekly returns and its past 12 lagged values.
## Transform daily to weekly
# Tag each trading day with its ISO-8601 week-based year, ISO week number
# and weekday (1 = Monday).
# The year MUST be the ISO year, not the calendar year: ISO week 1 can start
# in late December and week 52/53 can run into early January. Pairing
# isoweek() with the calendar year would file e.g. 2019-12-30 under
# (2019, week 1), i.e. together with the first trading days of January 2019.
twii_data[, `:=`(
  yr = lubridate::isoyear(date),
  wk = lubridate::isoweek(date),
  d = lubridate::wday(date, week_start = 1)
)]

# Sort chronologically within each symbol (ISO year, ISO week, weekday).
setkey(twii_data, symbol, yr, wk, d)

# Cache the keyed daily data on disk and reload it from the cache.
saveRDS(twii_data, "twii_daily_data.RDS")
twii_data <- readRDS("twii_daily_data.RDS")

# For each (symbol, year, week) keep the last available trading day.
twii_data_weekly <- twii_data[, .SD[.N], by = .(symbol, yr, wk)]

# Weekly simple return from consecutive week-end adjusted closes.
twii_data_weekly[, weekly_rets := adjusted / shift(adjusted) - 1,
                 by = .(symbol)]

# Create lag1 .. lag12 of the weekly return.
# data.table::shift() is used instead of a bare lag(): lag() only shifts a
# plain vector when dplyr is masking stats::lag(); with stats::lag() the
# values come back unshifted and every "lag" would equal the current return.
lag_names <- paste0("lag", 1:12)
twii_data_weekly[, (lag_names) := shift(weekly_rets, n = 1:12, type = "lag"),
                 by = .(symbol)]

# Sign of the cumulative return over the previous k weeks (k = 1, 2, 4, 12),
# used later as the position of the momentum benchmark strategies.
# The cumulative return is the compounded return prod(1 + lag_j) - 1 over
# j = 1..k. The sign of the raw product lag1 * ... * lagk is NOT the sign of
# the cumulative return (two losing weeks would give +1), and every lag from
# 1 to k must be included (lag9 must not be skipped for k = 12).
for (k in c(1L, 2L, 4L, 12L)) {
  cum_col <- paste0("cumw", k)
  lag_cols <- paste0("lag", seq_len(k))
  twii_data_weekly[
    , (cum_col) := sign(Reduce(`*`, lapply(.SD, function(r) 1 + r)) - 1),
    .SDcols = lag_cols
  ]
}

3. Conduct the rolling prediction experiment using the past 106 weeks to forecast the one-week
ahead returns. In each period collect the in-sample R-squared, the one-week ahead prediction,
and the standard error of the prediction.
# Number of weeks in each rolling training window.
train_size <- 106L

# Predictor columns: the 12 lagged weekly returns.
pred_name <- paste0("lag", 1:12)

# Forecast origins: every week i that has a full window behind it and at
# least one later week (i + 1) left to forecast. Derived from the data
# instead of a hard-coded end index.
n_weeks <- nrow(twii_data_weekly)
origins <- if (n_weeks > train_size) {
  train_size:(n_weeks - 1L)
} else {
  integer(0)
}

# Rolling one-week-ahead prediction experiment.
# For each origin i: fit OLS on the most recent train_size weeks, forecast
# week i + 1, and store in row i + 1
#   pred  - the one-week-ahead forecast
#   insr  - the in-sample R-squared of the fitted model
#   se    - the standard error of the prediction
#   error - the absolute forecast error |actual - forecast|
# Rows whose lags are still NA (the first weeks of the sample) are dropped
# by lm(), so the earliest windows contain fewer complete observations.
for (i in origins) {
  # Exactly train_size rows: (i - train_size + 1) .. i.
  train_rows <- (i - train_size + 1L):i
  train_data <- twii_data_weekly[train_rows, c("weekly_rets", pred_name),
                                 with = FALSE]

  lm_mdl <- lm(weekly_rets ~ ., data = train_data)

  test_data <- twii_data_weekly[i + 1L, pred_name, with = FALSE]
  fc <- predict(lm_mdl, newdata = test_data, se.fit = TRUE)

  y_pred <- unname(fc$fit)
  # Standard error of predicting a new observation: uncertainty of the
  # fitted mean (se.fit) plus residual noise (residual.scale).
  y_pred_se <- sqrt(unname(fc$se.fit)^2 + fc$residual.scale^2)
  in_r2 <- summary(lm_mdl)$r.squared

  twii_data_weekly[i + 1L, `:=`(
    pred = y_pred,
    insr = in_r2,
    se = y_pred_se,
    error = abs(weekly_rets - y_pred)
  )]
}

4. Plot the in-sample R-squared and the absolute value of forecast error. Does the in-sample
goodness-of-fit have a relationship with the future prediction accuracy?

4/12
To answer the question: no. From what we can see in the plot below, the in-sample
goodness-of-fit does not have a relationship with the future prediction accuracy.
# Plot the in-sample R-squared and the absolute forecast error over time.
# The colours are mapped inside aes() so that ggplot draws a legend; with
# constant colours outside aes() no legend exists and legend.position has
# no effect. Rows without a forecast (NA) are dropped silently.
library(ggplot2)

ggplot(twii_data_weekly, aes(x = date)) +
  geom_line(aes(y = insr, colour = "In-sample R-squared"), na.rm = TRUE) +
  geom_line(aes(y = error, colour = "Absolute forecast error"),
            na.rm = TRUE) +
  scale_colour_manual(
    name = NULL,
    values = c(
      "In-sample R-squared" = "red",
      "Absolute forecast error" = "blue"
    )
  ) +
  labs(x = "Date", y = "Value") +
  scale_y_continuous(
    sec.axis = sec_axis(~ ., name = "Absolute Forecast Error",
                        labels = scales::comma)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  )

5. Plot the standard error of the prediction and the absolute value of forecast. Could a large
value of ex-ante prediction uncertainty be an indication of future prediction accuracy?
To answer the question: yes. From what we can see in the plot below, a large value of ex-ante
prediction uncertainty can be an indication of future prediction accuracy.
# Plot the standard error of the prediction against the absolute forecast
# error over time.
# The ex-ante uncertainty is the se column, so that is what is drawn next
# to the realised absolute error; plotting abs(pred) would leave se out of
# the figure entirely. Colours are mapped inside aes() so a legend is drawn.
ggplot(twii_data_weekly, aes(x = date)) +
  geom_line(aes(y = se, colour = "Prediction standard error"),
            na.rm = TRUE) +
  geom_line(aes(y = error, colour = "Absolute forecast error"),
            na.rm = TRUE) +
  scale_colour_manual(
    name = NULL,
    values = c(
      "Prediction standard error" = "red",
      "Absolute forecast error" = "blue"
    )
  ) +
  labs(x = "Date", y = "Value") +
  scale_y_continuous(
    sec.axis = sec_axis(~ ., name = "Absolute Forecast Error",
                        labels = scales::comma)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  )

6. Compute the Sharpe ratio of the strategy that is based on the sign of the prediction. That is,
if $\hat{y} > 0$ then take a long position $w = 1$, and if $\hat{y} < 0$, then $w = -1$.
The Sharpe ratio turned out to be -0.2303117
# Build the strategy portfolios.
# Keep only the weeks for which the forecast, the realised return and all
# benchmark signals are available.
res <- na.omit(
  twii_data_weekly[
    , c("date", "pred", "weekly_rets", "cumw1", "cumw2", "cumw4", "cumw12"),
    with = FALSE
  ]
)

# Position of the regression strategy: +1 (long) or -1 (short), taken from
# the sign of the forecast.
res[, weight := sign(pred)]

# Weekly strategy returns: position times realised weekly return.
res[, `:=`(
  Rp = weight * weekly_rets,
  Rp1 = cumw1 * weekly_rets,
  Rp2 = cumw2 * weekly_rets,
  Rp4 = cumw4 * weekly_rets,
  Rp12 = cumw12 * weekly_rets
)]

# Sharpe ratio of the regression strategy, scaled by sqrt(52).
m <- mean(res[["Rp"]])
s <- sd(res[["Rp"]])
sqrt(52) * (m / s)

6/12
7. Construct the following benchmark strategies and compare their Sharpe ratio with the one
based on linear regression prediction.
- Buy-and-hold
- Trade based on the sign of past $k$ cumulative weekly returns, where $k = 1, 2, 4, 12$
# Sharpe ratios of the benchmark strategies, each scaled by sqrt(52).

# Buy-and-hold: always long the index.
m_bnh <- mean(res[["weekly_rets"]])
s_bnh <- sd(res[["weekly_rets"]])
sqrt(52) * (m_bnh / s_bnh)

# k = 1 benchmark strategy.
m_b1 <- mean(res[["Rp1"]])
s_b1 <- sd(res[["Rp1"]])
sqrt(52) * (m_b1 / s_b1)

# k = 2 benchmark strategy.
m_b2 <- mean(res[["Rp2"]])
s_b2 <- sd(res[["Rp2"]])
sqrt(52) * (m_b2 / s_b2)

# k = 4 benchmark strategy.
m_b4 <- mean(res[["Rp4"]])
s_b4 <- sd(res[["Rp4"]])
sqrt(52) * (m_b4 / s_b4)

# k = 12 benchmark strategy.
m_b12 <- mean(res[["Rp12"]])
s_b12 <- sd(res[["Rp12"]])
sqrt(52) * (m_b12 / s_b12)

程式結果

> # buy-and-hold portfolio Sharpe Ratio


> m_bnh = res[, mean(weekly_rets)]
> s_bnh = res[, sd(weekly_rets)]
> m_bnh/s_bnh*sqrt(52)
[1] 0.5025714
>
> # k=1 strategy portfolio sharpe ratio
> m_b1 = res[, mean(Rp1)]

7/12
> s_b1 = res[, sd(Rp1)]
> m_b1/s_b1*sqrt(52)
[1] 0.4916208
>
> # k=2 strategy portfolio sharpe ratio
> m_b2 = res[, mean(Rp2)]
> s_b2 = res[, sd(Rp2)]
> m_b2/s_b2*sqrt(52)
[1] 0.32154
>
> # k=4 strategy portfolio sharpe ratio
> m_b4 = res[, mean(Rp4)]
> s_b4 = res[, sd(Rp4)]
> m_b4/s_b4*sqrt(52)
[1] -0.01068683
>
> # k=12 strategy portfolio sharpe ratio
> m_b12 = res[, mean(Rp12)]
> s_b12 = res[, sd(Rp12)]
> m_b12/s_b12*sqrt(52)
[1] 0.8183462

第二題完整程式碼
install.packages("tidyquant")
install.packages("caret")

# Load packages
library(tidyquant)
library(caret)
library(dplyr)
library(lubridate)
library(data.table)

# Download daily ^TWII prices for 2015-01-01 .. 2022-12-31 with tq_get(),
# convert the result to a data.table and drop every row containing an NA.
ticker <- "^TWII"
twii_data <- na.omit(
  as.data.table(
    tidyquant::tq_get(
      ticker,
      get = "stock.prices",
      from = "2015-01-01",
      to = "2022-12-31"
    )
  )
)

## Transform daily to weekly
# Tag each trading day with its ISO-8601 week-based year, ISO week number
# and weekday (1 = Monday).
# The year MUST be the ISO year, not the calendar year: ISO week 1 can start
# in late December and week 52/53 can run into early January. Pairing
# isoweek() with the calendar year would file e.g. 2019-12-30 under
# (2019, week 1), i.e. together with the first trading days of January 2019.
twii_data[, `:=`(
  yr = lubridate::isoyear(date),
  wk = lubridate::isoweek(date),
  d = lubridate::wday(date, week_start = 1)
)]

# Sort chronologically within each symbol (ISO year, ISO week, weekday).
setkey(twii_data, symbol, yr, wk, d)

# Cache the keyed daily data on disk and reload it from the cache.
saveRDS(twii_data, "twii_daily_data.RDS")
twii_data <- readRDS("twii_daily_data.RDS")

# For each (symbol, year, week) keep the last available trading day.
twii_data_weekly <- twii_data[, .SD[.N], by = .(symbol, yr, wk)]

# Weekly simple return from consecutive week-end adjusted closes.
twii_data_weekly[, weekly_rets := adjusted / shift(adjusted) - 1,
                 by = .(symbol)]

# Create lag1 .. lag12 of the weekly return.
# data.table::shift() is used instead of a bare lag(): lag() only shifts a
# plain vector when dplyr is masking stats::lag(); with stats::lag() the
# values come back unshifted and every "lag" would equal the current return.
lag_names <- paste0("lag", 1:12)
twii_data_weekly[, (lag_names) := shift(weekly_rets, n = 1:12, type = "lag"),
                 by = .(symbol)]
# Sign of the cumulative return over the previous k weeks (k = 1, 2, 4, 12),
# used later as the position of the momentum benchmark strategies.
# The cumulative return is the compounded return prod(1 + lag_j) - 1 over
# j = 1..k. The sign of the raw product lag1 * ... * lagk is NOT the sign of
# the cumulative return (two losing weeks would give +1), and every lag from
# 1 to k must be included (lag9 must not be skipped for k = 12).
for (k in c(1L, 2L, 4L, 12L)) {
  cum_col <- paste0("cumw", k)
  lag_cols <- paste0("lag", seq_len(k))
  twii_data_weekly[
    , (cum_col) := sign(Reduce(`*`, lapply(.SD, function(r) 1 + r)) - 1),
    .SDcols = lag_cols
  ]
}

# Number of weeks in each rolling training window.
train_size <- 106L

# Predictor columns: the 12 lagged weekly returns.
pred_name <- paste0("lag", 1:12)

# Forecast origins: every week i that has a full window behind it and at
# least one later week (i + 1) left to forecast. Derived from the data
# instead of a hard-coded end index.
n_weeks <- nrow(twii_data_weekly)
origins <- if (n_weeks > train_size) {
  train_size:(n_weeks - 1L)
} else {
  integer(0)
}

# Rolling one-week-ahead prediction experiment.
# For each origin i: fit OLS on the most recent train_size weeks, forecast
# week i + 1, and store in row i + 1
#   pred  - the one-week-ahead forecast
#   insr  - the in-sample R-squared of the fitted model
#   se    - the standard error of the prediction
#   error - the absolute forecast error |actual - forecast|
# Rows whose lags are still NA (the first weeks of the sample) are dropped
# by lm(), so the earliest windows contain fewer complete observations.
for (i in origins) {
  # Exactly train_size rows: (i - train_size + 1) .. i.
  train_rows <- (i - train_size + 1L):i
  train_data <- twii_data_weekly[train_rows, c("weekly_rets", pred_name),
                                 with = FALSE]

  lm_mdl <- lm(weekly_rets ~ ., data = train_data)

  test_data <- twii_data_weekly[i + 1L, pred_name, with = FALSE]
  fc <- predict(lm_mdl, newdata = test_data, se.fit = TRUE)

  y_pred <- unname(fc$fit)
  # Standard error of predicting a new observation: uncertainty of the
  # fitted mean (se.fit) plus residual noise (residual.scale).
  y_pred_se <- sqrt(unname(fc$se.fit)^2 + fc$residual.scale^2)
  in_r2 <- summary(lm_mdl)$r.squared

  twii_data_weekly[i + 1L, `:=`(
    pred = y_pred,
    insr = in_r2,
    se = y_pred_se,
    error = abs(weekly_rets - y_pred)
  )]
}

# Plot the in-sample R-squared and the absolute forecast error over time.
# The colours are mapped inside aes() so that ggplot draws a legend; with
# constant colours outside aes() no legend exists and legend.position has
# no effect. Rows without a forecast (NA) are dropped silently.
library(ggplot2)

ggplot(twii_data_weekly, aes(x = date)) +
  geom_line(aes(y = insr, colour = "In-sample R-squared"), na.rm = TRUE) +
  geom_line(aes(y = error, colour = "Absolute forecast error"),
            na.rm = TRUE) +
  scale_colour_manual(
    name = NULL,
    values = c(
      "In-sample R-squared" = "red",
      "Absolute forecast error" = "blue"
    )
  ) +
  labs(x = "Date", y = "Value") +
  scale_y_continuous(
    sec.axis = sec_axis(~ ., name = "Absolute Forecast Error",
                        labels = scales::comma)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  )

# Plot the standard error of the prediction against the absolute forecast
# error over time.
# The ex-ante uncertainty is the se column, so that is what is drawn next
# to the realised absolute error; plotting abs(pred) would leave se out of
# the figure entirely. Colours are mapped inside aes() so a legend is drawn.
ggplot(twii_data_weekly, aes(x = date)) +
  geom_line(aes(y = se, colour = "Prediction standard error"),
            na.rm = TRUE) +
  geom_line(aes(y = error, colour = "Absolute forecast error"),
            na.rm = TRUE) +
  scale_colour_manual(
    name = NULL,
    values = c(
      "Prediction standard error" = "red",
      "Absolute forecast error" = "blue"
    )
  ) +
  labs(x = "Date", y = "Value") +
  scale_y_continuous(
    sec.axis = sec_axis(~ ., name = "Absolute Forecast Error",
                        labels = scales::comma)
  ) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.position = "bottom"
  )

# Build the strategy portfolios.
# Keep only the weeks for which the forecast, the realised return and all
# benchmark signals are available.
res <- na.omit(
  twii_data_weekly[
    , c("date", "pred", "weekly_rets", "cumw1", "cumw2", "cumw4", "cumw12"),
    with = FALSE
  ]
)

# Position of the regression strategy: +1 (long) or -1 (short), taken from
# the sign of the forecast.
res[, weight := sign(pred)]

# Weekly strategy returns: position times realised weekly return.
res[, `:=`(
  Rp = weight * weekly_rets,
  Rp1 = cumw1 * weekly_rets,
  Rp2 = cumw2 * weekly_rets,
  Rp4 = cumw4 * weekly_rets,
  Rp12 = cumw12 * weekly_rets
)]

# Sharpe ratio of the regression strategy, scaled by sqrt(52).
m <- mean(res[["Rp"]])
s <- sd(res[["Rp"]])
sqrt(52) * (m / s)

# Sharpe ratios of the benchmark strategies, each scaled by sqrt(52).

# Buy-and-hold: always long the index.
m_bnh <- mean(res[["weekly_rets"]])
s_bnh <- sd(res[["weekly_rets"]])
sqrt(52) * (m_bnh / s_bnh)

# k = 1 benchmark strategy.
m_b1 <- mean(res[["Rp1"]])
s_b1 <- sd(res[["Rp1"]])
sqrt(52) * (m_b1 / s_b1)

# k = 2 benchmark strategy.
m_b2 <- mean(res[["Rp2"]])
s_b2 <- sd(res[["Rp2"]])
sqrt(52) * (m_b2 / s_b2)

# k = 4 benchmark strategy.
m_b4 <- mean(res[["Rp4"]])
s_b4 <- sd(res[["Rp4"]])
sqrt(52) * (m_b4 / s_b4)

# k = 12 benchmark strategy.
m_b12 <- mean(res[["Rp12"]])
s_b12 <- sd(res[["Rp12"]])
sqrt(52) * (m_b12 / s_b12)

12/12

You might also like