You are on page 1 of 5

R TIME SERIES MACHINE LEARNING APPLICATIONS IN TIMEKIT
https://www.r-bloggers.com/timekit-time-series-forecast-applications-using-data-mining/

> install.packages("timekit")
> library(tidyquant)
> library(timekit)
> FB_tbl <- FANG %>%
+ filter(symbol == "FB") %>%
+ select(date, volume)
> FB_tbl
# A tibble: 1,008 × 2
date volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
# ... with 998 more rows
> # Everything before 2016 will be used for training (2013-2015 data)
> train <- FB_tbl %>%
+ filter(date < ymd("2016-01-01"))
> # Everything in 2016 will be used for comparing the output
> actual_future <- FB_tbl %>%
+ filter(date >= ymd("2016-01-01"))
> # Next, augment the time series signature to the training set using
> # tk_augment_timeseries_signature()
> train <- tk_augment_timeseries_signature(train)
> train
# A tibble: 756 × 24
date volume index.num diff year half quarter month month.xts
<date> <dbl> <int> <int> <int> <int> <int> <int> <int>
1 2013-01-02 69846400 1357084800 NA 2013 1 1 1 0
2 2013-01-03 63140600 1357171200 86400 2013 1 1 1 0
3 2013-01-04 72715400 1357257600 86400 2013 1 1 1 0
4 2013-01-07 83781800 1357516800 259200 2013 1 1 1 0
5 2013-01-08 45871300 1357603200 86400 2013 1 1 1 0
6 2013-01-09 104787700 1357689600 86400 2013 1 1 1 0
7 2013-01-10 95316400 1357776000 86400 2013 1 1 1 0
8 2013-01-11 89598000 1357862400 86400 2013 1 1 1 0
9 2013-01-14 98892800 1358121600 259200 2013 1 1 1 0
10 2013-01-15 173242600 1358208000 86400 2013 1 1 1 0
# ... with 746 more rows, and 15 more variables: month.lbl <ord>, day <int>,
# hour <int>, minute <int>, second <int>, wday <int>, wday.xts <int>,
# wday.lbl <ord>, mday <int>, yday <int>, week <int>, week.iso <int>,
# week2 <int>, week3 <int>, week4 <int>
> fit_lm <- lm(volume ~ ., data = train[,-1])
> summary(fit_lm)

Call:
lm(formula = volume ~ ., data = train[, -1])

Residuals:
Min 1Q Median 3Q Max
-56182422 -14721686 -3529158 9826043 289760015

Coefficients: (12 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.986e+11 4.109e+11 0.727 0.4677
index.num 4.266e+00 6.607e+00 0.646 0.5187
diff -4.755e+01 2.987e+01 -1.592 0.1118
year -1.512e+08 2.086e+08 -0.725 0.4689
half 1.669e+07 1.514e+07 1.102 0.2706
quarter 7.128e+06 7.701e+06 0.926 0.3549
month -1.806e+07 3.711e+06 -4.866 1.4e-06 ***
month.xts NA NA NA NA
month.lbl.L NA NA NA NA
month.lbl.Q 5.420e+06 3.451e+06 1.570 0.1167
month.lbl.C 6.025e+05 7.687e+06 0.078 0.9376
month.lbl^4 -2.337e+06 3.422e+06 -0.683 0.4947
month.lbl^5 -6.224e+06 8.735e+06 -0.713 0.4764
month.lbl^6 7.658e+06 3.455e+06 2.216 0.0270 *
month.lbl^7 6.488e+06 5.521e+06 1.175 0.2403
month.lbl^8 3.082e+06 3.397e+06 0.907 0.3645
month.lbl^9 NA NA NA NA
month.lbl^10 -5.133e+06 3.389e+06 -1.515 0.1303
month.lbl^11 NA NA NA NA
day NA NA NA NA
hour NA NA NA NA
minute NA NA NA NA
second NA NA NA NA
wday -7.561e+05 1.391e+06 -0.544 0.5868
wday.xts NA NA NA NA
wday.lbl.L NA NA NA NA
wday.lbl.Q 2.538e+06 3.569e+06 0.711 0.4773
wday.lbl.C -6.012e+06 2.570e+06 -2.339 0.0196 *
wday.lbl^4 -1.394e+06 2.210e+06 -0.631 0.5284
mday NA NA NA NA
yday NA NA NA NA
week 1.249e+05 3.900e+06 0.032 0.9745
week.iso 3.581e+05 2.458e+05 1.457 0.1456
week2 -2.302e+06 2.189e+06 -1.052 0.2932
week3 8.228e+05 1.233e+06 0.667 0.5047
week4 1.940e+06 9.881e+05 1.963 0.0500 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 26960000 on 731 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.2628, Adjusted R-squared: 0.2396
F-statistic: 11.33 on 23 and 731 DF, p-value: < 2.2e-16

> # US trading holidays in 2016


> holidays <- c("2016-01-01", "2016-01-18", "2016-02-15", "2016-03-25", "2016-05-30",
+ "2016-07-04", "2016-09-05", "2016-11-24", "2016-12-23", "2016-12-26",
+ "2016-12-30") %>%
+ ymd()
> # Build new data for prediction: 3 Steps
> new_data <- train %>%
+ tk_index() %>%
+ tk_make_future_timeseries(n_future = 252, skip_values = holidays, inspect_weekdays = TRUE) %>%
+ tk_get_timeseries_signature()
> new_data
# A tibble: 252 × 23
index index.num diff year half quarter month month.xts month.lbl
<date> <int> <int> <int> <int> <int> <int> <int> <ord>
1 2016-01-04 1451865600 NA 2016 1 1 1 0 January
2 2016-01-05 1451952000 86400 2016 1 1 1 0 January
3 2016-01-06 1452038400 86400 2016 1 1 1 0 January
4 2016-01-07 1452124800 86400 2016 1 1 1 0 January
5 2016-01-08 1452211200 86400 2016 1 1 1 0 January
6 2016-01-11 1452470400 259200 2016 1 1 1 0 January
7 2016-01-12 1452556800 86400 2016 1 1 1 0 January
8 2016-01-13 1452643200 86400 2016 1 1 1 0 January
9 2016-01-14 1452729600 86400 2016 1 1 1 0 January
10 2016-01-15 1452816000 86400 2016 1 1 1 0 January
# ... with 242 more rows, and 14 more variables: day <int>, hour <int>,
# minute <int>, second <int>, wday <int>, wday.xts <int>, wday.lbl <ord>,
# mday <int>, yday <int>, week <int>, week.iso <int>, week2 <int>,
# week3 <int>, week4 <int>
> pred_lm <- predict(fit_lm, newdata = new_data)
Warning message:
In predict.lm(fit_lm, newdata = new_data) :
prediction from a rank-deficient fit may be misleading
> # Add predicted values to actuals data
> actual_future <- actual_future %>%
+ add_column(yhat = pred_lm)
> # Plot using ggplot
> actual_future %>%
+ ggplot(aes(x = date)) +
+ geom_line(aes(y = volume), data = train, color = palette_light()[[1]]) +
+ geom_line(aes(y = volume), color = palette_light()[[1]]) +
+ geom_line(aes(y = yhat), color = palette_light()[[2]]) +
+ scale_y_continuous(labels = scales::comma) +
+ labs(title = "Forecasting FB Daily Volume: New Methods Using Data Mining",
+ subtitle = "Linear Regression Model Applied to Time Series Signature",
+ x = "",
+ y = "Volume",
+ caption = "Data from Yahoo! Finance: 'FB' Daily Volume from 2013 to 2016.") +
+ theme_tq(base_size = 12)
Warning message:
Removed 1 rows containing missing values (geom_path).

> FB_tbl
# A tibble: 1,008 × 2
date volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
# ... with 998 more rows
> FB_tbl %>%
+ tk_xts(silent = TRUE) %>% # Coerce to xts
+ tk_zoo() %>% # Coerce to zoo
+ tk_ts(start = 2013, freq = 252) %>% # Coerce to ts
+ tk_xts() %>% # Coerce back to xts
+ tk_tbl() # Coerce back to tbl
# A tibble: 1,008 × 2
index volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
# ... with 998 more rows
> FB_tbl %>%
+ tk_ts(start = 2013, freq = 252, silent = TRUE) %>%
+ tk_tbl(timekit_idx = TRUE)
# A tibble: 1,008 × 2
index volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
# ... with 998 more rows

You might also like