Series

R TIME SERIES MACHINE LEARNING APPLICATIONS IN TIMEKIT
https://www.r-bloggers.com/timekit-time-series-forecast-applications-using-data-mining/
> install.packages("timekit")
> library(tidyquant)
> library(timekit)
> FB_tbl <- FANG %>%
+ filter(symbol == "FB") %>%
+ select(date, volume)
> FB_tbl
# A tibble: 1,008 × 2
date volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
# ... with 998 more rows
> # Everything before 2016 will be used for training (2013-2015 data)
> train <- FB_tbl %>%
+ filter(date < ymd("2016-01-01"))
> # Everything in 2016 will be used for comparing the output
> actual_future <- FB_tbl %>%
+ filter(date >= ymd("2016-01-01"))
> # Next, augment the time series signature to the training set using tk_augment_timeseries_signature()
> train <- tk_augment_timeseries_signature(train)
> train
# A tibble: 756 × 24
date volume index.num diff year half quarter month month.xts
<date> <dbl> <int> <int> <int> <int> <int> <int> <int>
1 2013-01-02 69846400 1357084800 NA 2013 1 1 1 0
2 2013-01-03 63140600 1357171200 86400 2013 1 1 1 0
3 2013-01-04 72715400 1357257600 86400 2013 1 1 1 0
4 2013-01-07 83781800 1357516800 259200 2013 1 1 1 0
5 2013-01-08 45871300 1357603200 86400 2013 1 1 1 0
6 2013-01-09 104787700 1357689600 86400 2013 1 1 1 0
7 2013-01-10 95316400 1357776000 86400 2013 1 1 1 0
8 2013-01-11 89598000 1357862400 86400 2013 1 1 1 0
9 2013-01-14 98892800 1358121600 259200 2013 1 1 1 0
10 2013-01-15 173242600 1358208000 86400 2013 1 1 1 0
# ... with 746 more rows, and 15 more variables: month.lbl <ord>, day <int>,
# hour <int>, minute <int>, second <int>, wday <int>, wday.xts <int>,
# wday.lbl <ord>, mday <int>, yday <int>, week <int>, week.iso <int>,
# week2 <int>, week3 <int>, week4 <int>
> fit_lm <- lm(volume ~ ., data = train[,-1])
> summary(fit_lm)
Call:
lm(formula = volume ~ ., data = train[, -1])
Residuals:
Min 1Q Median 3Q Max
-56182422 -14721686 -3529158 9826043 289760015
Coefficients: (12 not defined because of singularities)

Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.986e+11 4.109e+11 0.727 0.4677
index.num 4.266e+00 6.607e+00 0.646 0.5187
diff -4.755e+01 2.987e+01 -1.592 0.1118
year -1.512e+08 2.086e+08 -0.725 0.4689
half 1.669e+07 1.514e+07 1.102 0.2706
quarter 7.128e+06 7.701e+06 0.926 0.3549
month -1.806e+07 3.711e+06 -4.866 1.4e-06 ***
month.xts NA NA NA NA
month.lbl.L NA NA NA NA
month.lbl.Q 5.420e+06 3.451e+06 1.570 0.1167
month.lbl.C 6.025e+05 7.687e+06 0.078 0.9376
month.lbl^4 -2.337e+06 3.422e+06 -0.683 0.4947
month.lbl^5 -6.224e+06 8.735e+06 -0.713 0.4764
month.lbl^6 7.658e+06 3.455e+06 2.216 0.0270 *
month.lbl^7 6.488e+06 5.521e+06 1.175 0.2403
month.lbl^8 3.082e+06 3.397e+06 0.907 0.3645
month.lbl^9 NA NA NA NA
month.lbl^10 -5.133e+06 3.389e+06 -1.515 0.1303
month.lbl^11 NA NA NA NA
day NA NA NA NA
hour NA NA NA NA
minute NA NA NA NA
second NA NA NA NA
wday -7.561e+05 1.391e+06 -0.544 0.5868
wday.xts NA NA NA NA
wday.lbl.L NA NA NA NA
wday.lbl.Q 2.538e+06 3.569e+06 0.711 0.4773
wday.lbl.C -6.012e+06 2.570e+06 -2.339 0.0196 *
wday.lbl^4 -1.394e+06 2.210e+06 -0.631 0.5284
mday NA NA NA NA
yday NA NA NA NA
week 1.249e+05 3.900e+06 0.032 0.9745
week.iso 3.581e+05 2.458e+05 1.457 0.1456
week2 -2.302e+06 2.189e+06 -1.052 0.2932
week3 8.228e+05 1.233e+06 0.667 0.5047
week4 1.940e+06 9.881e+05 1.963 0.0500 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 26960000 on 731 degrees of freedom

(1 observation deleted due to missingness)
Multiple R-squared: 0.2628, Adjusted R-squared: 0.2396
F-statistic: 11.33 on 23 and 731 DF, p-value: < 2.2e-16
> # US trading holidays in 2016

> holidays <- c("2016-01-01", "2016-01-18", "2016-02-15", "2016-03-25", "2016-05-30",
+ "2016-07-04", "2016-09-05", "2016-11-24", "2016-12-23", "2016-12-26",
+ "2016-12-30") %>%
+ ymd()
> # Build new data for prediction: 3 Steps
> new_data <- train %>%
+ tk_index() %>%
+ tk_make_future_timeseries(n_future = 252, skip_values = holidays, inspect_weekdays = TRUE) %>%
+ tk_get_timeseries_signature()
> new_data
# A tibble: 252 × 23
index index.num diff year half quarter month month.xts month.lbl
<date> <int> <int> <int> <int> <int> <int> <int> <ord>
1 2016-01-04 1451865600 NA 2016 1 1 1 0 January
2 2016-01-05 1451952000 86400 2016 1 1 1 0 January
3 2016-01-06 1452038400 86400 2016 1 1 1 0 January
4 2016-01-07 1452124800 86400 2016 1 1 1 0 January
5 2016-01-08 1452211200 86400 2016 1 1 1 0 January
6 2016-01-11 1452470400 259200 2016 1 1 1 0 January
7 2016-01-12 1452556800 86400 2016 1 1 1 0 January
8 2016-01-13 1452643200 86400 2016 1 1 1 0 January
9 2016-01-14 1452729600 86400 2016 1 1 1 0 January
10 2016-01-15 1452816000 86400 2016 1 1 1 0 January
# ... with 242 more rows, and 14 more variables: day <int>, hour <int>,
# minute <int>, second <int>, wday <int>, wday.xts <int>, wday.lbl <ord>,
# mday <int>, yday <int>, week <int>, week.iso <int>, week2 <int>,
# week3 <int>, week4 <int>
> pred_lm <- predict(fit_lm, newdata = new_data)
Warning message:
In predict.lm(fit_lm, newdata = new_data) :
prediction from a rank-deficient fit may be misleading
> # Add predicted values to actuals data
> actual_future <- actual_future %>%
+ add_column(yhat = pred_lm)
> # Plot using ggplot
> actual_future %>%
+ ggplot(aes(x = date)) +
+ geom_line(aes(y = volume), data = train, color = palette_light()[[1]]) +
+ geom_line(aes(y = volume), color = palette_light()[[1]]) +
+ geom_line(aes(y = yhat), color = palette_light()[[2]]) +
+ scale_y_continuous(labels = scales::comma) +
+ labs(title = "Forecasting FB Daily Volume: New Methods Using Data Mining",
+ subtitle = "Linear Regression Model Applied to Time Series Signature",
+ x = "",
+ y = "Volume",
+ caption = "Data from Yahoo! Finance: 'FB' Daily Volume from 2013 to 2016.") +
+ theme_tq(base_size = 12)
Warning message:
Removed 1 rows containing missing values (geom_path).
> FB_tbl
# A tibble: 1,008 × 2
date volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
> FB_tbl %>%
+ tk_xts(silent = TRUE) %>% # Coerce to xts
+ tk_zoo() %>% # Coerce to zoo
+ tk_ts(start = 2013, freq = 252) %>% # Coerce to ts
+ tk_xts() %>% # Coerce back to xts
+ tk_tbl() # Coerce back to tbl
# A tibble: 1,008 × 2
index volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
> FB_tbl %>%
+ tk_ts(start = 2013, freq = 252, silent = TRUE) %>%
+ tk_tbl(timekit_idx = TRUE)
# A tibble: 1,008 × 2
index volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600

Series

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Series

Uploaded by

Copyright:

Available Formats

R TIME SERIES MACHINE LEARNING APPLICATIONS IN

Coefficients: (12 not defined because of singularities)

Residual standard error: 26960000 on 731 degrees of freedom

> # US trading holidays in 2016

You might also like