Professional Documents
Culture Documents
TIMEKIT
https://www.r-bloggers.com/timekit-time-series-forecast-applications-using-data-mining/
> install.packages("timekit")
> library(tidyquant)
> library(timekit)
> FB_tbl <- FANG %>%
+ filter(symbol == "FB") %>%
+ select(date, volume)
> FB_tbl
# A tibble: 1,008 × 2
date volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
# ... with 998 more rows
> # Everything before 2016 will be used for training (2013-2015 data)
> train <- FB_tbl %>%
+ filter(date < ymd("2016-01-01"))
> # Everything in 2016 will be held out for comparison with the model's predictions
> actual_future <- FB_tbl %>%
+ filter(date >= ymd("2016-01-01"))
> # Next, augment the training set with the time series signature using
> # tk_augment_timeseries_signature()
> train <- tk_augment_timeseries_signature(train)
> train
# A tibble: 756 × 24
date volume index.num diff year half quarter month month.xts
<date> <dbl> <int> <int> <int> <int> <int> <int> <int>
1 2013-01-02 69846400 1357084800 NA 2013 1 1 1 0
2 2013-01-03 63140600 1357171200 86400 2013 1 1 1 0
3 2013-01-04 72715400 1357257600 86400 2013 1 1 1 0
4 2013-01-07 83781800 1357516800 259200 2013 1 1 1 0
5 2013-01-08 45871300 1357603200 86400 2013 1 1 1 0
6 2013-01-09 104787700 1357689600 86400 2013 1 1 1 0
7 2013-01-10 95316400 1357776000 86400 2013 1 1 1 0
8 2013-01-11 89598000 1357862400 86400 2013 1 1 1 0
9 2013-01-14 98892800 1358121600 259200 2013 1 1 1 0
10 2013-01-15 173242600 1358208000 86400 2013 1 1 1 0
# ... with 746 more rows, and 15 more variables: month.lbl <ord>, day <int>,
# hour <int>, minute <int>, second <int>, wday <int>, wday.xts <int>,
# wday.lbl <ord>, mday <int>, yday <int>, week <int>, week.iso <int>,
# week2 <int>, week3 <int>, week4 <int>
> fit_lm <- lm(volume ~ ., data = train[,-1])
> summary(fit_lm)
Call:
lm(formula = volume ~ ., data = train[, -1])
Residuals:
Min 1Q Median 3Q Max
-56182422 -14721686 -3529158 9826043 289760015
> FB_tbl
# A tibble: 1,008 × 2
date volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
# ... with 998 more rows
> FB_tbl %>%
+ tk_xts(silent = TRUE) %>% # Coerce to xts
+ tk_zoo() %>% # Coerce to zoo
+ tk_ts(start = 2013, freq = 252) %>% # Coerce to ts
+ tk_xts() %>% # Coerce back to xts
+ tk_tbl() # Coerce back to tbl
# A tibble: 1,008 × 2
index volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
# ... with 998 more rows
> FB_tbl %>%
+ tk_ts(start = 2013, freq = 252, silent = TRUE) %>%
+ tk_tbl(timekit_idx = TRUE)
# A tibble: 1,008 × 2
index volume
<date> <dbl>
1 2013-01-02 69846400
2 2013-01-03 63140600
3 2013-01-04 72715400
4 2013-01-07 83781800
5 2013-01-08 45871300
6 2013-01-09 104787700
7 2013-01-10 95316400
8 2013-01-11 89598000
9 2013-01-14 98892800
10 2013-01-15 173242600
# ... with 998 more rows