This notebook is a companion material to the article Dynamic decision making with predictive panels by Guillaume Coqueret and Bertrand Tavin.
The code is folded to lighten the presentation, but can be accessed by clicking on the corresponding unfolding buttons.
It corresponds to the second exercise in the paper, for which the data is public.

1 Packages

First, we load the required packages.

if(!require(tidyverse)){install.packages(c("tidyverse", "lubridate", "nlshrink", "broom", "gt"))}
library(tidyverse)  # Data handling
library(lubridate)  # Date handling
library(patchwork)  # Graph layout
library(nlshrink)   # Shrinkage estimators
library(broom)      # Regression output (tidy)
library(texreg)     # LaTeX regression output
library(latex2exp)  # LaTeX in ggplot
library(gt)         # Neat tables

2 Data

Second, we download & format the data, which comes from Ken French’s Data Library.
We work with the 10 industry portfolios, sampled at the daily frequency.
NOTE: depending on the files, the number of rows to skip (when reading the .csv file) changes. It typically varies between 2 and 10.

We drop the data before 1980 but it’s easy to consider 1950 onwards.

temp <- tempfile()
link <- "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/10_Industry_Portfolios_daily_CSV.zip"

download.file(link, temp, quiet = TRUE)                       # Download!
df_assets <- read_csv(unz(temp, "10_Industry_Portfolios_Daily.csv"),  skip = 9) %>%  
    # Check the nb of lines to skip
    rename(Date = `...1`) %>%                                 # Change name of the first column
    mutate_at(vars(-Date), as.numeric) %>%                    # Convert to number
    mutate_at(vars(-Date), function(x){x/100}) %>%            # Divide by 100
    mutate(Date = ymd(parse_date_time(Date, "%Y%m%d")))       # End of month date
split <- which(is.na(df_assets$Date))[1]                      # Split between portfolio types!
df_assets <- df_assets[1:(split-1),]  
df_assets <- df_assets %>% filter(year(Date) >= 1980)         # Choose a period (after 1950: more reliable)

colnames(df_assets)[1] <- "date"
stock_ids <- colnames(df_assets[,c(2:ncol(df_assets))])       # List of assets
nb_assets <- length(stock_ids)                                # Number of assets
list_years <- c(1980:2022)                                    # List of years considered

3 Main function

Note: it allows for sample versus shrinkage estimators (see last argument of the function).

time_series_output <- function(nb_assets, df_assets, sample_size, shrinkage){ # The main function
    
    one_n <- as.matrix(rep(1,nb_assets))            # A vector of ones
    nb_dates <- length(df_assets$date)              # The number of dates
    list_daily_dates <- df_assets$date              # List of these dates
    
    Sigma_is <- list()                              # Initializing the variables
    Sigma_oos <- list()
    inv_Sigma_is <- list()
    inv_Sigma_oos <- list()
    vec_mu_is <- list()
    vec_mu_oos <- list()
    
    nb_t <- sample_size * 21                        # Or: 500 # 252 # Length of samples (is & oos)
    t0 <- nb_t                                      # start date
    step_lgth <- 21
    
    nb_points <- (nb_dates - 2*nb_t) %/% step_lgth  # Max number of monthly steps in the dataset
    tend <- t0 + step_lgth*nb_points
    one_t <- as.matrix(rep(1,nb_t))
    date_list <- as.Date(df_assets[t0+nb_t,]$date)
    
    for(j in 1:nb_points){
        n_start_is <- t0+(j-1)*step_lgth - nb_t + 1
        n_end_is <- t0+(j-1)*step_lgth
        n_start_oos <- t0+(j-1)*step_lgth + 1       # Out-of-sample starts the day after
        n_end_oos <- t0+(j-1)*step_lgth + nb_t
        date_list[j] <- as.Date(df_assets[n_end_is,]$date)
        
        # In-sample
        data_temp_is <- df_assets[c(n_start_is:n_end_is),]
        if(shrinkage == TRUE){
            Sigma_is[[j]] <- linshrink_cov(X = as.matrix(data_temp_is[stock_ids]))
        } else {
            Sigma_is[[j]] <- var(data_temp_is[stock_ids])                 # Sigma_is = X'X
        }
        vec_mu_is[[j]] <- as.matrix(colMeans(data_temp_is[stock_ids]))    # v_is = X'y
        inv_Sigma_is[[j]] <- solve(Sigma_is[[j]])                         # Sigma_is^{-1}
        
        # Out-of-Sample
        data_temp_oos <- df_assets[c(n_start_oos:n_end_oos),]
        if(shrinkage == TRUE){
            Sigma_oos[[j]] <- linshrink_cov(X = as.matrix(data_temp_oos[stock_ids]))
        } else {
            Sigma_oos[[j]] <- var(data_temp_oos[stock_ids])
        }
        vec_mu_oos[[j]] <- as.matrix(colMeans(data_temp_oos[stock_ids]))
        inv_Sigma_oos[[j]] <- solve(Sigma_oos[[j]])
        
    }
    
    loss_series <- 0 # SSR
    dt_series <- 0
    Deltat_series <- 0
    
    for(j in 1:nb_points){
        loss_series[j] <- t(vec_mu_is[[j]]) %*% inv_Sigma_is[[j]] %*% Sigma_oos[[j]] %*% inv_Sigma_is[[j]] %*% vec_mu_is[[j]] - 
            2 * t(vec_mu_is[[j]]) %*% inv_Sigma_is[[j]] %*% vec_mu_oos[[j]] + 
            t(vec_mu_oos[[j]]) %*% inv_Sigma_oos[[j]] %*% vec_mu_oos[[j]]
        Deltat_series[j] <- 0.00001*norm(inv_Sigma_is[[j]] - inv_Sigma_oos[[j]], type = "2")  # Covariate shift
        dt_series[j] <- 10*sum(abs(vec_mu_is[[j]] - vec_mu_oos[[j]]))                         # Concept drift
    }
    
    output <- list(date_list = date_list, L = loss_series, d_t = dt_series, Delta_t = Deltat_series)
    return(output)
} # End of function

Then, we gather the estimates for the loss \(L\), the drift \(d_t\) and the shift \(D_t\).

shrink_k <- TRUE # With shrinkage

fct_output1m <- time_series_output(nb_assets, df_assets, 1, shrink_k)
output1m <- data.frame(date = fct_output1m$date_list, 
                       L = fct_output1m$L, 
                       d_t = fct_output1m$d_t, 
                       D_t = fct_output1m$Delta_t)
fct_output3m <- time_series_output(nb_assets, df_assets, 3, shrink_k)
output3m <- data.frame(date = fct_output3m$date_list, 
                       L = fct_output3m$L, 
                       d_t = fct_output3m$d_t, 
                       D_t = fct_output3m$Delta_t)
fct_output6m <- time_series_output(nb_assets, df_assets, 6, shrink_k)
output6m <- data.frame(date = fct_output6m$date_list, 
                       L = fct_output6m$L, 
                       d_t = fct_output6m$d_t, 
                       D_t = fct_output6m$Delta_t)

4 Graph

Finally, we plot the results.

bind_rows(bind_cols(output1m, sample ="1 month"),
          bind_cols(output3m, sample ="3 month"),
          bind_cols(output6m, sample ="6 month")) %>%
    pivot_longer(-c(date, sample), names_to = "indicator", values_to = "value") %>%
    ggplot(aes(x = date, y = value, color = indicator)) + geom_line() + theme_light() + 
    facet_grid(indicator~sample, scales = "free",
               labeller = labeller(indicator = c(D_t = "Covariate shift", d_t = "Concept drift", L = "Loss"),
                                 sample = c(`1 month` = "1 month samples", 
                                            `3 month` = "3 months samples", 
                                            `6 month` = "6 months samples"))) + 
    scale_color_manual(values = c("#0D70CD", "#FC8B02", "#FC0255"), 
                       labels = unname(TeX(c("$\\delta_t$", "$10^{-2} \\times\\Delta_t$", "$10\\times loss$"))), 
                       name = "Metric") +
    theme(legend.position = c(0.89,0.19),
          strip.background =element_rect(fill="#777777"),
          legend.spacing = unit(-0.6, "cm"), legend.key.size = unit(0.5, "cm"), 
          legend.margin = unit(-0.6, "cm"), legend.title = element_text(size = 11, face = "bold"),
          legend.box.spacing = unit(-0.6, "cm")) 

# ggsave("ts2.pdf", width = 8, height = 5)

5 Regression analysis

Regression analysis: Loss in terms of SSR Regressed against covariate shift and concept drift, both squared + cross term.

5.1 One month samples

First, the dataset where all is stored. We start with one month samples for the estimators.

data_reg1m <- data.frame(date = fct_output1m$date_list, 
                         D_t2 = (fct_output1m$Delta_t*100)^2,           # Scaled
                         d_t2 = (fct_output1m$d_t)^2, 
                         D_t_dt = fct_output1m$Delta_t*fct_output1m$d_t, 
                         D_t = fct_output1m$Delta_t, 
                         d_t = fct_output1m$d_t, 
                         loss = fct_output1m$L)

Next, regressions! The output is shown in format table below.

fit_m1_all <- lm(loss ~ D_t2 + d_t2 + D_t_dt, data_reg1m)
m1_all <- fit_m1_all %>% tidy() %>% bind_cols(sample = "ALL")
fit_m1_1980 <-  lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                   data_reg1m %>% dplyr::filter(date < "1990-01-01"))
m1_1980 <-  fit_m1_1980 %>% tidy() %>% bind_cols(sample = "1980s")
fit_m1_1990 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg1m %>% dplyr::filter(date < "2000-01-01", date >= "1990-01-01"))
m1_1990 <- fit_m1_1990 %>% tidy() %>% bind_cols(sample = "1990s")
fit_m1_2000 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg1m %>% dplyr::filter(date < "2010-01-01", date >= "2000-01-01"))
m1_2000 <- fit_m1_2000 %>% tidy() %>% bind_cols(sample = "2000s")
fit_m1_2010 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg1m %>% dplyr::filter(date >= "2010-01-01"))
m1_2010 <-fit_m1_2010  %>% tidy() %>% bind_cols(sample = "2010+")

5.2 Three month samples

Then, 3 months samples.

data_reg3m <- data.frame(date = fct_output3m$date_list, 
                         D_t2 = (fct_output3m$Delta_t*100)^2,             # Scaled
                         d_t2 = (fct_output3m$d_t)^2, 
                         D_t_dt = fct_output3m$Delta_t*fct_output3m$d_t, 
                         D_t = fct_output3m$Delta_t, 
                         d_t = fct_output3m$d_t, 
                         loss = fct_output3m$L)
fit_m3_all <- lm(loss ~ D_t2 + d_t2 + D_t_dt, data_reg3m)
m3_all <- fit_m3_all %>% tidy() %>% bind_cols(sample = "ALL")
fit_m3_1980 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg3m %>% dplyr::filter(date < "1990-01-01"))
m3_1980 <- fit_m3_1980 %>% tidy() %>% bind_cols(sample = "1980s")
fit_m3_1990 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg3m %>% dplyr::filter(date < "2000-01-01", date >= "1990-01-01"))
m3_1990 <- fit_m3_1990 %>% tidy() %>% bind_cols(sample = "1990s")
fit_m3_2000 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg3m %>% dplyr::filter(date < "2010-01-01", date >= "2000-01-01"))
m3_2000 <- fit_m3_2000 %>% tidy() %>% bind_cols(sample = "2000s")
fit_m3_2010 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg3m %>% dplyr::filter(date >= "2010-01-01"))
m3_2010 <- fit_m3_2010 %>% tidy() %>% bind_cols(sample = "2010+")

5.3 Six month samples

Finally, the 6 month samples

data_reg6m <- data.frame(date = fct_output6m$date_list, 
                         D_t2 = (fct_output6m$Delta_t*100)^2,            # Scaled 
                         d_t2 = (fct_output6m$d_t)^2, 
                         D_t_dt = fct_output6m$Delta_t*fct_output6m$d_t, 
                         D_t = fct_output6m$Delta_t, 
                         d_t = fct_output6m$d_t, 
                         loss = fct_output6m$L)

fit_m6_all <- lm(loss ~ D_t2 + d_t2 + D_t_dt, data_reg6m)
m6_all <- fit_m6_all %>% tidy() %>% bind_cols(sample = "ALL")
fit_m6_1980 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg6m %>% dplyr::filter(date < "1990-01-01"))
m6_1980 <- fit_m6_1980 %>% tidy() %>% bind_cols(sample = "1980s")
fit_m6_1990 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg6m %>% dplyr::filter(date < "2000-01-01", date >= "1990-01-01"))
m6_1990 <- fit_m6_1990 %>% tidy() %>% bind_cols(sample = "1990s")
fit_m6_2000 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg6m %>% dplyr::filter(date < "2010-01-01", date >= "2000-01-01"))
m6_2000 <- fit_m6_2000 %>% tidy() %>% bind_cols(sample = "2000s")
fit_m6_2010 <- lm(loss ~ D_t2 + d_t2 + D_t_dt, 
                  data_reg6m %>% dplyr::filter(date >= "2010-01-01"))
m6_2010 <- fit_m6_2010 %>% tidy() %>% bind_cols(sample = "2010+")

5.4 The table

The long code for the table is located below.
The p-value are coded in colors: the redder the lower.
For the t-statistics, the ones that have an absolute value larger than 3 are highlighted in blue.
We use a threshold of three following the conclusions of Harvey et al (2016) … and the cross-section of expected return.
No highly significant \(t\)-statitistic is negative. We recall that the regression equation is: \[L_t = a + b_DD_t^2 +b_dd_t^2+b_{dD}d_tD_t+e_t\]

term 1 month sample 3 month sample 6 month sample
Estimate Std. Err. t-stat p-val Estimate Std. Err. t-stat p-val Estimate Std. Err. t-stat p-val
ALL
(Intercept) 0.842 0.085 9.854 0.000 0.108 0.062 1.751 0.081 0.102 0.012 8.622 0.000
D_t2 0.000 0.000 −2.026 0.043 0.000 0.000 −0.860 0.390 0.000 0.000 −2.654 0.008
d_t2 0.172 0.110 1.568 0.118 −0.338 0.587 −0.577 0.564 −0.195 0.299 −0.654 0.514
D_t_dt 6.458 0.770 8.391 0.000 6.178 0.694 8.909 0.000 2.664 0.194 13.706 0.000
1980s
(Intercept) 0.851 0.261 3.260 0.001 0.136 0.078 1.748 0.083 0.078 0.041 1.879 0.063
D_t2 0.000 0.000 −0.133 0.894 0.000 0.000 −0.501 0.617 0.000 0.000 −0.979 0.330
d_t2 5.086 2.476 2.054 0.042 −2.174 1.297 −1.676 0.096 1.508 3.444 0.438 0.662
D_t_dt 2.776 2.310 1.202 0.232 5.473 0.807 6.783 0.000 2.528 0.804 3.143 0.002
1990s
(Intercept) 0.965 0.164 5.879 0.000 0.297 0.048 6.177 0.000 0.103 0.027 3.818 0.000
D_t2 0.000 0.000 −0.120 0.905 0.000 0.000 −0.315 0.754 0.000 0.000 −1.350 0.180
d_t2 1.683 0.827 2.036 0.044 0.683 0.673 1.016 0.312 1.550 1.027 1.510 0.134
D_t_dt 4.345 2.105 2.064 0.041 3.360 0.795 4.227 0.000 3.593 0.838 4.290 0.000
2000s
(Intercept) 0.743 0.167 4.461 0.000 0.143 0.039 3.693 0.000 0.150 0.018 8.472 0.000
D_t2 0.000 0.000 2.038 0.044 0.000 0.000 1.358 0.177 0.000 0.000 1.019 0.310
d_t2 0.568 0.425 1.334 0.185 1.241 0.566 2.193 0.030 0.195 0.368 0.531 0.596
D_t_dt 3.439 2.549 1.349 0.180 2.380 0.774 3.073 0.003 0.439 0.508 0.866 0.388
2010+
(Intercept) 0.654 0.144 4.524 0.000 −0.142 0.196 −0.723 0.471 0.073 0.023 3.143 0.002
D_t2 0.000 0.000 −0.427 0.670 0.000 0.000 −0.284 0.777 0.000 0.000 1.583 0.116
d_t2 0.148 0.101 1.464 0.145 −0.679 1.135 −0.598 0.551 0.240 0.391 0.613 0.541
D_t_dt 6.576 1.308 5.026 0.000 11.848 2.193 5.401 0.000 1.801 0.495 3.641 0.000


The code for the table (using the gt package):

bind_cols(
    bind_rows(m1_all, m1_1980, m1_1990, m1_2000, m1_2010),
    bind_rows(m3_all, m3_1980, m3_1990, m3_2000, m3_2010) %>% select(-term, -sample),
    bind_rows(m6_all, m6_1980, m6_1990, m6_2000, m6_2010) %>% select(-term, -sample)
) %>%
    group_by(sample) %>%    
    gt() %>%
    cols_label(
        `estimate...2` = "Estimate",
        `estimate...7` = "Estimate",
        `estimate...11` = "Estimate",
        `std.error...3` = "Std. Err.",
        `std.error...8` = "Std. Err.",
        `std.error...12` = "Std. Err.",
        `statistic...4` = "t-stat",
        `statistic...9` = "t-stat",
        `statistic...13` = "t-stat",
        `p.value...5` = "p-val",
        `p.value...10` = "p-val",
        `p.value...14` = "p-val"
    ) %>%
    tab_spanner(
        label = "1 month sample",
        columns = 2:5
    ) %>%
    tab_spanner(
        label = "3 month sample",
        columns = 7:10
    ) %>%
    tab_spanner(
        label = "6 month sample",
        columns = 11:14
    ) %>%
    fmt_number(
        columns = 2:14,
        decimals = 3,
        use_seps = FALSE
    )  %>%
    tab_style(
        style = list(
            cell_fill(color = "#DDDDDD"),
            cell_text(weight = "bold")
        ),
        locations = cells_row_groups()
    ) %>%
    data_color(
        columns = c("p.value...5","p.value...10", "p.value...14"),
        colors = scales::col_numeric(
            palette = c(
                "#FE756A", "#FFF97B"),
            domain = c(0, 1),
            alpha = 0.5)
    ) %>%
    tab_style(
        style = list(
            cell_fill(color = "#A2BAE4"),
            cell_text(style = "italic")
        ),
        locations = cells_body(
            columns = "statistic...4",
            rows = abs(`statistic...4`) > 3
        )
    ) %>%
    tab_style(
        style = list(
            cell_fill(color = "#A2BAE4"),
            cell_text(style = "italic")
        ),
        locations = cells_body(
            columns = "statistic...9",
            rows = abs(`statistic...9`) > 3
        )
    )  %>%
    tab_style(
        style = list(
            cell_fill(color = "#A2BAE4"),
            cell_text(style = "italic")
        ),
        locations = cells_body(
            columns = "statistic...13",
            rows = abs(`statistic...13`) > 3
        )
    )

6 Positive auto-regressive models for shifts

We now perform the estimation of the models

\[X_{t+1} = bX_t + e_{t+1},\] where \(X\) are the shift metrics.

shift <- c(1, 3, 6)
L <- cbind(data_reg1m$loss,                                # Reorganize the data 
           data_reg3m$loss,
           data_reg6m$loss)
D_t <- cbind(data_reg1m$D_t,
               data_reg3m$D_t,
               data_reg6m$D_t)
d_t <- cbind(data_reg1m$d_t,
               data_reg3m$d_t,
               data_reg6m$d_t)
ar_pred_L <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)  # Initialization of the variables below
ar_pred_D <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
ar_pred_d <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
rho_L <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
rho_D <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
rho_d <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
m_er_L <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
m_er_D <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
m_er_d <- matrix(0, nrow = nrow(data_reg1m), ncol = 3)
ar_sample <- 60

for(t in (ar_sample+7):nrow(data_reg1m)){
  for(j in 1:3){
    L_sample <- L[(t-ar_sample-shift[j]+1):(t-shift[j]),j]
    rho_L[t,j] <- min(L_sample/lag(L_sample, shift[j]), na.rm=T)
    m_er <- mean(L_sample - rho_L[t,j] * lag(L_sample, shift[j]), na.rm=T)
    m_er_L[t,j] <- m_er
    ar_pred_L[t,j] <- min(L_sample/lag(L_sample, shift[j]), na.rm=T) * L_sample[length(L_sample)] + m_er
    
    D_sample <- D_t[(t-ar_sample-shift[j]+1):(t-shift[j]),j]
    rho_D[t,j] <- min(D_sample/lag(D_sample, shift[j]), na.rm=T)
    m_er <- mean(D_sample - rho_D[t,j] * lag(D_sample, shift[j]), na.rm=T)
    m_er_D[t,j] <- m_er
    ar_pred_D[t,j] <- min(D_sample/lag(D_sample, shift[j]), na.rm=T) * D_sample[length(D_sample)] + m_er
    
    d_sample <- d_t[(t-ar_sample-shift[j]+1):(t-shift[j]),j]
    rho_d[t,j] <- min(d_sample/lag(d_sample, shift[j]), na.rm=T)
    m_er <- mean(d_sample - rho_d[t,j] * lag(d_sample, shift[j]), na.rm=T)
    m_er_d[t,j] <- m_er
    ar_pred_d[t,j] <- min(d_sample/lag(d_sample, shift[j]), na.rm=T) * d_sample[length(d_sample)] + m_er
  }
}

Next, some wrangling & the plots.

rho_L <- data_reg1m$date %>% 
  bind_cols(data.frame(rho_L)) %>% mutate(type = "Loss (L)")
colnames(rho_L) <- c("date", "1m", "3m", "6m", "type")
rho_D <- data_reg1m$date %>% 
  bind_cols(data.frame(rho_D)) %>% mutate(type = "Covariate shift (D)")
colnames(rho_D) <- c("date", "1m", "3m", "6m", "type")
rho_d <- data_reg1m$date %>% 
  bind_cols(data.frame(rho_d)) %>% mutate(type = "Concept drift (d)")
colnames(rho_d) <- c("date", "1m", "3m", "6m", "type")
ar1 <- bind_rows(rho_L, rho_D, rho_d) %>%
  filter(date > "1985-12-01") %>%     # There is a buffer period for the first estimation...
  pivot_longer(c("1m", "3m", "6m"), names_to = "sample", values_to = "value") %>%
  ggplot(aes(x = date, y = value, color = sample)) + geom_line() + facet_wrap(vars(type)) + theme_light() +
  theme(legend.position = c(0.8,0.83)) + ylab("Autoregressive coefficient") + xlab(element_blank())

d_t <- bind_cols(data_reg1m$date, data.frame(d_t))
D_t <- bind_cols(data_reg1m$date, data.frame(D_t))
colnames(d_t) <- c("date", "1m", "3m", "6m")
colnames(D_t) <- c("date", "1m", "3m", "6m")
d_t <- d_t %>% pivot_longer(-date, names_to = "sample", values_to = "value")
D_t <- D_t %>% pivot_longer(-date, names_to = "sample", values_to = "value")

er_L <- data_reg1m$date %>% 
  bind_cols(data.frame(m_er_L)) %>% mutate(type = "Loss (L)")
colnames(er_L) <- c("date", "1m", "3m", "6m", "type")
er_D <- data_reg1m$date %>% 
  bind_cols(data.frame(m_er_D)) %>% mutate(type = "Covariate shift (D)")
colnames(er_D) <- c("date", "1m", "3m", "6m", "type")
er_d <- data_reg1m$date %>% 
  bind_cols(data.frame(m_er_d)) %>% mutate(type = "Concept drift (d)")
colnames(er_d) <- c("date", "1m", "3m", "6m", "type")

ar2 <- bind_rows(er_L, er_D, er_d) %>%
  filter(date > "1985-12-01") %>%
  pivot_longer(c("1m", "3m", "6m"), names_to = "sample", values_to = "value") %>%
  ggplot(aes(x = date, y = value, color = sample)) + geom_line() + facet_wrap(vars(type), scales = "free") + theme_light() +
  theme(legend.position = c(0.53,0.83)) + ylab("Average error") + xlab(element_blank())
ar1 / ar2

# ggsave("ar2.pdf", width = 7, height = 4)