# Preliminaries -----------------------------------------------------------

# install.packages("fHMM")
# install.packages("fHMM_1.3.0.tar.gz", repos = NULL, type ="source")
library("fHMM")
stopifnot(packageVersion("fHMM") >= '1.3.0')


# Download DAX data -------------------------------------------------------

dax <- download_data(symbol = "^GDAXI", from = "2000-01-01", to = "2022-12-31")
head(dax)


# Example 1: DAX ----------------------------------------------------------

### define controls
contr_dax <- list(
  states = 3, sdds = "t", 
  data = list(
    file = dax, date_column = "Date", data_column = "Close", 
    logreturns = TRUE, from = "2000-01-03", to = "2022-12-31"
  ),
  fit = list(
    runs = 100, iterlim = 300, gradtol = 1e-6, steptol = 1e-6
  )
)

### set controls
contr_dax <- set_controls(contr_dax)
class(contr_dax)

### prepare data
data_dax <- prepare_data(contr_dax)
summary(data_dax)

### plot DAX time series
events_dax <- fHMM_events(
  list(
    dates = c("2001-09-11", "2008-09-15", "2020-01-27"),
    labels = c("9/11 terrorist attack", "Bankruptcy of Lehman Brothers", 
               "First COVID-19 case in Germany")
  )
)
plot(data_dax, events = events_dax, title = "DAX time series")

### fit model
### - executing the following line takes 6 minutes on our device
### - strategies to reduce the computation time (results will be less reliable):
###   - reduce the values for 'runs' and 'iterlim' in 'control_dax' above
###   - increase the values for 'gradtol' and 'steptol' in 'control_dax' above
###   - increase the value for 'ncluster' in the line below, if more CPU cores
###     are available on your device (you can check 'parallel::detectCores()'
###     to detect the number of available CPU cores)
### - if you receive one of the following error messages, you need to decrease 
###   the value for 'ncluster' (in the most extreme case to '1' for sequential
###   computation):
###   - "error reading from connection"
###   - "all ... connections are in use"
dax_model_3t <- fit_model(data_dax, seed = 2, ncluster = 10)

### access model coefficients and confidence intervals
coef(dax_model_3t, alpha = 0.05)

### access transition probability matrix
Gamma <- parUncon2par(dax_model_3t$estimate, contr_dax)$Gamma
round(Gamma, 3)

### compute stationary distribution
delta <- oeli::stationary_distribution(Gamma)
round(delta, 3)

### plot likelihood values and state-dependent distributions
op <- par(mfrow = c(1, 2))
plot(dax_model_3t, plot_type = c("ll", "sdds"))
par(op)

### decode hidden states
dax_model_3t <- decode_states(dax_model_3t)
table(dax_model_3t$decoding)

### plot decoded time series
plot(dax_model_3t, events = events_dax)

### prediction
predict(dax_model_3t, ahead = 5)

### compute residuals
dax_model_3t <- compute_residuals(dax_model_3t)

### plot pseudo-residuals
plot(dax_model_3t, plot_type = "pr")

### Jarque-Bera test
res <- residuals(dax_model_3t)
tseries::jarque.bera.test(res)

### competing model
contr_dax_comp <- list(
  states = 2, sdds = "normal", 
  fit = list(
    runs = 100, iterlim = 300, gradtol = 1e-6, steptol = 1e-6
  )
)
dax_model_2n <- fit_model(
  data_dax, controls = contr_dax_comp, seed = 1, ncluster = 10
)

### compare models
compare_models(dax_model_2n, dax_model_3t)


# Example 2: Simulation ---------------------------------------------------

### define and set controls
contr_sim <- list(
  states = 2, sdds = "gamma(mu = 1|2)", horizon = 200, 
  fit = list(runs = 50)
)
contr_sim <- set_controls(contr_sim)
print(contr_sim)

### simulate data
pars <- fHMM_parameters(
  controls = contr_sim, Gamma = matrix(c(0.9, 0.2, 0.1, 0.8), nrow = 2), 
  sigma = c(0.1, 0.5)
)
data_sim <- prepare_data(contr_sim, true_parameters = pars, seed = 1)

### plot simulated time series
plot(data_sim)

### fit model
sim_model_2gamma <- fit_model(data_sim, seed = 1)

### summary output
summary(sim_model_2gamma)


# Example 3: Multiple data streams ----------------------------------------

### define and set controls
contr_hhmm <- list(
  hierarchy = TRUE, states = c(3, 2), 
  sdds = c("t", "t"), period = "m",
  data = list(
    file = list(unemp, spx), 
    data_column = c("rate_diff", "Close"), 
    date_column = c("date", "Date"), 
    from = "1970-01-01", to = "2020-01-01", 
    logreturns = c(FALSE, TRUE)
  ),
  fit = list(runs = 50, iterlim = 1000, gradtol = 1e-6, steptol = 1e-6)
)
contr_hhmm <- set_controls(contr_hhmm)

### prepare data
data_hhmm <- prepare_data(contr_hhmm)

### plot data streams
plot(data_hhmm, title = "S&P 500 and differences in US unemployment rate")

### fit model
### - executing the following chunk takes about 477 minutes on our device
### - please note the advice given in 'Example 1: DAX' above on how to reduce
###   the computation time to get a faster (but possibly less reliable) result
### - for reproducibility purposes, the model has been pre-computed and can
###   be accessed from the package as follows
unemp_spx_model_3_2_type2 <- fHMM::unemp_spx_model_3_2

if (TRUE) {
  
  ### refit the model
  unemp_spx_model_3_2 <- fit_model(data_hhmm, seed = 1, ncluster = 25)
  
  ### reproduce the state order
  state_order <- matrix(c(3, 2, 1, 2, 2, 2, 1, 1, 1), 3, 3)
  unemp_spx_model_3_2 <- reorder_states(unemp_spx_model_3_2, state_order)
}

### access model coefficients and confidence intervals
coef(unemp_spx_model_3_2, alpha = 0.05)

### plot estimated state-dependent distributions
plot(unemp_spx_model_3_2, plot_type = "sdds")

### plot decoded data streams
unemp_spx_model_3_2 <- decode_states(unemp_spx_model_3_2)
events_spx <- fHMM_events(
  list(
    dates = c("1980-01-01", "2000-03-01", "2008-01-01"),
    labels = c("1980s recession", "Dot-com bubble", "Global financial crisis 2008")
  )
)
plot(unemp_spx_model_3_2, events = events_spx)