## ----preliminaries, echo=FALSE, results='hide'---------------------- # Using knitr for manuscript library("knitr") # render_sweave() opts_chunk$set(engine = 'R', tidy = FALSE) # JSS code formatting options( prompt = "R> ", continue = "+ ", width = 70, useFancyQuotes = FALSE ) # Formatting options(scipen = 1, digits = 3) Sys.setenv(LANG = 'en') # RNG initialization set.seed(42) # Required packages library("scoringRules") library("crch") library("ggplot2") ## ----Chaining-func-example, echo=FALSE, dev='pdf', fig.width=2.5, ## fig.height=2.5, fig.align="center"---- a1 <- a2 <- 2.5 b1 <- b2 <- 7.5 x <- pmin(pmax(rnorm(10, 5, 1.5), 0.5), 9.5) y <- pmin(pmax(rnorm(10, 5, 1.5), 0.5), 9.5) w_ind <- (x < a1) | (x > b1) | (y < a2) | (y > b2) v_x <- pmin(pmax(x, a1), b1) v_y <- pmin(pmax(y, a2), b2) ggplot() + geom_rect(aes( xmin = a1, xmax = b1, ymin = a2, ymax = b2 ), fill = "lightblue") + scale_x_continuous( expand = c(0, 0), limits = c(0, 10), breaks = c(a1, b1), labels = c(expression(a[1]), expression(b[1])), name = expression(z[1]) ) + scale_y_continuous( expand = c(0, 0), limits = c(0, 10), breaks = c(a2, b2), labels = c(expression(a[2]), expression(b[2])), name = expression(z[2]) ) + theme_bw() + theme(panel.grid = element_blank()) + geom_segment( aes( x = x[w_ind], y = y[w_ind], xend = v_x[w_ind], yend = v_y[w_ind] ), lineend = "round", linejoin = "round", linewidth = 1, arrow = arrow(length = unit(0.1, "cm")), col = "red" ) + geom_point(aes(x = x, y = y), size = 2) + geom_point(aes(x = v_x, y = v_y), size = 2, shape = 4) ## ----Univ-weighted-score-example-1---------------------------------- obs <- rnorm(5) sample_m <- matrix(rnorm(5e4), nrow = 5) score_df <- data.frame( crps = crps_sample(obs, sample_m), owcrps = owcrps_sample(obs, sample_m), twcrps = twcrps_sample(obs, sample_m) ) print(score_df) ## ----Univ-weighted-score-example-2---------------------------------- t <- 0 score_df <- data.frame( crps = crps_sample(obs, sample_m), owcrps = owcrps_sample(obs, sample_m, a = t), twcrps = twcrps_sample(obs, sample_m, a = t) ) print(score_df) ## ----Weighted-score-illustration, echo=FALSE, dev='pdf', fig.width=10.4, ## fig.height=4.1, fig.align="center", out.width = "\\linewidth"---- y <- rnorm(1000) dat_norm <- matrix(rnorm(1000 * 100), nrow = 1000) dat_logis <- matrix(rlogis(1000 * 100), nrow = 1000) x <- seq(-3, 3, 0.1) ow_scores_norm <- sapply(x, function(t) mean(owcrps_sample(y, dat_norm, a = t))) tw_scores_norm <- sapply(x, function(t) mean(twcrps_sample(y, dat_norm, a = t))) ow_scores_logis <- sapply(x, function(t) mean(owcrps_sample(y, dat_logis, a = t))) tw_scores_logis <- sapply(x, function(t) mean(twcrps_sample(y, dat_logis, a = t))) # Initialize plot par(mai = c(0.9, 3.5, 0.3, 3.4), cex = 1.1) plot( x, tw_scores_norm, type = "l", col = "purple", lwd = 2, xlim = c(-3, 3), ylim = c(0, 1), xlab = "Threshold a", ylab = "Score value", xaxt = "n", yaxt = "n" ) lines(x, ow_scores_norm, col = "darkorange", lwd = 2) lines( x, tw_scores_logis, col = "purple", lty = "dashed", lwd = 2 ) lines( x, ow_scores_logis, col = "darkorange", lty = "dashed", lwd = 2 ) axis(1, at = c(-3, 0, 3)) axis(2, at = c(0, 0.5, 1)) # Add legend legend( "top", bty = "n", legend = c("twCRPS", "owCRPS", "Normal", "Logistic"), col = c("purple", "darkorange", "grey", "grey"), lty = c(1, 1, 1, 2), lwd = c(2, 2), ncol = 2 ) ## ----Multiv-weighted-score-example-1-------------------------------- d <- length(obs) twes_sample(obs, sample_m, a = t) twes_sample(obs, sample_m, a = rep(t, d)) ## ----Custom-weight-function-example-1------------------------------- mu <- 0 sigma <- 1 weight_func <- function(x) pnorm(x, mean = mu, sd = sigma) owcrps_sample(obs, sample_m, weight_func = weight_func) ## ----Custom-weight-function-example-2------------------------------- mu <- rnorm(d, 0, 0.5) sigma <- runif(d, 0.5, 1.5) weight_func <- function(x) prod(pnorm(x, mean = mu, sd = sigma)) owes_sample(obs, sample_m, weight_func = weight_func) ## ----Custom-weight-function-example-3------------------------------- weight_func <- get_weight_func(name = "norm_cdf", mu = 0, sigma = 1) owcrps_sample(obs, sample_m, weight_func = weight_func) ## ----Custom-weight-function-example-4------------------------------- weight_func <- get_weight_func(name = "norm_cdf", mu = mu, sigma = sigma) owes_sample(obs, sample_m, weight_func = weight_func) ## ----Custom-weight-function-example-5------------------------------- chain_func <- function(x) (x - mu) * pnorm(x, mu, sigma) + (sigma ^ 2) * dnorm(x, mu, sigma) mu <- 0 sigma <- 1 twcrps_sample(obs, sample_m, chain_func = chain_func) mu <- rnorm(d, 0, 0.5) sigma <- runif(d, 0.5, 1.5) twes_sample(obs, sample_m, chain_func = chain_func) ## ----Custom-weight-function-example-6------------------------------- chain_func <- get_weight_func("norm_cdf", mu = 0, sigma = 1, weight = FALSE) twcrps_sample(obs, sample_m, chain_func = chain_func) chain_func <- get_weight_func("norm_cdf", mu = mu, sigma = sigma, weight = FALSE) twes_sample(obs, sample_m, chain_func = chain_func) ## ----Prepare-post-processing-example, echo=FALSE-------------------- # load data data("RainIbk", package = "crch") RainIbk <- sqrt(RainIbk) ensfc <- RainIbk[, grep('^rainfc', names(RainIbk))] RainIbk$ensmean <- apply(ensfc, 1, mean) RainIbk$enssd <- apply(ensfc, 1, sd) RainIbk <- subset(RainIbk, enssd > 0) # split into training and test data data_train <- subset(RainIbk, as.Date(rownames(RainIbk)) <= "2004-11-30") data_eval <- subset(RainIbk, as.Date(rownames(RainIbk)) >= "2005-01-01") # fit post-processing models CRCHgauss <- crch( rain ~ ensmean | log(enssd), data = data_train, left = 0, dist = "gaussian" ) gauss_mu <- predict(CRCHgauss, data_eval, type = "location") gauss_sc <- predict(CRCHgauss, data_eval, type = "scale") CRCHlogis <- crch( rain ~ ensmean | log(enssd), data = data_train, left = 0, dist = "logistic" ) logis_mu <- predict(CRCHlogis, data_eval, type = "location") logis_sc <- predict(CRCHlogis, data_eval, type = "scale") CRCHstud <- crch( rain ~ ensmean | log(enssd), data = data_train, left = 0, dist = "student" ) stud_mu <- predict(CRCHstud, data_eval, type = "location") stud_sc <- predict(CRCHstud, data_eval, type = "scale") stud_df <- CRCHstud$df # get raw ensemble forecast ens_fc <- data_eval[, grep('^rainfc', names(RainIbk))] obs <- data_eval$rain ## ----Computing-scores-Gauss----------------------------------------- gauss_crps <- crps_cnorm( y = obs, location = gauss_mu, scale = gauss_sc, lower = 0, upper = Inf ) ## ----Computing-scores-logis-stud, echo=FALSE------------------------ ens_crps <- crps_sample(obs, dat = as.matrix(ens_fc)) logis_crps <- crps_clogis( obs, location = logis_mu, scale = logis_sc, lower = 0, upper = Inf ) stud_crps <- crps_ct( obs, df = stud_df, location = stud_mu, scale = stud_sc, lower = 0, upper = Inf ) ## ----Calculating-average-scores------------------------------------- scores <- data.frame( Logistic = logis_crps, Gaussian = gauss_crps, Students_t = stud_crps, Ensemble = ens_crps ) sapply(scores, mean) ## ----Post-processing-sample----------------------------------------- ens_size <- 1000 n <- length(obs) gauss_sample <- replicate(ens_size, rnorm(n, gauss_mu, gauss_sc)) gauss_sample[gauss_sample < 0] <- 0 ## ----Post-processing-twcrps-Gauss----------------------------------- t <- sqrt(30) gauss_twcrps <- twcrps_sample(y = obs, dat = gauss_sample, a = t) ## ----Post-processing-twcrps-logis-stud, echo=FALSE------------------ ens_twcrps <- twcrps_sample(obs, dat = as.matrix(ens_fc), a = t) logis_sample <- replicate(ens_size, rlogis(n, logis_mu, logis_sc)) logis_sample[logis_sample < 0] <- 0 logis_twcrps <- twcrps_sample(obs, dat = logis_sample, a = t) stud_sample <- replicate(ens_size, rt(n, df = stud_df) * stud_sc + stud_mu) stud_sample[stud_sample < 0] <- 0 stud_twcrps <- twcrps_sample(obs, dat = stud_sample, a = t) ## ----Calculating-average-weighted-scores---------------------------- scores <- data.frame( Logistic = logis_twcrps, Gaussian = gauss_twcrps, Students_t = stud_twcrps, Ensemble = ens_twcrps ) sapply(scores, mean) ## ----Post-processing-twcrps-custom-Gauss---------------------------- sigma <- 1 weight_func <- get_weight_func("norm_cdf", mu = t, sigma = sigma) chain_func <- get_weight_func("norm_cdf", mu = t, sigma = sigma, weight = FALSE) gauss_twcrps <- twcrps_sample(obs, gauss_sample, chain_func = chain_func) ## ----Post-processing-twcrps-custom-logis-stud, echo=FALSE----------- ens_twcrps <- twcrps_sample(obs, dat = as.matrix(ens_fc), chain_func = chain_func) logis_twcrps <- twcrps_sample(obs, dat = logis_sample, chain_func = chain_func) stud_twcrps <- twcrps_sample(obs, dat = stud_sample, chain_func = chain_func) ## ----Calculating-average-weighted-scores-custom--------------------- scores <- data.frame( CRCHlogis = logis_twcrps, CRCHgauss = gauss_twcrps, CRCHstud = stud_twcrps, Ensemble = ens_twcrps ) sapply(scores, mean) ## ----Data-MCMC-example, echo=FALSE---------------------------------- data("gdp", package = "scoringRules") data_train <- subset(gdp, vint == "2014Q1") data_eval <- subset(gdp, vint == "2015Q1" & grepl("2014", dt)) h <- 4 m <- 5000 # use modest number of MCMC iterations for computational simplicity fc_params <- ar_ms(data_train$val, forecast_periods = h, n_rep = m) mu <- t(fc_params$fcMeans) Sd <- t(fc_params$fcSds) X <- matrix(rnorm(h * m, mean = mu, sd = Sd), nrow = h, ncol = m) obs <- data_eval$val names(obs) <- data_eval$dt ## ----Univ-scores-MCMC-example--------------------------------------- scores_crps <- crps_sample(obs, X) scores_logs <- logs_sample(obs, X) print(cbind(scores_crps, scores_logs)) ## ----Weighted-univ-scores-MCMC-example------------------------------ t <- 0 scores_owcrps <- owcrps_sample(obs, X, b = t) scores_twcrps <- twcrps_sample(obs, X, b = t) scores_cols <- clogs_sample(obs, X, b = t, cens = FALSE) scores_cels <- clogs_sample(obs, X, b = t) print(cbind(scores_owcrps, scores_twcrps, scores_cols, scores_cels)) ## ----Custom-weight-MCMC-example------------------------------------- a <- 0 b <- 9 weight_func <- function(x) as.numeric((x < a) | (x > b)) chain_func <- function(x) (x < a) * (x - a) + (x > b) * (x - b) + a ## ----Custom-weighted-univ-scores-MCMC-example----------------------- scores_owcrps <- owcrps_sample(obs, X, weight_func = weight_func) scores_twcrps <- twcrps_sample(obs, X, chain_func = chain_func) print(cbind(scores_owcrps, scores_twcrps)) ## ----Weighted-multiv-scores-MCMC-example---------------------------- d <- 2 scores_twes <- twes_sample(obs[1:d], X[1:d, ], b = 0) scores_twvs <- twvs_sample(obs[1:d], X[1:d, ], b = 0) scores_twmmds <- twmmds_sample(obs[1:d], X[1:d, ], b = 0) print(cbind(scores_twes, scores_twvs, scores_twmmds)) # Session Info ------------------------------------------------------------ SessionInfo()