library("anomaly") library("ggplot2") ## Example 1 set.seed(0) x <- rnorm(5000) x[401:500] <- rnorm(100, 4, 1) x[1601:1800] <- rnorm(200, 0, 0.01) x[3201:3500] <- rnorm(300, 0, 10) x[c(1000, 2000, 3000, 4000)] <- rnorm(4, 0, 100) x <- (x - median(x))/mad(x) res <- capa(x) summary(res) plot(res) ## Example 2 res <- capa(x, type = "mean") collective_anomalies(res) head(point_anomalies(res)) plot(res) ## Example 2 a res <- capa(1 + 2 * x, type = "mean") nrow(collective_anomalies(res)) ## Example 4 data("machinetemp", package = "anomaly") attach(machinetemp) x <- (temperature - median(temperature)) / mad(temperature) res <- capa(x, type = "mean") canoms <- collective_anomalies(res) dim(canoms)[1] library("robustbase") n <- length(x) x.lagged <- matrix(c(x[1:(n - 1)], x[2:n]), n - 1, 2) rho_hat <- covMcd(x.lagged, cor = TRUE)$cor[1, 2] inflated_penalty <- 3 * (1 + rho_hat) / (1 - rho_hat) * log(n) res <- capa(x, type = "mean", beta = inflated_penalty, beta_tilde = inflated_penalty) summary(res) plot(res) ## Code used to generate Figure 2a) p <- ggplot(data = machinetemp, aes(x = 1:22695, y = temperature)) p <- p + geom_point(alpha = 0.3) xanom <- c(3987, 16344, 19516) yanom <- c(machinetemp$temperature[xanom[1]], machinetemp$temperature[xanom[2]], machinetemp$temperature[xanom[3]]) p <- p + geom_point(aes(x = xanom[1], y = yanom[1]), color = "black", size = 3, fill = "red", pch = 21, alpha = 0.3) p <- p + geom_point(aes(x = xanom[2], y = yanom[2]), color = "black", size = 3, fill = "red", pch = 21, alpha = 0.3) p <- p + geom_point(aes(x = xanom[3], y = yanom[3]), color = "black", size = 3, fill = "red", pch = 21, alpha = 0.3) p <- p + theme_bw() p <- p + theme(axis.text.y = element_blank()) p <- p + labs(x = "t") p <- p + theme(strip.text.y = element_blank()) print(p) ## Example 5 data("simulated", package = "anomaly") res <- capa(sim.data, type = "mean", min_seg_len = 2) plot(res, subset = 1:20) beta <- 2 * log(ncol(sim.data):1) beta[1] <- beta[1] + 3 * log(nrow(sim.data)) res <- capa(sim.data, type = "mean", min_seg_len = 2, beta = beta) plot(res, subset = 1:20) ## Example 6 set.seed(0) x1 <- rnorm(500) x2 <- rnorm(500) x3 <- rnorm(500) x4 <- rnorm(500) x1[151:200] <- x1[151:200] + 2 x2[171:200] <- x2[171:200] + 2 x3[161:190] <- x3[161:190] - 3 x1[351:390] <- x1[371:390] + 2 x3[351:400] <- x3[351:400] - 3 x4[371:400] <- x4[371:400] + 2 x4[451] <- x4[451] * max(1, abs(1 / x4[451])) * 6 x4[100] <- x4[100] * max(1, abs(1 / x4[100])) * 6 x2[050] <- x2[050] * max(1, abs(1 / x2[050])) * 6 x1 <- (x1 - median(x1)) / mad(x1) x2 <- (x2 - median(x2)) / mad(x2) x3 <- (x3 - median(x3)) / mad(x3) x4 <- (x4 - median(x4)) / mad(x4) x <- cbind(x1, x2, x3, x4) res <- capa(x, max_lag = 20, type = "mean") plot(res) ## Example 7 data("simulated", package = "anomaly") res <- pass(sim.data, max_seg_len = 20, alpha = 3) print(collective_anomalies(res)) ## Example 8 data("simulated", package = "anomaly") bard.res <- bard(sim.data) sampler.res <- sampler(bard.res, gamma = 1/3, num_draws = 1000) show(sampler.res) plot(sampler.res, marginals = TRUE) ## Micro Array Data library("ecp") library("gridExtra") data("ACGH", package = "ecp") acgh <- ACGH[[1]][, 1:20] ac_corrected <- function(X){ n <- length(X) rcor <- covMcd(matrix(c(X[2:n], X[1:(n-1)]), ncol = 2), cor = TRUE) psi <- rcor$cor[1, 2] correction_factor <- sqrt((1 - psi) / (1 + psi)) return(correction_factor * (X - median(X)) / mad(X)) } acgh <- acgh |> data.frame() |> sapply(ac_corrected) res.capa <- capa(acgh, type = "mean", max_lag = 50, max_seg_len = 200) res.pass <- pass(acgh, max_seg_len = 200, alpha = 3) canoms.capa <- collective_anomalies(res.capa) # get the strongest n anomalies from pass n <- 10 canoms.pass <- res.pass@results[1:n, ] pass.summary <- cbind(canoms.pass[, 1:2], data.frame("log.xstar" = log(canoms.pass$xstar), "strength" = log(canoms.pass$xstar)/log(canoms.pass$xstar[1]))) # get the strongest n anomalies from capa locations <- unique(canoms.capa[, 1:2]) capa.locations <- locations[order(unlist(Map(function(start) sum(canoms.capa[canoms.capa[, 1] == start, ]$mean.change), locations[, 1])), decreasing = TRUE), ][1:n, ] capa.test.stats <- sort(unlist(Map(function(start) sum(canoms.capa[canoms.capa[, 1] == start, ]$mean.change), locations[, 1])), decreasing = TRUE)[1:n] capa.summary <- cbind(capa.locations, data.frame("mean.change" = capa.test.stats, "strength" = capa.test.stats/capa.test.stats[1])) # merge results combined.summary <- cbind(capa.summary, pass.summary) x <- rep(0, nrow(acgh)) invisible(Map(function(a, b, v) x[a:b] <<- v, combined.summary[, 1], combined.summary[, 2], combined.summary[, 4])) x.df <- data.frame("n" = 1:nrow(acgh), "strength" = x) p.capa <- ggplot(x.df, aes(x = n, y = strength)) p.capa <- p.capa + geom_point() p.capa <- p.capa + theme_bw() p.capa <- p.capa + theme(axis.text.y = element_blank()) x <- rep(0, nrow(acgh)) invisible(Map(function(a, b, v) x[a:b] <<- v, combined.summary[, 5], combined.summary[, 6], combined.summary[, 8])) x.df <- data.frame("n" = 1:nrow(acgh), "strength" = x) p.pass <- ggplot(x.df, aes(x = n, y = strength)) p.pass <- p.pass + geom_point() p.pass <- p.pass + theme_bw() p.pass <- p.pass + theme(axis.text.y = element_blank()) combined.summary <- cbind(combined.summary, data.frame("rank" = factor(seq(1:10)))) df <- data.frame() p <- ggplot(df) + geom_point() + xlim(1, nrow(acgh)) + ylim(0, 1) p <- p + geom_rect(data = combined.summary[1:10, c(5, 6, 9)], inherit.aes = FALSE, mapping = aes(xmin = .data$start, xmax = .data$end, ymin = 0, ymax = 1), fill = "red", alpha = 0.5, color = "red", show.legend = FALSE) p <- p + theme_bw() p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) p <- p + theme(axis.text.y = element_blank()) p <- p + theme(axis.ticks.y = element_blank()) object <- res.capa X <- object@data subset <- 1:ncol(X) epoch <- nrow(X) X <- X[, subset, drop = FALSE] variate_names <- FALSE if (length(colnames(X)) != ncol(X)){ variate_names <- FALSE } if (!variate_names){ colnames(X) <- paste(subset) } nms <- colnames(X) X <- data.frame(x = rep(1:nrow(X), ncol(X)), variable = rep(colnames(X), each = nrow(X)), value = as.numeric(X)) out <- ggplot(X, aes(x = .data$x, y = .data$value)) + geom_point(alpha = 0.3) c_anoms <- collective_anomalies(object, epoch = epoch) c_anoms <- c_anoms[c_anoms$variate %in% subset, ] c_anoms <- c_anoms[c_anoms$start %in% combined.summary[, 1], ] if (nrow(c_anoms) > 0){ c_anoms$variable <- nms[c_anoms$variate] c_anoms$ymin <- -Inf c_anoms$ymax <- Inf c_anoms$start_with_lag <- c_anoms$start + c_anoms$start.lag c_anoms$end_with_lag <- c_anoms$end - c_anoms$end.lag out <- out + geom_rect(data = c_anoms, inherit.aes = FALSE, mapping = aes(xmin = .data$start, xmax = .data$end, ymin = .data$ymin, ymax = .data$ymax), fill = "blue", alpha = 0.2) out <- out + geom_rect(data = c_anoms, inherit.aes = FALSE, mapping = aes(xmin = .data$start_with_lag, xmax = .data$end_with_lag, ymin = .data$ymin, ymax = .data$ymax), alpha = 0.5, fill = "blue") } ## highlight the point anomalies p_anoms <- point_anomalies(object, epoch) p_anoms <- p_anoms[p_anoms$variate %in% subset, ] if (nrow(p_anoms) > 0) { p_anoms_df <- do.call(rbind, Map(function(a, b) X[X$variate == a & X$x == b, ], p_anoms$variate, p_anoms$location)) out <- out + geom_point(data = p_anoms_df, colour = "red", size = 1.5, alpha = 0.3) } ## grey out the data after epoch if (epoch != nrow(object@data)) { d <- data.frame(variable = nms[subset], x1 = epoch, x2 = nrow(object@data), y1 = -Inf, y2 = Inf) out <- out + geom_rect(data = d, inherit.aes = F, mapping = aes(xmin = .data$x1, xmax = .data$x2, ymin = .data$y1, ymax = .data$y2), fill = "yellow", alpha = 0.2) } ## rest of layout out <- out + facet_grid(.data$variable~., scales = "free_y") out <- out + theme_bw() + theme(axis.text.y = element_blank()) if (variate_names == FALSE) { out <- out + theme(strip.text.y = element_blank()) } out <- out + theme(axis.text.y = element_blank()) out <- out + theme(axis.ticks.y = element_blank()) out <- out + theme(axis.title.y = element_blank()) out <- out + theme(axis.title.x = element_blank()) out <- out + theme(axis.text.x = element_blank()) out <- out + theme(axis.ticks.x = element_blank()) g <- grid.arrange(out, p, nrow = 2, ncol = 1, heights = c(10, 1))