Merge pull request #45 from ssi-dk/add_choice_flag

Add possibility to get results for all seasons or only current season
ssi-dk · Dec 5, 2024 · dedc6e9 · dedc6e9
2 parents db12e03 + 98d73c1
commit dedc6e9
Show file tree

Hide file tree

Showing 11 changed files with 185 additions and 58 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -8,6 +8,8 @@
 
 ## Features
 
+* Added a new argument `only_current_season` to `seasonal_onset()`, `seasonal_burden_levels()` and `combined_seasonal_output()` which gives the possibility to either get output from only the current season or for all available seasons (#45).
+
 * Added `combined_seasonal_output()` as the main function to run both `seasonal_onset()` and `seasonal_burden_levels()` to get a combined result for the newest season (#44).
 
 * Added `seasonal_onset()` as a replacement for the deprecated `aedseo()` function (#41).

diff --git a/R/0_documentation.R b/R/0_documentation.R
@@ -19,13 +19,12 @@ rd_family <- function(usage = NULL) {
   paste("A character string specifying the family for modeling",
         ifelse(usage == "combined", paste(" seasonal onset.")))
 }
+rd_only_current_season <- "Should the output only include results for the current season?"
 rd_season_weeks <- function(usage = NULL) {
   paste("A numeric vector of length 2, `c(start,end)`, with the start and end weeks of the seasons to
   stratify the observations by. Must span the new year; ex: `season_weeks = c(21, 20)`.",
         ifelse(usage == "onset", paste("If set to `NULL`, is  means no stratification by season.")))
 }
-rd_tsd <- "An object containing time series data with 'time' and 'observation.'"
-
 rd_seasonal_onset_return <- paste(
   "\nA `seasonal_onset` object containing:\n",
   "- 'reference_time': The time point for which the growth rate is estimated.\n",
@@ -41,7 +40,6 @@ rd_seasonal_onset_return <- paste(
   "- 'skipped_window': Logical. Was the window skipped due to missing?\n",
   "- 'converged': Logical. Was the IWLS judged to have converged?"
 )
-
 rd_seasonal_burden_levels_return <- paste(
   "\nA list containing:\n",
   "- 'season': The season that burden levels are calculated for.\n",
@@ -66,3 +64,4 @@ rd_seasonal_burden_levels_return <- paste(
   "   - 'exp': Uses the Exponential distribution for fitting.\n",
   "   - 'disease_threshold': The input disease threshold, which is also the very low level."
 )
+rd_tsd <- "An object containing time series data with 'time' and 'observation.'"
diff --git a/R/combined_seasonal_output.R b/R/combined_seasonal_output.R
@@ -83,24 +83,31 @@ combined_seasonal_output <- function(
   conf_levels = 0.95,
   decay_factor = 0.8,
   n_peak = 6,
+  only_current_season = TRUE,
   ...
 ) {
   # Run the models
   burden_output <- seasonal_burden_levels(tsd = tsd, season_weeks = season_weeks, method = method,
                                           conf_levels = conf_levels, decay_factor = decay_factor,
                                           disease_threshold = disease_threshold, n_peak = n_peak,
-                                          family = family_quant, ...)
+                                          family = family_quant, only_current_season = only_current_season, ...)
 
   onset_output <- seasonal_onset(tsd = tsd, k = k, level = level, disease_threshold = disease_threshold,
                                  family = family, na_fraction_allowed = na_fraction_allowed,
-                                 season_weeks = season_weeks)
+                                 season_weeks = season_weeks, only_current_season = only_current_season)
 
-  # Extract current season from onset_output and create seasonal_onset
+  # Extract seasons from onset_output and create seasonal_onset
   onset_output <- onset_output |>
-    dplyr::filter(.data$season == max(.data$season)) |>
+    dplyr::group_by(.data$season) |>
     dplyr::mutate(onset_flag = cumsum(.data$seasonal_onset_alarm),
                   seasonal_onset = .data$onset_flag == 1 & !duplicated(.data$onset_flag)) |>
     dplyr::select(-(.data$onset_flag))
 
+  # Extract only current season if assigned
+  if (only_current_season == TRUE) {
+    onset_output <- onset_output |>
+      dplyr::filter(.data$season == max(.data$season))
+  }
+
   return(list(onset_output = onset_output, burden_output = burden_output))
 }
diff --git a/R/seasonal_burden_levels.R b/R/seasonal_burden_levels.R
@@ -28,6 +28,7 @@
 #' @param disease_threshold `r rd_disease_threshold(usage = "levels")`
 #' @param n_peak A numeric value specifying the number of peak observations to be selected from each season in the
 #' level calculations. The `n_peak` observations have to surpass the `disease_threshold` to be included.
+#' @param only_current_season `r rd_only_current_season`
 #' @param ... Arguments passed to the `fit_quantiles()` function.
 #'
 #' @return `r rd_seasonal_burden_levels_return`
@@ -76,6 +77,7 @@ seasonal_burden_levels <- function(
   decay_factor = 0.8,
   disease_threshold = 20,
   n_peak = 6,
+  only_current_season = TRUE,
   ...
 ) {
   # Check input arguments
@@ -89,6 +91,7 @@ seasonal_burden_levels <- function(
   checkmate::assert_numeric(decay_factor, lower = 0, upper = 1, len = 1, add = coll)
   checkmate::assert_numeric(n_peak, lower = 1, len = 1, add = coll)
   checkmate::assert_integerish(disease_threshold, len = 1, add = coll)
+  checkmate::assert_logical(only_current_season, add = coll)
   # Assert conf_levels based on the method chosen
   if (method == "intensity_levels") {
     checkmate::assert_numeric(conf_levels, lower = 0, upper = 1, len = 1,
@@ -106,47 +109,63 @@ seasonal_burden_levels <- function(
   if (length(unique(seasonal_tsd$season)) <= 1) coll$push("There must be at least two unique seasons in the data.")
   checkmate::reportAssertions(coll)
 
-  # Add weights and remove current season to get predictions for this season
-  weighted_seasonal_tsd <- seasonal_tsd |>
-    dplyr::filter(.data$season != max(.data$season)) |>
-    dplyr::mutate(year = purrr::map_chr(.data$season, ~ stringr::str_extract(.x, "[0-9]+")) |>
-                    as.numeric()) |>
-    dplyr::mutate(weight = decay_factor^(max(.data$year) - .data$year))
-
   # Select n_peak highest observations and filter observations >= disease_threshold
-  season_observations_and_weights <- weighted_seasonal_tsd |>
-    dplyr::select(-c("year", "time")) |>
+  peak_seasonal_tsd <- seasonal_tsd |>
     dplyr::filter(.data$observation >= disease_threshold) |>
     dplyr::slice_max(.data$observation, n = n_peak, with_ties = FALSE, by = "season")
 
-  # Run quantiles_fit function
-  quantiles_fit <- season_observations_and_weights |>
-    dplyr::select("observation", "weight") |>
-    fit_quantiles(weighted_observations = _, conf_levels = conf_levels, ...)
+  # main level function
+  main_level_fun <- function(seasonal_tsd) {
+    # Add weights and remove current season to get predictions for this season
+    weighted_seasonal_tsd <- seasonal_tsd |>
+      dplyr::filter(.data$season != max(.data$season)) |>
+      dplyr::mutate(year = purrr::map_chr(.data$season, ~ stringr::str_extract(.x, "[0-9]+")) |>
+                      as.numeric()) |>
+      dplyr::mutate(weight = decay_factor^(max(.data$year) - .data$year)) |>
+      dplyr::select(-c("year", "time"))
+
+    # Run quantiles_fit function
+    quantiles_fit <- weighted_seasonal_tsd |>
+      dplyr::select("observation", "weight") |>
+      fit_quantiles(weighted_observations = _, conf_levels = conf_levels, ...)
+
+    # If method intensity_levels was chosen; use the high level from the `fit_quantiles` function as the high
+    # level and the disease_threshold as the very low level. The low and medium levels are defined as the relative
+    # increase between the very low level and high level.
+    results <- switch(method,
+      peak_levels = {
+        model_output <- append(quantiles_fit, list(season = max(seasonal_tsd$season)), after = 0)
+        model_output$values <- stats::setNames(c(disease_threshold, model_output$values),
+                                               c("very low", "low", "medium", "high"))
+        model_output <- append(model_output, list(disease_threshold = disease_threshold))
+      },
+      intensity_levels = {
+        level_step_log <- pracma::logseq(disease_threshold, quantiles_fit$values, n = 4)
+        model_output <- list(
+          season = max(seasonal_tsd$season),
+          high_conf_level = quantiles_fit$conf_levels,
+          values = stats::setNames(level_step_log, c("very low", "low", "medium", "high")),
+          par = quantiles_fit$par,
+          obj_value = quantiles_fit$conf_levels,
+          converged = quantiles_fit$converged,
+          family = quantiles_fit$family,
+          disease_threshold = disease_threshold
+        )
+      }
+    )
+    return(results)
+  }
+  # Select seasons for output based on only_current_season input argument
+  if (only_current_season == FALSE) {
+    # Group seasons to get results for all seasons available
+    unique_seasons <- unique(rev(peak_seasonal_tsd$season))
+    season_groups <- purrr::accumulate(unique_seasons, `c`)
+    season_groups_data <- purrr::map(season_groups[-1], ~ peak_seasonal_tsd |> dplyr::filter(.data$season %in% .x))
+
+    level_results <- purrr::map(season_groups_data, main_level_fun)
 
-  # If method intensity_levels was chosen; use the high level from the `fit_quantiles` function as the high
-  # level and the disease_threshold as the very low level. The low and medium levels are defined as the relative
-  # increase between the very low level and high level.
-  results <- switch(method,
-    peak_levels = {
-      model_output <- append(quantiles_fit, list(season = max(seasonal_tsd$season)), after = 0)
-      model_output$values <- stats::setNames(c(disease_threshold, model_output$values),
-                                             c("very low", "low", "medium", "high"))
-      model_output <- append(model_output, list(disease_threshold = disease_threshold))
-    },
-    intensity_levels = {
-      level_step_log <- pracma::logseq(disease_threshold, quantiles_fit$values, n = 4)
-      model_output <- list(
-        season = max(seasonal_tsd$season),
-        high_conf_level = quantiles_fit$conf_levels,
-        values = stats::setNames(level_step_log, c("very low", "low", "medium", "high")),
-        par = quantiles_fit$par,
-        obj_value = quantiles_fit$conf_levels,
-        converged = quantiles_fit$converged,
-        family = quantiles_fit$family,
-        disease_threshold = disease_threshold
-      )
-    }
-  )
-  return(results)
+  } else {
+    level_results <- main_level_fun(peak_seasonal_tsd)
+  }
+  return(level_results)
 }
diff --git a/R/seasonal_onset.R b/R/seasonal_onset.R
@@ -13,6 +13,7 @@
 #' @param na_fraction_allowed Numeric value between 0 and 1 specifying the fraction of observables in the window
 #' of size k that are allowed to be NA in onset calculations.
 #' @param season_weeks `r rd_season_weeks(usage = "onset")`
+#' @param only_current_season `r rd_only_current_season`
 #'
 #' @return `r rd_seasonal_onset_return`
 #'
@@ -54,7 +55,8 @@ seasonal_onset <- function(
       # TODO: #10 Include negative.binomial regressions. @telkamp7
     ),
     na_fraction_allowed = 0.4,
-    season_weeks = NULL) {
+    season_weeks = NULL,
+    only_current_season = NULL) {
   # Check input arguments
   coll <- checkmate::makeAssertCollection()
   checkmate::assert_data_frame(tsd, add = coll)
@@ -67,25 +69,32 @@ seasonal_onset <- function(
   checkmate::assert_integerish(disease_threshold, add = coll)
   checkmate::assert_integerish(season_weeks, len = 2, lower = 1, upper = 53,
                                null.ok = TRUE, add = coll)
+  checkmate::assert_logical(only_current_season, null.ok = TRUE, add = coll)
+  if (is.null(season_weeks) && !is.null(only_current_season)) {
+    coll$push("If season_weeks is NULL only_current_season must also be NULL")
+  }
+  if (!is.null(season_weeks) && is.null(only_current_season)) {
+    coll$push("If season_weeks is assigned only_current_season must also be assigned")
+  }
   checkmate::reportAssertions(coll)
 
   # Throw an error if any of the inputs are not supported
   family <- rlang::arg_match(family)
 
-  # Extract the length of the series
-  n <- base::nrow(tsd)
-
-  # Allocate space for growth rate estimates
-  res <- tibble::tibble()
-  skipped_window <- base::rep(FALSE, base::nrow(tsd))
-
   # Add the seasons to tsd if available
   if (!is.null(season_weeks)) {
     tsd <- tsd |> dplyr::mutate(season = epi_calendar(.data$time))
   } else {
     tsd <- tsd |> dplyr::mutate(season = "not_defined")
   }
 
+  # Extract the length of the series
+  n <- base::nrow(tsd)
+
+  # Allocate space for growth rate estimates
+  res <- tibble::tibble()
+  skipped_window <- base::rep(FALSE, base::nrow(tsd))
+
   for (i in k:n) {
     # Index observations for this iteration
     obs_iter <- tsd[(i - k + 1):i, ]
@@ -147,6 +156,11 @@ seasonal_onset <- function(
     family = family
   )
 
+  # Extract only current season if assigned
+  if (!is.null(season_weeks) && only_current_season == TRUE) {
+    ans <- ans |> dplyr::filter(.data$season == max(.data$season))
+  }
+
   # Keep attributes from the `tsd` class
   attr(ans, "time_interval") <- attr(tsd, "time_interval")
 

diff --git a/man/combined_seasonal_output.Rd b/man/combined_seasonal_output.Rd
diff --git a/man/seasonal_burden_levels.Rd b/man/seasonal_burden_levels.Rd
diff --git a/man/seasonal_onset.Rd b/man/seasonal_onset.Rd
diff --git a/tests/testthat/test-combined_seasonal_output.R b/tests/testthat/test-combined_seasonal_output.R
@@ -1,5 +1,32 @@
-test_that("Output only includes one season", {
+test_that("Test that selection of current and all seasons work as expected", {
+  start_date <- as.Date("2021-01-04")
+  end_date <- as.Date("2023-12-31")
+
+  weekly_dates <- seq.Date(from = start_date,
+                           to = end_date,
+                           by = "week")
+
+  obs <- stats::rpois(length(weekly_dates), 1000)
+
+  tsd_data <- to_time_series(
+    observation = obs,
+    time = as.Date(weekly_dates),
+    time_interval = "week"
+  )
+
+  current_season <- epi_calendar(end_date)
+
+  current_season_output <- combined_seasonal_output(tsd_data, only_current_season = TRUE)
+  all_seasons_output <- combined_seasonal_output(tsd_data, only_current_season = FALSE)
+
+  expect_equal(unique(current_season_output$onset_output$season), current_season)
+  expect_equal(unique(current_season_output$burden_output$season), current_season)
+
+  expect_gt(length(unique(all_seasons_output$onset_output$season)), 1)
+  expect_gt(length(all_seasons_output$burden_output), 1)
+})
 
+test_that("Test that onset_output has one more season than burden_output", {
   start_date <- as.Date("2021-01-04")
   end_date <- as.Date("2023-12-31")
 
@@ -15,8 +42,8 @@ test_that("Output only includes one season", {
     time_interval = "week"
   )
 
-  combined_df <- combined_seasonal_output(tsd = tsd_data)
+  all_seasons_output <- combined_seasonal_output(tsd_data, only_current_season = FALSE)
 
-  expect_equal(length(unique(combined_df$onset_output$season)), 1)
-  expect_equal(length(combined_df$burden_output$season), 1)
+  expect_length(unique(all_seasons_output$onset_output$season), 4)
+  expect_length(all_seasons_output$burden_output, 3)
 })
diff --git a/tests/testthat/test-seasonal_burden_levels.R b/tests/testthat/test-seasonal_burden_levels.R
@@ -141,3 +141,28 @@ test_that("Test that function fail with less than two seasons", {
     )
   )
 })
+
+test_that("Test that selection of current and all seasons work as expected", {
+  start_date <- as.Date("2021-01-04")
+  end_date <- as.Date("2023-12-31")
+
+  weekly_dates <- seq.Date(from = start_date,
+                           to = end_date,
+                           by = "week")
+
+  obs <- stats::rpois(length(weekly_dates), 1000)
+
+  tsd_data <- to_time_series(
+    observation = obs,
+    time = as.Date(weekly_dates),
+    time_interval = "week"
+  )
+
+  current_season <- epi_calendar(end_date)
+
+  current_level <- seasonal_burden_levels(tsd_data, only_current_season = TRUE)
+  all_levels <- seasonal_burden_levels(tsd_data, only_current_season = FALSE)
+
+  expect_equal(current_season, unique(current_level$season))
+  expect_gt(length(all_levels), 1)
+})