Skip to content

Commit

Permalink
Merge pull request #45 from ssi-dk/add_choice_flag
Browse files Browse the repository at this point in the history
Add possibility to get results for all seasons or only current season
  • Loading branch information
SofiaOtero authored Dec 5, 2024
2 parents db12e03 + 98d73c1 commit dedc6e9
Show file tree
Hide file tree
Showing 11 changed files with 185 additions and 58 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

## Features

* Added a new argument `only_current_season` to `seasonal_onset()`, `seasonal_burden_levels()` and `combined_seasonal_output()` which gives the possibility to either get output from only the current season or for all available seasons (#45).

* Added `combined_seasonal_output()` as the main function to run both `seasonal_onset()` and `seasonal_burden_levels()` to get a combined result for the newest season (#44).

* Added `seasonal_onset()` as a replacement for the deprecated `aedseo()` function (#41).
Expand Down
5 changes: 2 additions & 3 deletions R/0_documentation.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,12 @@ rd_family <- function(usage = NULL) {
paste("A character string specifying the family for modeling",
ifelse(usage == "combined", paste(" seasonal onset.")))
}
rd_only_current_season <- "Should the output only include results for the current season?"
rd_season_weeks <- function(usage = NULL) {
paste("A numeric vector of length 2, `c(start,end)`, with the start and end weeks of the seasons to
stratify the observations by. Must span the new year; ex: `season_weeks = c(21, 20)`.",
ifelse(usage == "onset", paste("If set to `NULL`, is means no stratification by season.")))
}
rd_tsd <- "An object containing time series data with 'time' and 'observation.'"

rd_seasonal_onset_return <- paste(
"\nA `seasonal_onset` object containing:\n",
"- 'reference_time': The time point for which the growth rate is estimated.\n",
Expand All @@ -41,7 +40,6 @@ rd_seasonal_onset_return <- paste(
"- 'skipped_window': Logical. Was the window skipped due to missing?\n",
"- 'converged': Logical. Was the IWLS judged to have converged?"
)

rd_seasonal_burden_levels_return <- paste(
"\nA list containing:\n",
"- 'season': The season that burden levels are calculated for.\n",
Expand All @@ -66,3 +64,4 @@ rd_seasonal_burden_levels_return <- paste(
" - 'exp': Uses the Exponential distribution for fitting.\n",
" - 'disease_threshold': The input disease threshold, which is also the very low level."
)
rd_tsd <- "An object containing time series data with 'time' and 'observation.'"
15 changes: 11 additions & 4 deletions R/combined_seasonal_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,24 +83,31 @@ combined_seasonal_output <- function(
conf_levels = 0.95,
decay_factor = 0.8,
n_peak = 6,
only_current_season = TRUE,
...
) {
# Run the models
burden_output <- seasonal_burden_levels(tsd = tsd, season_weeks = season_weeks, method = method,
conf_levels = conf_levels, decay_factor = decay_factor,
disease_threshold = disease_threshold, n_peak = n_peak,
family = family_quant, ...)
family = family_quant, only_current_season = only_current_season, ...)

onset_output <- seasonal_onset(tsd = tsd, k = k, level = level, disease_threshold = disease_threshold,
family = family, na_fraction_allowed = na_fraction_allowed,
season_weeks = season_weeks)
season_weeks = season_weeks, only_current_season = only_current_season)

# Extract current season from onset_output and create seasonal_onset
# Extract seasons from onset_output and create seasonal_onset
onset_output <- onset_output |>
dplyr::filter(.data$season == max(.data$season)) |>
dplyr::group_by(.data$season) |>
dplyr::mutate(onset_flag = cumsum(.data$seasonal_onset_alarm),
seasonal_onset = .data$onset_flag == 1 & !duplicated(.data$onset_flag)) |>
dplyr::select(-(.data$onset_flag))

# Extract only current season if assigned
if (only_current_season == TRUE) {
onset_output <- onset_output |>
dplyr::filter(.data$season == max(.data$season))
}

return(list(onset_output = onset_output, burden_output = burden_output))
}
95 changes: 57 additions & 38 deletions R/seasonal_burden_levels.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#' @param disease_threshold `r rd_disease_threshold(usage = "levels")`
#' @param n_peak A numeric value specifying the number of peak observations to be selected from each season in the
#' level calculations. The `n_peak` observations have to surpass the `disease_threshold` to be included.
#' @param only_current_season `r rd_only_current_season`
#' @param ... Arguments passed to the `fit_quantiles()` function.
#'
#' @return `r rd_seasonal_burden_levels_return`
Expand Down Expand Up @@ -76,6 +77,7 @@ seasonal_burden_levels <- function(
decay_factor = 0.8,
disease_threshold = 20,
n_peak = 6,
only_current_season = TRUE,
...
) {
# Check input arguments
Expand All @@ -89,6 +91,7 @@ seasonal_burden_levels <- function(
checkmate::assert_numeric(decay_factor, lower = 0, upper = 1, len = 1, add = coll)
checkmate::assert_numeric(n_peak, lower = 1, len = 1, add = coll)
checkmate::assert_integerish(disease_threshold, len = 1, add = coll)
checkmate::assert_logical(only_current_season, add = coll)
# Assert conf_levels based on the method chosen
if (method == "intensity_levels") {
checkmate::assert_numeric(conf_levels, lower = 0, upper = 1, len = 1,
Expand All @@ -106,47 +109,63 @@ seasonal_burden_levels <- function(
if (length(unique(seasonal_tsd$season)) <= 1) coll$push("There must be at least two unique seasons in the data.")
checkmate::reportAssertions(coll)

# Add weights and remove current season to get predictions for this season
weighted_seasonal_tsd <- seasonal_tsd |>
dplyr::filter(.data$season != max(.data$season)) |>
dplyr::mutate(year = purrr::map_chr(.data$season, ~ stringr::str_extract(.x, "[0-9]+")) |>
as.numeric()) |>
dplyr::mutate(weight = decay_factor^(max(.data$year) - .data$year))

# Select n_peak highest observations and filter observations >= disease_threshold
season_observations_and_weights <- weighted_seasonal_tsd |>
dplyr::select(-c("year", "time")) |>
peak_seasonal_tsd <- seasonal_tsd |>
dplyr::filter(.data$observation >= disease_threshold) |>
dplyr::slice_max(.data$observation, n = n_peak, with_ties = FALSE, by = "season")

# Run quantiles_fit function
quantiles_fit <- season_observations_and_weights |>
dplyr::select("observation", "weight") |>
fit_quantiles(weighted_observations = _, conf_levels = conf_levels, ...)
# main level function
main_level_fun <- function(seasonal_tsd) {
# Add weights and remove current season to get predictions for this season
weighted_seasonal_tsd <- seasonal_tsd |>
dplyr::filter(.data$season != max(.data$season)) |>
dplyr::mutate(year = purrr::map_chr(.data$season, ~ stringr::str_extract(.x, "[0-9]+")) |>
as.numeric()) |>
dplyr::mutate(weight = decay_factor^(max(.data$year) - .data$year)) |>
dplyr::select(-c("year", "time"))

# Run quantiles_fit function
quantiles_fit <- weighted_seasonal_tsd |>
dplyr::select("observation", "weight") |>
fit_quantiles(weighted_observations = _, conf_levels = conf_levels, ...)

# If method intensity_levels was chosen; use the high level from the `fit_quantiles` function as the high
# level and the disease_threshold as the very low level. The low and medium levels are defined as the relative
# increase between the very low level and high level.
results <- switch(method,
peak_levels = {
model_output <- append(quantiles_fit, list(season = max(seasonal_tsd$season)), after = 0)
model_output$values <- stats::setNames(c(disease_threshold, model_output$values),
c("very low", "low", "medium", "high"))
model_output <- append(model_output, list(disease_threshold = disease_threshold))
},
intensity_levels = {
level_step_log <- pracma::logseq(disease_threshold, quantiles_fit$values, n = 4)
model_output <- list(
season = max(seasonal_tsd$season),
high_conf_level = quantiles_fit$conf_levels,
values = stats::setNames(level_step_log, c("very low", "low", "medium", "high")),
par = quantiles_fit$par,
obj_value = quantiles_fit$conf_levels,
converged = quantiles_fit$converged,
family = quantiles_fit$family,
disease_threshold = disease_threshold
)
}
)
return(results)
}
# Select seasons for output based on only_current_season input argument
if (only_current_season == FALSE) {
# Group seasons to get results for all seasons available
unique_seasons <- unique(rev(peak_seasonal_tsd$season))
season_groups <- purrr::accumulate(unique_seasons, `c`)
season_groups_data <- purrr::map(season_groups[-1], ~ peak_seasonal_tsd |> dplyr::filter(.data$season %in% .x))

level_results <- purrr::map(season_groups_data, main_level_fun)

# If method intensity_levels was chosen; use the high level from the `fit_quantiles` function as the high
# level and the disease_threshold as the very low level. The low and medium levels are defined as the relative
# increase between the very low level and high level.
results <- switch(method,
peak_levels = {
model_output <- append(quantiles_fit, list(season = max(seasonal_tsd$season)), after = 0)
model_output$values <- stats::setNames(c(disease_threshold, model_output$values),
c("very low", "low", "medium", "high"))
model_output <- append(model_output, list(disease_threshold = disease_threshold))
},
intensity_levels = {
level_step_log <- pracma::logseq(disease_threshold, quantiles_fit$values, n = 4)
model_output <- list(
season = max(seasonal_tsd$season),
high_conf_level = quantiles_fit$conf_levels,
values = stats::setNames(level_step_log, c("very low", "low", "medium", "high")),
par = quantiles_fit$par,
obj_value = quantiles_fit$conf_levels,
converged = quantiles_fit$converged,
family = quantiles_fit$family,
disease_threshold = disease_threshold
)
}
)
return(results)
} else {
level_results <- main_level_fun(peak_seasonal_tsd)
}
return(level_results)
}
30 changes: 22 additions & 8 deletions R/seasonal_onset.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#' @param na_fraction_allowed Numeric value between 0 and 1 specifying the fraction of observables in the window
#' of size k that are allowed to be NA in onset calculations.
#' @param season_weeks `r rd_season_weeks(usage = "onset")`
#' @param only_current_season `r rd_only_current_season`
#'
#' @return `r rd_seasonal_onset_return`
#'
Expand Down Expand Up @@ -54,7 +55,8 @@ seasonal_onset <- function(
# TODO: #10 Include negative.binomial regressions. @telkamp7
),
na_fraction_allowed = 0.4,
season_weeks = NULL) {
season_weeks = NULL,
only_current_season = NULL) {
# Check input arguments
coll <- checkmate::makeAssertCollection()
checkmate::assert_data_frame(tsd, add = coll)
Expand All @@ -67,25 +69,32 @@ seasonal_onset <- function(
checkmate::assert_integerish(disease_threshold, add = coll)
checkmate::assert_integerish(season_weeks, len = 2, lower = 1, upper = 53,
null.ok = TRUE, add = coll)
checkmate::assert_logical(only_current_season, null.ok = TRUE, add = coll)
if (is.null(season_weeks) && !is.null(only_current_season)) {
coll$push("If season_weeks is NULL only_current_season must also be NULL")
}
if (!is.null(season_weeks) && is.null(only_current_season)) {
coll$push("If season_weeks is assigned only_current_season must also be assigned")
}
checkmate::reportAssertions(coll)

# Throw an error if any of the inputs are not supported
family <- rlang::arg_match(family)

# Extract the length of the series
n <- base::nrow(tsd)

# Allocate space for growth rate estimates
res <- tibble::tibble()
skipped_window <- base::rep(FALSE, base::nrow(tsd))

# Add the seasons to tsd if available
if (!is.null(season_weeks)) {
tsd <- tsd |> dplyr::mutate(season = epi_calendar(.data$time))
} else {
tsd <- tsd |> dplyr::mutate(season = "not_defined")
}

# Extract the length of the series
n <- base::nrow(tsd)

# Allocate space for growth rate estimates
res <- tibble::tibble()
skipped_window <- base::rep(FALSE, base::nrow(tsd))

for (i in k:n) {
# Index observations for this iteration
obs_iter <- tsd[(i - k + 1):i, ]
Expand Down Expand Up @@ -147,6 +156,11 @@ seasonal_onset <- function(
family = family
)

# Extract only current season if assigned
if (!is.null(season_weeks) && only_current_season == TRUE) {
ans <- ans |> dplyr::filter(.data$season == max(.data$season))
}

# Keep attributes from the `tsd` class
attr(ans, "time_interval") <- attr(tsd, "time_interval")

Expand Down
3 changes: 3 additions & 0 deletions man/combined_seasonal_output.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/seasonal_burden_levels.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/seasonal_onset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 31 additions & 4 deletions tests/testthat/test-combined_seasonal_output.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,32 @@
test_that("Output only includes one season", {
test_that("Test that selection of current and all seasons work as expected", {
start_date <- as.Date("2021-01-04")
end_date <- as.Date("2023-12-31")

weekly_dates <- seq.Date(from = start_date,
to = end_date,
by = "week")

obs <- stats::rpois(length(weekly_dates), 1000)

tsd_data <- to_time_series(
observation = obs,
time = as.Date(weekly_dates),
time_interval = "week"
)

current_season <- epi_calendar(end_date)

current_season_output <- combined_seasonal_output(tsd_data, only_current_season = TRUE)
all_seasons_output <- combined_seasonal_output(tsd_data, only_current_season = FALSE)

expect_equal(unique(current_season_output$onset_output$season), current_season)
expect_equal(unique(current_season_output$burden_output$season), current_season)

expect_gt(length(unique(all_seasons_output$onset_output$season)), 1)
expect_gt(length(all_seasons_output$burden_output), 1)
})

test_that("Test that onset_output has one more season than burden_output", {
start_date <- as.Date("2021-01-04")
end_date <- as.Date("2023-12-31")

Expand All @@ -15,8 +42,8 @@ test_that("Output only includes one season", {
time_interval = "week"
)

combined_df <- combined_seasonal_output(tsd = tsd_data)
all_seasons_output <- combined_seasonal_output(tsd_data, only_current_season = FALSE)

expect_equal(length(unique(combined_df$onset_output$season)), 1)
expect_equal(length(combined_df$burden_output$season), 1)
expect_length(unique(all_seasons_output$onset_output$season), 4)
expect_length(all_seasons_output$burden_output, 3)
})
25 changes: 25 additions & 0 deletions tests/testthat/test-seasonal_burden_levels.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,28 @@ test_that("Test that function fail with less than two seasons", {
)
)
})

test_that("Test that selection of current and all seasons work as expected", {
start_date <- as.Date("2021-01-04")
end_date <- as.Date("2023-12-31")

weekly_dates <- seq.Date(from = start_date,
to = end_date,
by = "week")

obs <- stats::rpois(length(weekly_dates), 1000)

tsd_data <- to_time_series(
observation = obs,
time = as.Date(weekly_dates),
time_interval = "week"
)

current_season <- epi_calendar(end_date)

current_level <- seasonal_burden_levels(tsd_data, only_current_season = TRUE)
all_levels <- seasonal_burden_levels(tsd_data, only_current_season = FALSE)

expect_equal(current_season, unique(current_level$season))
expect_gt(length(all_levels), 1)
})
Loading

0 comments on commit dedc6e9

Please sign in to comment.