Skip to content

Commit

Permalink
Merge pull request #38 from getwilds/refactor
Browse files Browse the repository at this point in the history
Refactor
  • Loading branch information
sckott authored Mar 26, 2024
2 parents b86ff30 + b5a6893 commit 55474ad
Show file tree
Hide file tree
Showing 50 changed files with 751 additions and 383 deletions.
59 changes: 59 additions & 0 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# NOTE(review): the scrape flattened all indentation; structure below is the
# standard r-lib test-coverage template plus the Cromwell server steps.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

name: test-coverage

jobs:
  test-coverage:
    runs-on: ubuntu-latest
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

    steps:
      - uses: actions/checkout@v4

      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::covr
          needs: coverage

      # Integration tests need a live Cromwell server; run one in the
      # background on its default port (8000).
      - name: spin up cromwell
        run: |
          wget https://github.com/broadinstitute/cromwell/releases/download/86/cromwell-86.jar
          java -jar cromwell-86.jar server &

      - name: Test coverage
        run: |
          covr::codecov(
            quiet = FALSE,
            clean = FALSE,
            install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
          )
        shell: Rscript {0}
        env:
          CROMWELLURL: http://localhost:8000

      # Kill the background Cromwell JVM started above.
      - name: cleanup cromwell
        run: kill $(ps -eaf | grep 'cromwell-[0-9][0-9].jar' | awk '{ print $2 }')

      - name: Show testthat output
        if: always()
        run: |
          ## --------------------------------------------------------------------
          find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
        shell: bash

      - name: Upload test results
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: coverage-test-failures
          path: ${{ runner.temp }}/package
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rcromwell
Title: Convenience Tools for Managing WDL Workflows via Cromwell
Version: 3.2.1.92
Version: 3.2.2.91
Authors@R: c(
person("Amy", "Paguirigan", role = "aut",
comment = c(ORCID = "0000-0002-6819-9736")),
Expand All @@ -19,7 +19,8 @@ Imports:
purrr,
tidyr,
lubridate,
    rlang,
    glue
License: MIT + file LICENSE
Depends: R (>= 3.6.0)
Roxygen: list(markdown = TRUE, roclets = c("collate", "namespace", "rd",
Expand Down
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ check: build
test:
CROMWELLURL=${CROMWELLURL} ${RSCRIPT} -e "devtools::test()"

# Run the test suite under covr and report package coverage.
# Requires a running Cromwell server reachable at $CROMWELLURL.
# NOTE: Make recipes must be tab-indented; the scrape lost the tabs.
coverage:
	CROMWELLURL=${CROMWELLURL} \
	${RSCRIPT} -e 'if (!requireNamespace("covr", quietly = TRUE)) pak::pak("covr")' \
	-e 'Sys.setenv(NOT_CRAN = "true"); covr::package_coverage()'

readme:
${RSCRIPT} -e "knitr::knit('README.Rmd')"

Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export(workflow_options)
importFrom(dplyr,"%>%")
importFrom(dplyr,as_tibble)
importFrom(dplyr,tibble)
importFrom(glue,glue)
importFrom(httr,GET)
importFrom(httr,POST)
importFrom(httr,content)
Expand Down
7 changes: 5 additions & 2 deletions R/cromUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
#' @author Amy Paguirigan, Scott Chamberlain
#' @section Important:
#' Requires valid Cromwell server URL to be set. See [cromwell_settings]
#' @return a tibble of the options for a workflow previously run:
#' - read_from_cache
#' - workflow_failure_mode
#' - write_to_cache
#' @examples \dontrun{
#' jobs <- cromwell_jobs()
#' workflow_options(jobs$workflow_id[1])
Expand All @@ -26,7 +29,7 @@ workflow_options <- function(workflow_id, url = cw_url(), token = NULL) {
#' @template serverdeets
#' @author Amy Paguirigan, Scott Chamberlain
#' @inheritSection workflow_options Important
#' @return a tibble of the inputs for a workflow previously run
workflow_inputs <- function(workflow_id, url = cw_url(), token = NULL) {
check_url(url)
dplyr::as_tibble(
Expand Down
2 changes: 1 addition & 1 deletion R/cromwellAbort.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' @template serverdeets
#' @author Amy Paguirigan, Scott Chamberlain
#' @inheritSection workflow_options Important
#' @return a tibble
cromwell_abort <- function(workflow_id, url = cw_url(), token = NULL) {
check_url(url)
crom_mssg("Aborting job in Cromwell")
Expand Down
2 changes: 1 addition & 1 deletion R/cromwellBackends.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' @template serverdeets
#' @author Amy Paguirigan, Scott Chamberlain
#' @inheritSection workflow_options Important
#' @return Cromwell backend options as a list
cromwell_backends <- function(url = cw_url(), token = NULL) {
check_url(url)
crom_mssg("Getting backend options from Cromwell")
Expand Down
114 changes: 57 additions & 57 deletions R/cromwellCache.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,68 +6,68 @@
#' @template workflowid
#' @template serverdeets
#' @author Amy Paguirigan, Scott Chamberlain
#' @autoglobal
#' @inheritSection workflow_options Important
#' @details Currently does not support subworkflows well.
#' @return a tibble of metadata on call caching in a workflow; columns
#' vary depending on workflow; if no results, a zero row tibble
cromwell_cache <- function(workflow_id, url = cw_url(), token = NULL) {
  check_url(url)
  crom_mssg(glue(
    "Querying for call caching metadata for workflow id: {workflow_id}"
  ))
  # Fetch the raw workflow metadata from the Cromwell API, then reshape
  # the call-caching portion into a tibble (zero-row tibble if no calls).
  crommetadata <- cromwell_cache_http(workflow_id, url, token)
  cromwell_cache_process(crommetadata, workflow_id)
}

# GET the full metadata document for `workflow_id` from the Cromwell API.
# Subworkflows are not expanded (expandSubWorkflows = "false").
# Returns the parsed (list) response body.
cromwell_cache_http <- function(workflow_id, url = cw_url(), token = NULL) {
  http_get(
    url = make_url(url, "api/workflows/v1", workflow_id, "metadata"),
    query = list(expandSubWorkflows = "false"),
    as = "parsed",
    token = token
  )
}

# Reshape the `calls` portion of parsed Cromwell workflow metadata into a
# long-form tibble of call-caching information, one row per shard.
# Returns a zero-row tibble when the workflow has no calls.
#' @autoglobal
cromwell_cache_process <- function(crommetadata, workflow_id) {
  if (length(crommetadata$calls) == 0) {
    return(dplyr::tibble())
  }
  # we only want the calls data from the metadata for this workflow
  bob_calls <- purrr::pluck(crommetadata, "calls")
  bob_call_meta <-
    purrr::map(bob_calls, function(call_data) {
      # for each of the calls in the workflow...
      purrr::map_dfr(call_data, function(shard_data) {
        # and for each of the shards in that workflow...
        if ("inputs" %in% names(shard_data)) {
          a <- purrr::keep(
            shard_data,
            names(shard_data) %in% c("callCaching", "inputs", "outputs")
          ) # select only these lists
          # flatten them and make them a one-row data frame
          b <- dplyr::as_tibble(rbind(unlist(a)))
          # add the shard index associated
          b$shardIndex <- shard_data$shardIndex
        } else {
          # BUG FIX: original passed a named arg to as_tibble(), which has
          # signature as_tibble(x, ...) and errors with `x` missing; build
          # the one-column tibble directly instead.
          b <- dplyr::tibble(shardIndex = shard_data$shardIndex)
        }
        b$shardIndex <- as.character(b$shardIndex)
        b$workflow_id <- workflow_id
        b$executionStatus <- shard_data$executionStatus
        b$returnCode <- shard_data$returnCode
        b$jobId <- shard_data$jobId
        # then remove any data from the messy hitFailures lists
        dplyr::select(b, -dplyr::starts_with("callCaching.hitFailures"))
      })
    })
  geocache <- purrr::map_dfr(bob_call_meta, rbind, .id = "fullName")
  # split fullName ("workflow.call") into workflowName and callName;
  # extra = "merge" keeps dots inside the call name intact
  tidyr::separate(
    data = geocache,
    col = fullName,
    into = c("workflowName", "callName"),
    sep = "\\.",
    extra = "merge"
  )
}
2 changes: 1 addition & 1 deletion R/cromwellCall.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#' }
cromwell_call <- function(workflow_id, url = cw_url(), token = NULL) {
check_url(url)
crom_mssg(paste0("Querying for call metadata for workflow id: ", workflow_id))
crom_mssg(glue("Querying for call metadata for workflow id: {workflow_id}"))
crommetadata <-
http_get(
url = make_url(url, "api/workflows/v1", workflow_id, "metadata"),
Expand Down
Loading

0 comments on commit 55474ad

Please sign in to comment.