From 445995a5304fecd35eefaa68b74d24b06ab22971 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 1 Mar 2024 10:43:37 -0500
Subject: [PATCH 001/186] small change

---
 pipelines/skylab/snM3C/snM3C.wdl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pipelines/skylab/snM3C/snM3C.wdl b/pipelines/skylab/snM3C/snM3C.wdl
index bcdc71a861..bac72eb68c 100644
--- a/pipelines/skylab/snM3C/snM3C.wdl
+++ b/pipelines/skylab/snM3C/snM3C.wdl
@@ -23,7 +23,6 @@ workflow snM3C {
         Int num_downstr_bases = 2
         Int compress_level = 5
         Int batch_number
-
     }
 
     # version of the pipeline

From cae0f54dd1480271a273fc40351b1fdb56d5e233 Mon Sep 17 00:00:00 2001
From: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com>
Date: Wed, 6 Mar 2024 13:21:43 -0500
Subject: [PATCH 002/186] azurize optimus  (#1228)

* add logic to choose which docker

* fix param_meta and import

* add cloud provider to checkinput

* handle hard coded white list paths in CheckInputs.wdl

* last few dockers

* last few dockers

* last few dockers

* change error msg

* use ubuntu image

* use ubuntu image

* change whitelists

* point to azure public whitelists

* add sas token

* echo whitelist

* echo whitelist

* testing for coa

* testing for coa

* change back to terra buckets for whitelists

* change whitelists to point at public azure bucket

* files to strings

* print statements to checkinputs

* string to files

* change to terra bucket paths

* strings not files

* append sas token

* append sas token

* append sas  and use strings

* back to bucket urls

* back to bucket urls

* use google cloud urls

* using public urls

* trying to export sas_token

* trying to export sas_token

* trying to export sas_token

* terra on gcp

* update azure whitelist files

* changelogs

* changelogs

* changelogs

* changelogs

* fix some inputs

* fix some inputs

* fix some inputs

* fix some inputs

* update optimus dockers

* warp_tools_docker_path for staralign

* stop using ice lake as default

* update pipeline docs

* 2 threads

* counting mode

* changelogs

---------

Co-authored-by: phendriksen100 <103142505+phendriksen100@users.noreply.github.com>
Co-authored-by: kayleemathews <kmathews@broadinstitute.org>
---
 .../skylab/multiome/Multiome.changelog.md     |   5 +
 pipelines/skylab/multiome/Multiome.wdl        |   6 +-
 pipelines/skylab/multiome/atac.changelog.md   |   7 +-
 pipelines/skylab/multiome/atac.wdl            |   2 +-
 .../Plumbing/10k_pbmc_downsampled.json        |   3 +-
 pipelines/skylab/optimus/Optimus.changelog.md |   5 +
 pipelines/skylab/optimus/Optimus.wdl          | 101 ++++++++++++++----
 .../Plumbing/human_v3_example.json            |   3 +-
 .../Plumbing/mouse_v2_example.json            |   3 +-
 .../Plumbing/mouse_v2_snRNA_example.json      |   3 +-
 .../skylab/paired_tag/PairedTag.changelog.md  |   4 +
 pipelines/skylab/paired_tag/PairedTag.wdl     |   2 +-
 .../skylab/slideseq/SlideSeq.changelog.md     |   8 ++
 pipelines/skylab/slideseq/SlideSeq.wdl        |  57 ++++++++--
 .../Plumbing/Puck_210817_11.mm10.json         |   3 +-
 ...iSampleSmartSeq2SingleNucleus.changelog.md |  10 ++
 .../MultiSampleSmartSeq2SingleNucleus.wdl     |  22 +++-
 .../test_inputs/Plumbing/mouse_example.json   |   3 +-
 pipelines/skylab/snM3C/snM3C.changelog.md     |   2 +-
 pipelines/skylab/snM3C/snM3C.wdl              |   1 +
 tasks/skylab/CheckInputs.wdl                  |  36 ++++++-
 tasks/skylab/FastqProcessing.wdl              |   7 +-
 tasks/skylab/H5adUtils.wdl                    |   8 +-
 tasks/skylab/MergeSortBam.wdl                 |   6 +-
 tasks/skylab/Metrics.wdl                      |  13 +--
 tasks/skylab/RunEmptyDrops.wdl                |   4 +-
 tasks/skylab/StarAlign.wdl                    |  26 +++--
 .../TestMultiSampleSmartSeq2SingleNucleus.wdl |   5 +-
 verification/test-wdls/TestMultiome.wdl       |   4 +-
 verification/test-wdls/TestOptimus.wdl        |   5 +-
 verification/test-wdls/TestSlideSeq.wdl       |   4 +-
 website/docs/Pipelines/ATAC/README.md         |   2 +-
 .../Pipelines/Multiome_Pipeline/README.md     |   5 +-
 .../docs/Pipelines/Optimus_Pipeline/README.md |   3 +-
 .../Pipelines/PairedTag_Pipeline/README.md    |   2 +-
 .../Pipelines/SlideSeq_Pipeline/README.md     |   3 +-
 .../README.md                                 |   3 +-
 .../multi_snss2.methods.md                    |   4 +-
 38 files changed, 301 insertions(+), 89 deletions(-)

diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md
index da8bc38753..6a82ca00da 100644
--- a/pipelines/skylab/multiome/Multiome.changelog.md
+++ b/pipelines/skylab/multiome/Multiome.changelog.md
@@ -1,3 +1,8 @@
+# 3.2.2
+2024-03-01 (Date of Last Commit)
+
+* Updated the Optimus.wdl to run on Azure. This change does not affect the Multiome pipeline.
+
 # 3.2.1
 2024-02-29 (Date of Last Commit)
 
diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index 1e6bc2edae..64aa671836 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -6,10 +6,11 @@ import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
 import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender
 
 workflow Multiome {
-    String pipeline_version = "3.2.1"
+    String pipeline_version = "3.2.2"
 
     input {
         String input_id
+        String cloud_provider
 
         # Optimus Inputs
         String counting_mode = "sn_rna"
@@ -68,7 +69,8 @@ workflow Multiome {
             ignore_r1_read_length = ignore_r1_read_length,
             star_strand_mode = star_strand_mode,
             count_exons = count_exons,
-            soloMultiMappers = soloMultiMappers
+            soloMultiMappers = soloMultiMappers,
+            cloud_provider = cloud_provider
     }
 
     # Call the ATAC workflow
diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md
index 170caa2aed..005a2fb782 100644
--- a/pipelines/skylab/multiome/atac.changelog.md
+++ b/pipelines/skylab/multiome/atac.changelog.md
@@ -1,4 +1,9 @@
-# 1.1.8
+# 1.1.9
+2024-03-01 (Date of Last Commit)
+
+*  Updated the Optimus.wdl to run on Azure. This change does not affect the ATAC pipeline.
+
+# 1.1.8
 2024-02-07 (Date of Last Commit)
 
 * Updated the Metrics tasks to exclude mitochondrial genes from reads_mapped_uniquely, reads_mapped_multiple and reads_mapped_exonic, reads_mapped_exonic_as and reads_mapped_intergenic
diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 3dd81d7bf5..0431ba3997 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -41,7 +41,7 @@ workflow ATAC {
     String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
   }
 
-  String pipeline_version = "1.1.8"
+  String pipeline_version = "1.1.9"
 
   parameter_meta {
     read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads"
diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
index 7d15111f38..bd9b7a1172 100644
--- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
+++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
@@ -23,5 +23,6 @@
   "Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake",
   "Multiome.Atac.num_threads_bwa":"16",
   "Multiome.Atac.mem_size_bwa":"64", 
-  "Multiome.soloMultiMappers":"Uniform"
+  "Multiome.soloMultiMappers":"Uniform",
+  "Multiome.cloud_provider":"gcp"
 }
diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md
index 23098dd7a0..d76bedaed5 100644
--- a/pipelines/skylab/optimus/Optimus.changelog.md
+++ b/pipelines/skylab/optimus/Optimus.changelog.md
@@ -1,3 +1,8 @@
+# 6.4.2
+2024-03-01 (Date of Last Commit)
+* Updated the Optimus.wdl to run on Azure.
+
+
 # 6.4.1
 2024-02-29 (Date of Last Commit)
 * Added mem and disk to inputs of Join Barcodes task of Multiome workflow; does not impact the Optimus workflow
diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index 159490afbf..ccfa5e35e5 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -7,6 +7,7 @@ import "../../../tasks/skylab/RunEmptyDrops.wdl" as RunEmptyDrops
 import "../../../tasks/skylab/CheckInputs.wdl" as OptimusInputChecks
 import "../../../tasks/skylab/MergeSortBam.wdl" as Merge
 import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
+import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Optimus {
   meta {
@@ -14,6 +15,8 @@ workflow Optimus {
   }
 
   input {
+    String cloud_provider
+
     # Mode for counting either "sc_rna" or "sn_rna"
     String counting_mode = "sc_rna"
 
@@ -45,36 +48,71 @@ workflow Optimus {
 
     # Set to true to override input checks and allow pipeline to proceed with invalid input
     Boolean force_no_check = false
-    
+
     # Check that tenx_chemistry_version matches the length of the read 1 fastq;
     # Set to true if you expect that r1_read_length does not match length of UMIs/barcodes for 10x chemistry v2 (26 bp) or v3 (28 bp).
     Boolean ignore_r1_read_length = false
 
     # Set to Forward, Reverse, or Unstranded to account for stranded library preparations (per STARsolo documentation)
     String star_strand_mode = "Forward"
-    
+
 # Set to true to count reads aligned to exonic regions in sn_rna mode
     Boolean count_exons = false
 
     # this pipeline does not set any preemptible varibles and only relies on the task-level preemptible settings
     # you could override the tasklevel preemptible settings by passing it as one of the workflows inputs
     # for example: `"Optimus.StarAlign.preemptible": 3` will let the StarAlign task, which by default disables the
-    # usage of preemptible machines, attempt to request for preemptible instance up to 3 times. 
+    # usage of preemptible machines, attempt to request for preemptible instance up to 3 times.
   }
 
   # version of this pipeline
 
-  String pipeline_version = "6.4.1"
+  String pipeline_version = "6.4.2"
 
   # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
   Array[Int] indices = range(length(r1_fastq))
 
   # 10x parameters
-  File whitelist_v2 = "gs://gcp-public-data--broad-references/RNA/resources/737k-august-2016.txt"
-  File whitelist_v3 = "gs://gcp-public-data--broad-references/RNA/resources/3M-febrary-2018.txt"
+  File gcp_whitelist_v2 = "gs://gcp-public-data--broad-references/RNA/resources/737k-august-2016.txt"
+  File gcp_whitelist_v3 = "gs://gcp-public-data--broad-references/RNA/resources/3M-febrary-2018.txt"
+  File azure_whitelist_v2 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/737k-august-2016.txt"
+  File azure_whitelist_v3 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/3M-febrary-2018.txt"
+
   # Takes the first read1 FASTQ from the inputs to check for chemistry match
   File r1_single_fastq = r1_fastq[0]
 
+  # docker images
+  String picard_cloud_docker = "picard-cloud:2.26.10"
+  String pytools_docker = "pytools:1.0.0-1661263730"
+  String empty_drops_docker = "empty-drops:1.0.1-4.2"
+  String star_docker = "star:1.0.1-2.7.11a-1692706072"
+  String warp_tools_docker_2_0_1 = "warp-tools:2.0.1"
+  String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985"
+  #TODO how do we handle these?
+  String alpine_docker = "alpine-bash:latest"
+  String gcp_alpine_docker_prefix = "bashell/"
+  String acr_alpine_docker_prefix = "dsppipelinedev.azurecr.io/"
+  String alpine_docker_prefix = if cloud_provider == "gcp" then gcp_alpine_docker_prefix else acr_alpine_docker_prefix
+
+  String ubuntu_docker = "ubuntu_16_0_4:latest"
+  String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/"
+  String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/"
+  String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix
+
+  String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
+  String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
+
+  # choose docker prefix based on cloud provider
+  String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
+
+  # make sure either gcp or azure is supplied as cloud_provider input
+  if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
+    call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
+      input:
+        message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+    }
+  }
+
   parameter_meta {
     r1_fastq: "forward read, contains cell barcodes and molecule barcodes"
     r2_fastq: "reverse read, contains cDNA fragment generated from captured mRNA"
@@ -96,16 +134,21 @@ workflow Optimus {
       force_no_check = force_no_check,
       counting_mode = counting_mode,
       count_exons = count_exons,
-      whitelist_v2 = whitelist_v2,
-      whitelist_v3 = whitelist_v3,
+      gcp_whitelist_v2 = gcp_whitelist_v2,
+      gcp_whitelist_v3 = gcp_whitelist_v3,
+      azure_whitelist_v2 = azure_whitelist_v2,
+      azure_whitelist_v3 = azure_whitelist_v3,
       tenx_chemistry_version = tenx_chemistry_version,
       r1_fastq = r1_single_fastq,
-      ignore_r1_read_length = ignore_r1_read_length
+      ignore_r1_read_length = ignore_r1_read_length,
+      cloud_provider = cloud_provider,
+      alpine_docker_path = alpine_docker_prefix + alpine_docker
   }
 
   call StarAlign.STARGenomeRefVersion as ReferenceCheck {
     input:
-      tar_star_reference = tar_star_reference
+      tar_star_reference = tar_star_reference,
+      ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker
   }
 
   call FastqProcessing.FastqProcessing as SplitFastq {
@@ -116,7 +159,8 @@ workflow Optimus {
       whitelist = whitelist,
       chemistry = tenx_chemistry_version,
       sample_id = input_id,
-      read_struct = read_struct
+      read_struct = read_struct,
+      warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1
   }
 
   scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) {
@@ -131,21 +175,24 @@ workflow Optimus {
         counting_mode = counting_mode,
         count_exons = count_exons,
         output_bam_basename = output_bam_basename + "_" + idx,
-        soloMultiMappers = soloMultiMappers
+        soloMultiMappers = soloMultiMappers,
+        star_docker_path = docker_prefix + star_docker
     }
   }
   call Merge.MergeSortBamFiles as MergeBam {
     input:
       bam_inputs = STARsoloFastq.bam_output,
       output_bam_filename = output_bam_basename + ".bam",
-      sort_order = "coordinate"
+      sort_order = "coordinate",
+      picard_cloud_docker_path = docker_prefix + picard_cloud_docker
   }
   call Metrics.CalculateGeneMetrics as GeneMetrics {
     input:
       bam_input = MergeBam.output_bam,
       mt_genes = mt_genes,
       original_gtf = annotations_gtf,
-      input_id = input_id
+      input_id = input_id,
+      warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1
   }
 
   call Metrics.CalculateCellMetrics as CellMetrics {
@@ -153,7 +200,8 @@ workflow Optimus {
       bam_input = MergeBam.output_bam,
       mt_genes = mt_genes,
       original_gtf = annotations_gtf,
-      input_id = input_id
+      input_id = input_id,
+      warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1
   }
 
   call StarAlign.MergeStarOutput as MergeStarOutputs {
@@ -165,7 +213,9 @@ workflow Optimus {
       summary = STARsoloFastq.summary,
       align_features = STARsoloFastq.align_features,
       umipercell = STARsoloFastq.umipercell,
-      input_id = input_id
+      input_id = input_id,
+      counting_mode = counting_mode,
+      warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2
   }
   if (counting_mode == "sc_rna"){
     call RunEmptyDrops.RunEmptyDrops {
@@ -173,7 +223,8 @@ workflow Optimus {
         sparse_count_matrix = MergeStarOutputs.sparse_counts,
         row_index = MergeStarOutputs.row_index,
         col_index = MergeStarOutputs.col_index,
-        emptydrops_lower = emptydrops_lower
+        emptydrops_lower = emptydrops_lower,
+        empty_drops_docker_path = docker_prefix + empty_drops_docker
     }
   }
 
@@ -192,7 +243,8 @@ workflow Optimus {
         gene_id = MergeStarOutputs.col_index,
         empty_drops_result = RunEmptyDrops.empty_drops_result,
         counting_mode = counting_mode,
-        pipeline_version = "Optimus_v~{pipeline_version}"
+        pipeline_version = "Optimus_v~{pipeline_version}",
+        warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1
     }
   }
   if (count_exons  && counting_mode=="sn_rna") {
@@ -202,7 +254,13 @@ workflow Optimus {
         features = STARsoloFastq.features_sn_rna,
         matrix = STARsoloFastq.matrix_sn_rna,
         cell_reads = STARsoloFastq.cell_reads_sn_rna,
-        input_id = input_id
+        input_id = input_id,
+        counting_mode = "sc_rna",
+        summary = STARsoloFastq.summary_sn_rna,
+        align_features = STARsoloFastq.align_features_sn_rna,
+        umipercell = STARsoloFastq.umipercell_sn_rna,
+        input_id = input_id,
+        warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2
     }
     call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{
       input:
@@ -219,7 +277,8 @@ workflow Optimus {
         sparse_count_matrix_exon = MergeStarOutputsExons.sparse_counts,
         cell_id_exon = MergeStarOutputsExons.row_index,
         gene_id_exon = MergeStarOutputsExons.col_index,
-        pipeline_version = "Optimus_v~{pipeline_version}"
+        pipeline_version = "Optimus_v~{pipeline_version}",
+        warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1
     }
   }
 
@@ -238,11 +297,13 @@ workflow Optimus {
     File gene_metrics = GeneMetrics.gene_metrics
     File? cell_calls = RunEmptyDrops.empty_drops_result
     File? aligner_metrics = MergeStarOutputs.cell_reads_out
+    File? library_metrics = MergeStarOutputs.library_metrics
     Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix
     Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix
     Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix
     Array[File?] multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix
 
+
     # h5ad
     File h5ad_output_file = final_h5ad_output
   }
diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json
index 612659d25c..667e632bbd 100644
--- a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json
+++ b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json
@@ -15,5 +15,6 @@
   "Optimus.input_id": "pbmc_human_v3",
   "Optimus.tenx_chemistry_version": "3",
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf",
-  "Optimus.star_strand_mode": "Forward"
+  "Optimus.star_strand_mode": "Forward",
+  "Optimus.cloud_provider": "gcp"
 }
diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json
index 0dc26af9fd..33e7553cb4 100644
--- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json
+++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json
@@ -27,5 +27,6 @@
   "Optimus.input_id": "neurons2k_mouse",
   "Optimus.tenx_chemistry_version": "2",
   "Optimus.star_strand_mode": "Unstranded",
-  "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf"
+  "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf",
+  "Optimus.cloud_provider": "gcp"
 }
diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json
index 787a1a8347..fef0bd0f76 100644
--- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json
+++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json
@@ -25,5 +25,6 @@
   "Optimus.star_strand_mode": "Unstranded",
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf",
   "Optimus.counting_mode": "sn_rna",
-  "Optimus.count_exons": true
+  "Optimus.count_exons": true,
+  "Optimus.cloud_provider": "gcp"
 }
diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md
index 17255ab77f..ca066704a4 100644
--- a/pipelines/skylab/paired_tag/PairedTag.changelog.md
+++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md
@@ -1,3 +1,7 @@
+# 0.2.1
+2024-03-01 (Date of Last Commit)
+* Updated the Optimus.wdl to run on Azure. This change does not affect the PairedTag pipeline.
+
 # 0.2.0
 2024-02-29 (Date of Last Commit)
 * Added mem and disk to inputs of Join Barcodes task of Multiome workflow; does not impact the Paired-tag workflow
diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl
index eb11e9acc4..29d2594152 100644
--- a/pipelines/skylab/paired_tag/PairedTag.wdl
+++ b/pipelines/skylab/paired_tag/PairedTag.wdl
@@ -5,7 +5,7 @@ import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
 import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
 import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing
 workflow PairedTag {
-    String pipeline_version = "0.2.0"
+    String pipeline_version = "0.2.1"
 
     input {
         String input_id
diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md
index e041750353..1817b2665b 100644
--- a/pipelines/skylab/slideseq/SlideSeq.changelog.md
+++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md
@@ -1,3 +1,11 @@
+# 3.1.3
+2024-03-01 (Date of Last Commit)
+* Updated the Optimus.wdl to run on Azure. This change does not affect the SlideSeq pipeline.
+
+# 3.1.2
+2024-02-28 (Date of Last Commit)
+* Updated the Optimus workflow to produce a library-level metrics CSV; this does not impact the slide-seq pipeline
+
 # 3.1.1
 2024-02-29 (Date of Last Commit)
 * Added mem and disk to inputs of Join Barcodes task of Multiome workflow; does not impact the Slideseq workflow
diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index 66f6001da8..bc8df16dde 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -6,6 +6,8 @@ import "../../../tasks/skylab/Metrics.wdl" as Metrics
 import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
 import "../../../tasks/skylab/CheckInputs.wdl" as OptimusInputChecks
 import "../../../tasks/skylab/MergeSortBam.wdl" as Merge
+import "../../../tasks/broad/Utilities.wdl" as utils
+
 
 ## Copyright Broad Institute, 2022
 ##
@@ -23,7 +25,7 @@ import "../../../tasks/skylab/MergeSortBam.wdl" as Merge
 
 workflow SlideSeq {
 
-    String pipeline_version = "3.1.1"
+    String pipeline_version = "3.1.3"
 
     input {
         Array[File] r1_fastq
@@ -39,6 +41,33 @@ workflow SlideSeq {
         Boolean count_exons = true
         File bead_locations
 
+        String cloud_provider
+
+    }
+
+    # docker images
+    String pytools_docker = "pytools:1.0.0-1661263730"
+    String picard_cloud_docker = "picard-cloud:2.26.10"
+    String warp_tools_docker_2_0_1 = "warp-tools:2.0.1"
+    String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985"
+
+    String ubuntu_docker = "ubuntu_16_0_4:latest"
+    String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/"
+    String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/"
+    String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix
+
+    String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
+    String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
+
+    # choose docker prefix based on cloud provider
+    String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
+
+    # make sure either gcp or azure is supplied as cloud_provider input
+    if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
+        call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
+            input:
+                message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+        }
     }
 
     parameter_meta {
@@ -51,7 +80,8 @@ workflow SlideSeq {
 
     call StarAlign.STARGenomeRefVersion as ReferenceCheck {
         input:
-          tar_star_reference = tar_star_reference
+          tar_star_reference = tar_star_reference,
+          ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker
     }
 
     call Metrics.FastqMetricsSlideSeq as FastqMetrics {
@@ -86,13 +116,15 @@ workflow SlideSeq {
         input:
             bam_inputs = STARsoloFastqSlideSeq.bam_output,
             output_bam_filename = output_bam_basename + ".bam",
-            sort_order = "coordinate"
+            sort_order = "coordinate",
+            picard_cloud_docker_path = docker_prefix + picard_cloud_docker
     }
     call Metrics.CalculateGeneMetrics as GeneMetrics {
         input:
             bam_input = MergeBam.output_bam,
             original_gtf = annotations_gtf,
-            input_id = input_id
+            input_id = input_id,
+            warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1
     }
     call Metrics.CalculateUMIsMetrics as UMIsMetrics {
         input:
@@ -105,7 +137,9 @@ workflow SlideSeq {
         input:
             bam_input = MergeBam.output_bam,
             original_gtf = annotations_gtf,
-            input_id = input_id
+            input_id = input_id,
+            warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1
+
     }
 
     call StarAlign.MergeStarOutput as MergeStarOutputs {
@@ -113,7 +147,8 @@ workflow SlideSeq {
             barcodes = STARsoloFastqSlideSeq.barcodes,
             features = STARsoloFastqSlideSeq.features,
             matrix = STARsoloFastqSlideSeq.matrix,
-            input_id = input_id
+            input_id = input_id,
+            warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2
     }
     if ( !count_exons ) {
         call H5adUtils.OptimusH5adGeneration as SlideseqH5adGeneration{
@@ -126,7 +161,9 @@ workflow SlideSeq {
                 cell_id = MergeStarOutputs.row_index,
                 gene_id = MergeStarOutputs.col_index,
                 add_emptydrops_data = "no",
-                pipeline_version = "SlideSeq_v~{pipeline_version}"
+                pipeline_version = "SlideSeq_v~{pipeline_version}",
+                warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1
+
         }
     }
     if (count_exons) {
@@ -135,7 +172,8 @@ workflow SlideSeq {
                 barcodes = STARsoloFastqSlideSeq.barcodes_sn_rna,
                 features = STARsoloFastqSlideSeq.features_sn_rna,
                 matrix = STARsoloFastqSlideSeq.matrix_sn_rna,
-                input_id = input_id
+                input_id = input_id,
+                warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2
         }
         call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{
             input:
@@ -149,7 +187,8 @@ workflow SlideSeq {
                 sparse_count_matrix_exon = MergeStarOutputsExons.sparse_counts,
                 cell_id_exon = MergeStarOutputsExons.row_index,
                 gene_id_exon = MergeStarOutputsExons.col_index,
-                pipeline_version = "SlideSeq_v~{pipeline_version}"
+                pipeline_version = "SlideSeq_v~{pipeline_version}",
+                warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1
         }
     }
 
diff --git a/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json b/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json
index d8998d1d9b..035b22c58e 100644
--- a/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json
+++ b/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json
@@ -13,5 +13,6 @@
   "SlideSeq.tar_star_reference": "gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/star/modified_star_2.7.9a_primary_gencode_mouse_vM23.tar",
   "SlideSeq.annotations_gtf": "gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf",
   "SlideSeq.count_exons": true,
-  "SlideSeq.bead_locations": " gs://broad-gotc-test-storage/SlideSeq/inputs/plumbing/Puck_210817_11/Puck_210817_11.tsv"
+  "SlideSeq.bead_locations": " gs://broad-gotc-test-storage/SlideSeq/inputs/plumbing/Puck_210817_11/Puck_210817_11.tsv",
+  "SlideSeq.cloud_provider": "gcp"
 }
\ No newline at end of file
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
index 64b516e8b9..d3c50e9282 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
@@ -1,3 +1,13 @@
+# 1.3.2
+2024-03-01 (Date of Last Commit)
+
+* Updated the Optimus.wdl to run on Azure. This change does not affect the MultiSampleSmartSeq2SingleNucleus pipeline.
+
+# 1.3.1
+2024-02-28 (Date of Last Commit)
+
+* Updated the Optimus workflow to produce a library-level metrics CSV; this does not impact the Single-nucleus Multi Sample Smart-seq2 pipeline
+
 # 1.3.0
 2024-01-22 (Date of Last Commit)
 
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
index 7a4c1066f8..312e447204 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
@@ -6,6 +6,7 @@ import "../../../tasks/skylab/StarAlign.wdl" as StarAlign
 import "../../../tasks/skylab/Picard.wdl" as Picard
 import "../../../tasks/skylab/FeatureCounts.wdl" as CountAlignments
 import "../../../tasks/skylab/LoomUtils.wdl" as LoomUtils
+import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow MultiSampleSmartSeq2SingleNucleus {
   meta {
@@ -38,9 +39,25 @@ workflow MultiSampleSmartSeq2SingleNucleus {
       Array[String]? organ
       String? input_name_metadata_field
       String? input_id_metadata_field
+
+      String cloud_provider
+  }
+
+  String ubuntu_docker = "ubuntu_16_0_4:latest"
+  String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/"
+  String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/"
+  String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix
+
+  # make sure either gcp or azure is supplied as cloud_provider input
+  if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
+      call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
+          input:
+              message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+      }
   }
+
   # Version of this pipeline
-  String pipeline_version = "1.3.0"
+  String pipeline_version = "1.3.2"
 
   if (false) {
      String? none = "None"
@@ -72,7 +89,8 @@ workflow MultiSampleSmartSeq2SingleNucleus {
   
   call StarAlign.STARGenomeRefVersion as ReferenceCheck {
     input:
-      tar_star_reference = tar_star_reference
+      tar_star_reference = tar_star_reference,
+      ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker
   }
 
   call TrimAdapters.TrimAdapters as TrimAdapters {
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json b/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json
index 8fafd92173..db8f68b114 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json
@@ -18,5 +18,6 @@
     "SM-GE644_S117_E1-50_GCGTAGTA-AAGGAGTA",
     "SM-GE644_S118_E1-50_GCGTAGTA-CTAAGCCT"
   ],
-  "MultiSampleSmartSeq2SingleNucleus.batch_id": "SM-GE644"
+  "MultiSampleSmartSeq2SingleNucleus.batch_id": "SM-GE644",
+  "MultiSampleSmartSeq2SingleNucleus.cloud_provider": "gcp"
 }
diff --git a/pipelines/skylab/snM3C/snM3C.changelog.md b/pipelines/skylab/snM3C/snM3C.changelog.md
index dc90a21239..f3fb853b6c 100644
--- a/pipelines/skylab/snM3C/snM3C.changelog.md
+++ b/pipelines/skylab/snM3C/snM3C.changelog.md
@@ -1,7 +1,7 @@
 # 2.0.1
 2024-2-15 (Date of Last Commit)
 
-* Updated the snM3C task memory, disk, and CPUs
+* Updated the snM3C task memory, disk, and CPUs 
 
 # 2.0.0
 2024-2-13 (Date of Last Commit)
diff --git a/pipelines/skylab/snM3C/snM3C.wdl b/pipelines/skylab/snM3C/snM3C.wdl
index bac72eb68c..bcdc71a861 100644
--- a/pipelines/skylab/snM3C/snM3C.wdl
+++ b/pipelines/skylab/snM3C/snM3C.wdl
@@ -23,6 +23,7 @@ workflow snM3C {
         Int num_downstr_bases = 2
         Int compress_level = 5
         Int batch_number
+
     }
 
     # version of the pipeline
diff --git a/tasks/skylab/CheckInputs.wdl b/tasks/skylab/CheckInputs.wdl
index b24c77c133..89b99c7798 100644
--- a/tasks/skylab/CheckInputs.wdl
+++ b/tasks/skylab/CheckInputs.wdl
@@ -55,6 +55,8 @@ task checkInputArrays {
 
 task checkOptimusInput {
   input {
+    String cloud_provider
+    #String SAS_TOKEN
     File r1_fastq
     String counting_mode
     Boolean force_no_check
@@ -63,9 +65,12 @@ task checkOptimusInput {
     Int machine_mem_mb = 1000
     Int cpu = 1
     Int tenx_chemistry_version
-    String whitelist_v2
-    String whitelist_v3
+    String gcp_whitelist_v2
+    String gcp_whitelist_v3
+    String azure_whitelist_v2
+    String azure_whitelist_v3
     Boolean ignore_r1_read_length
+    String alpine_docker_path
   }  
 
   meta {
@@ -108,15 +113,36 @@ task checkOptimusInput {
         echo "ERROR: Invalid value count_exons should not be used with \"${counting_mode}\" input."
       fi
     fi
+
     # Check for chemistry version to produce read structure and whitelist
     if [[ ~{tenx_chemistry_version} == 2 ]]
       then
-      WHITELIST=~{whitelist_v2}
+      if [[ "~{cloud_provider}" == "gcp" ]]
+      then
+        WHITELIST=~{gcp_whitelist_v2}
+      elif [[ "~{cloud_provider}" == "azure" ]]
+      then
+        WHITELIST=~{azure_whitelist_v2}
+      else
+        pass="false"
+        echo "ERROR: Cloud provider must be either gcp or azure"
+      fi
+      echo "WHITELIST:" $WHITELIST
       echo $WHITELIST > whitelist.txt
       echo 16C10M > read_struct.txt
     elif [[ ~{tenx_chemistry_version} == 3 ]]
       then
-      WHITELIST=~{whitelist_v3}
+      if [[ "~{cloud_provider}" == "gcp" ]]
+      then
+        WHITELIST=~{gcp_whitelist_v3}
+      elif [[ "~{cloud_provider}" == "azure" ]]
+      then
+        WHITELIST=~{azure_whitelist_v3}
+      else
+        pass="false"
+        echo "ERROR: Cloud provider must be either gcp or azure"
+      fi
+      echo "WHITELIST:" $WHITELIST
       echo $WHITELIST > whitelist.txt
       echo 16C12M > read_struct.txt
     else
@@ -153,7 +179,7 @@ task checkOptimusInput {
     String read_struct_out = read_string("read_struct.txt")
   }
   runtime {
-    docker: "bashell/alpine-bash:latest"
+    docker: alpine_docker_path
     cpu: cpu
     memory: "~{machine_mem_mb} MiB"
     disks: "local-disk ~{disk} HDD"
diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index a4d7a8e615..939d1e1e12 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -11,7 +11,8 @@ task FastqProcessing {
     String read_struct
 
     #using the latest build of warp-tools in GCR
-    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+    String warp_tools_docker_path
+
     #runtime values
     Int machine_mem_mb = 40000
     Int cpu = 16   
@@ -34,7 +35,7 @@ task FastqProcessing {
     whitelist: "10x genomics cell barcode whitelist"
     chemistry: "chemistry employed, currently can be tenX_v2 or tenX_v3, the latter implies NO feature barcodes"
     sample_id: "name of sample matching this file, inserted into read group header"
-    docker: "(optional) the docker image containing the runtime environment for this task"
+    warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task"
     machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
     cpu: "(optional) the number of cpus to provision for this task"
     disk: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -111,7 +112,7 @@ task FastqProcessing {
   }
   
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     memory: "${machine_mem_mb} MiB"
     disks: "local-disk ${disk} HDD"
     disk: disk + " GB" # TES
diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl
index 18fed45fc1..99ef957e4b 100644
--- a/tasks/skylab/H5adUtils.wdl
+++ b/tasks/skylab/H5adUtils.wdl
@@ -6,7 +6,7 @@ task OptimusH5adGeneration {
 
   input {
     #runtime values
-    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+    String warp_tools_docker_path
     # name of the sample
     String input_id
     # user provided id
@@ -88,7 +88,7 @@ task OptimusH5adGeneration {
   >>>
 
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     cpu: cpu  # note that only 1 thread is supported by pseudobam
     memory: "~{machine_mem_mb} MiB"
     disks: "local-disk ~{disk} HDD"
@@ -105,7 +105,7 @@ task SingleNucleusOptimusH5adOutput {
 
     input {
         #runtime values
-        String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+        String warp_tools_docker_path
         # name of the sample
         String input_id
         # user provided id
@@ -170,7 +170,7 @@ task SingleNucleusOptimusH5adOutput {
     }
 
     runtime {
-        docker: docker
+        docker: warp_tools_docker_path
         cpu: cpu  # note that only 1 thread is supported by pseudobam
         memory: "~{machine_mem_mb} MiB"
         disks: "local-disk ~{disk} HDD"
diff --git a/tasks/skylab/MergeSortBam.wdl b/tasks/skylab/MergeSortBam.wdl
index 229ed18f8a..23ea466708 100644
--- a/tasks/skylab/MergeSortBam.wdl
+++ b/tasks/skylab/MergeSortBam.wdl
@@ -9,7 +9,7 @@ task MergeSortBamFiles {
     Int compression_level = 5
 
     # runtime values
-    String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
+    String picard_cloud_docker_path
     Int machine_mem_mb = 18150
     Int cpu = 1
     # default to 500GiB of space
@@ -28,7 +28,7 @@ task MergeSortBamFiles {
   parameter_meta {
     bam_inputs: "Merges Sam/Bam files"
     sort_order: "sort order of output bam"
-    docker: "(optional) the docker image containing the runtime environment for this task"
+    picard_cloud_docker_path: "(optional) the docker image containing the runtime environment for this task"
     machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
     cpu: "(optional) the number of cpus to provision for this task"
     disk: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -47,7 +47,7 @@ task MergeSortBamFiles {
   }
 
   runtime {
-    docker: docker
+    docker: picard_cloud_docker_path
     memory: "${machine_mem_mb} MiB"
     disks: "local-disk ${disk} HDD"
     disk: disk + " GB" # TES
diff --git a/tasks/skylab/Metrics.wdl b/tasks/skylab/Metrics.wdl
index fb91283d71..76b85d1012 100644
--- a/tasks/skylab/Metrics.wdl
+++ b/tasks/skylab/Metrics.wdl
@@ -8,7 +8,8 @@ task CalculateCellMetrics {
     String input_id
 
     # runtime values
-    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+
+    String warp_tools_docker_path
     Int machine_mem_mb = 8000
     Int cpu = 4
     Int disk = ceil(size(bam_input, "Gi") * 4) + ceil((size(original_gtf, "Gi") * 3)) 
@@ -21,7 +22,7 @@ task CalculateCellMetrics {
 
   parameter_meta {
     bam_input: "Input bam file containing reads marked with tags for cell barcodes (CB), molecule barcodes (UB) and gene ids (GX)"
-    docker: "(optional) the docker image containing the runtime environment for this task"
+    warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task"
     machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
     cpu: "(optional) the number of cpus to provision for this task"
     disk: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -64,7 +65,7 @@ task CalculateCellMetrics {
   }
 
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     memory: "${machine_mem_mb} MiB"
     disks: "local-disk ${disk} HDD"
     disk: disk + " GB" # TES
@@ -85,7 +86,7 @@ task CalculateGeneMetrics {
     String input_id
     # runtime values
 
-    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+    String warp_tools_docker_path
     Int machine_mem_mb = 32000
     Int cpu = 4
     Int disk = ceil(size(bam_input, "Gi") * 4) + ceil((size(original_gtf, "Gi") * 3)) 
@@ -99,7 +100,7 @@ task CalculateGeneMetrics {
 
   parameter_meta {
     bam_input: "Input bam file containing reads marked with tags for cell barcodes (CB), molecule barcodes (UB) and gene ids (GE)"
-    docker: "(optional) the docker image containing the runtime environment for this task"
+    warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task"
     machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
     cpu: "(optional) the number of cpus to provision for this task"
     disk: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -144,7 +145,7 @@ task CalculateGeneMetrics {
   }
 
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     memory: "${machine_mem_mb} MiB"
     disks: "local-disk ${disk} HDD" 
     disk: disk + " GB" # TES
diff --git a/tasks/skylab/RunEmptyDrops.wdl b/tasks/skylab/RunEmptyDrops.wdl
index a0f60b1c99..0921393862 100644
--- a/tasks/skylab/RunEmptyDrops.wdl
+++ b/tasks/skylab/RunEmptyDrops.wdl
@@ -16,7 +16,7 @@ task RunEmptyDrops {
         Int emptydrops_lower = 100
 
         # runtime values
-        String docker = "us.gcr.io/broad-gotc-prod/empty-drops:1.0.1-4.2"
+        String empty_drops_docker_path
         Int machine_mem_mb = 32000
         Int cpu = 1
         Int disk = 20
@@ -48,7 +48,7 @@ task RunEmptyDrops {
     }
 
     runtime {
-        docker: docker
+        docker: empty_drops_docker_path
         memory: "${machine_mem_mb} MiB"
         disks: "local-disk ${disk} HDD"
         disk: disk_size + " GB" # TES
diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index 81f6668c42..e6ddc818f5 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -226,7 +226,7 @@ task STARsoloFastq {
     String? soloMultiMappers
 
     # runtime values
-    String docker = "us.gcr.io/broad-gotc-prod/star:1.0.1-2.7.11a-1692706072"
+    String star_docker_path
     Int machine_mem_mb = 64000
     Int cpu = 8
     # multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference.
@@ -244,7 +244,7 @@ task STARsoloFastq {
     r2_fastq: "array of forward read FASTQ files"
     tar_star_reference: "star reference tarball built against the species that the bam_input is derived from"
     star_strand_mode: "STAR mode for handling stranded reads. Options are 'Forward', 'Reverse, or 'Unstranded'"
-    docker: "(optional) the docker image containing the runtime environment for this task"
+    star_docker_path: "(optional) the docker image containing the runtime environment for this task"
     machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
     cpu: "(optional) the number of cpus to provision for this task"
     disk: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -432,7 +432,7 @@ task STARsoloFastq {
   >>>
 
   runtime {
-    docker: docker
+    docker: star_docker_path
     memory: "~{machine_mem_mb} MiB"
     disks: "local-disk ~{disk} HDD"
     disk: disk + " GB" # TES
@@ -475,11 +475,12 @@ task MergeStarOutput {
     Array[File]? summary
     Array[File]? align_features
     Array[File]? umipercell
-    
+    String? counting_mode
+
     String input_id
 
     #runtime values
-    String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
+    String warp_tools_docker_path
     Int machine_mem_gb = 20
     Int cpu = 1
     Int disk = ceil(size(matrix, "Gi") * 2) + 10
@@ -490,7 +491,7 @@ task MergeStarOutput {
   }
 
   parameter_meta {
-    docker: "(optional) the docker image containing the runtime environment for this task"
+    warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task"
     machine_mem_gb: "(optional) the amount of memory (GiB) to provision for this task"
     cpu: "(optional) the number of cpus to provision for this task"
     disk: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -564,15 +565,18 @@ task MergeStarOutput {
       fi
     done
     
-    # If text files are present, create a tar archive with them
+    # If text files are present, create a tar archive with them and run python script to combine shard metrics
     if ls *.txt 1> /dev/null 2>&1; then
+      echo "listing files"
+      ls
+      python3 /warptools/scripts/combine_shard_metrics.py ~{input_id}_summary.txt ~{input_id}_align_features.txt ~{input_id}_cell_reads.txt ~{counting_mode} ~{input_id}
       tar -zcvf ~{input_id}.star_metrics.tar *.txt
     else
       echo "No text files found in the folder."
     fi
 
    # create the  compressed raw count matrix with the counts, gene names and the barcodes
-    python3 /usr/gitc/create-merged-npz-output.py \
+    python3 /warptools/scripts/create-merged-npz-output.py \
         --barcodes ${barcodes_files[@]} \
         --features ${features_files[@]} \
         --matrix ${matrix_files[@]} \
@@ -580,7 +584,7 @@ task MergeStarOutput {
   >>>
 
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     memory: "${machine_mem_gb} GiB"
     disks: "local-disk ${disk} HDD"
     disk: disk + " GB" # TES
@@ -593,6 +597,7 @@ task MergeStarOutput {
     File col_index = "~{input_id}_sparse_counts_col_index.npy"
     File sparse_counts = "~{input_id}_sparse_counts.npz"
     File? cell_reads_out = "~{input_id}.star_metrics.tar"
+    File? library_metrics="~{input_id}_library_metrics.csv"
   }
 }
 
@@ -717,6 +722,7 @@ task STARGenomeRefVersion {
   input {
     String tar_star_reference
     Int disk = 10
+    String ubuntu_docker_path
   }
 
   meta {
@@ -749,7 +755,7 @@ task STARGenomeRefVersion {
   }
 
   runtime {
-    docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest"
+    docker: ubuntu_docker_path
     memory: "2 GiB"
     disks: "local-disk ${disk} HDD"
     disk: disk + " GB" # TES
diff --git a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl
index a09838c3a4..228b6b1f41 100644
--- a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl
+++ b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl
@@ -33,6 +33,8 @@ workflow TestMultiSampleSmartSeq2SingleNucleus {
       Boolean update_truth
       String vault_token_path
       String google_account_vault_path
+
+      String cloud_provider
     }
 
     meta {
@@ -57,7 +59,8 @@ workflow TestMultiSampleSmartSeq2SingleNucleus {
         species = species,
         organ = organ,
         input_name_metadata_field = input_name_metadata_field,
-        input_id_metadata_field = input_id_metadata_field
+        input_id_metadata_field = input_id_metadata_field,
+        cloud_provider = cloud_provider
   
     }
 
diff --git a/verification/test-wdls/TestMultiome.wdl b/verification/test-wdls/TestMultiome.wdl
index 9a4a0ec83a..6da047efcc 100644
--- a/verification/test-wdls/TestMultiome.wdl
+++ b/verification/test-wdls/TestMultiome.wdl
@@ -10,6 +10,7 @@ workflow TestMultiome {
 
     input {
       String input_id
+      String cloud_provider
 
       # Optimus Inputs
       String counting_mode = "sn_rna"
@@ -85,7 +86,8 @@ workflow TestMultiome {
         chrom_sizes = chrom_sizes,
         atac_whitelist = atac_whitelist,
         run_cellbender = run_cellbender,
-        soloMultiMappers = soloMultiMappers
+        soloMultiMappers = soloMultiMappers,
+        cloud_provider = cloud_provider
   
     }
 
diff --git a/verification/test-wdls/TestOptimus.wdl b/verification/test-wdls/TestOptimus.wdl
index 82bdf03adc..51e34e04e9 100644
--- a/verification/test-wdls/TestOptimus.wdl
+++ b/verification/test-wdls/TestOptimus.wdl
@@ -59,6 +59,8 @@ workflow TestOptimus {
     String vault_token_path
     String google_account_vault_path
 
+    String cloud_provider
+
   }
 
   meta {
@@ -84,7 +86,8 @@ workflow TestOptimus {
       star_strand_mode           = star_strand_mode,
       count_exons                = count_exons,
       ignore_r1_read_length      = ignore_r1_read_length,
-      soloMultiMappers           = soloMultiMappers
+      soloMultiMappers           = soloMultiMappers,
+      cloud_provider             = cloud_provider
   }
 
   # Collect all of the pipeling output into single Array
diff --git a/verification/test-wdls/TestSlideSeq.wdl b/verification/test-wdls/TestSlideSeq.wdl
index b63cd87099..b0523fee21 100644
--- a/verification/test-wdls/TestSlideSeq.wdl
+++ b/verification/test-wdls/TestSlideSeq.wdl
@@ -26,6 +26,7 @@ workflow TestSlideSeq {
       Boolean update_truth
       String vault_token_path
       String google_account_vault_path
+      String cloud_provider
     }
 
     meta {
@@ -43,7 +44,8 @@ workflow TestSlideSeq {
         annotations_gtf = annotations_gtf,
         output_bam_basename = output_bam_basename,
         count_exons = count_exons,
-        bead_locations = bead_locations
+        bead_locations = bead_locations,
+        cloud_provider = cloud_provider
   
     }
 
diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md
index 4f0750f35d..547bbeb5ac 100644
--- a/website/docs/Pipelines/ATAC/README.md
+++ b/website/docs/Pipelines/ATAC/README.md
@@ -8,7 +8,7 @@ slug: /Pipelines/ATAC/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [1.1.8](https://github.com/broadinstitute/warp/releases) | January, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [1.1.9](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 
 ## Introduction to the ATAC workflow
diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md
index 3409347d3f..511f27c285 100644
--- a/website/docs/Pipelines/Multiome_Pipeline/README.md
+++ b/website/docs/Pipelines/Multiome_Pipeline/README.md
@@ -8,7 +8,7 @@ slug: /Pipelines/Multiome_Pipeline/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
 
-| [Multiome v3.2.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact the [WARP Pipeline Development team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [Multiome v3.2.2](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact the [WARP Pipeline Development team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ![Multiome_diagram](./multiome_diagram.png)
 
@@ -56,6 +56,7 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta
 | Input name | Description | Type |
 | --- | --- | --- |
 | input_id | Unique identifier describing the biological sample or replicate that corresponds with the FASTQ files; can be a human-readable name or UUID. | String |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
 | annotations_gtf | GTF file containing gene annotations used for GEX cell metric calculation and ATAC fragment metrics; must match the GTF used to build the STAR aligner. | File |
 | gex_r1_fastq | Array of read 1 FASTQ files representing a single GEX 10x library. | Array[File] |
 | gex_r2_fastq | Array of read 2 FASTQ files representing a single GEX 10x library.| Array[File] |
@@ -69,7 +70,7 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta
 | ignore_r1_read_length | Optional boolean for the Optimus (GEX) pipeline indicating if the pipeline should ignore barcode chemistry check; if "true", the workflow will not ensure the `10x_chemistry_version` input matches the chemistry in the read 1 FASTQ; default is "false". | Boolean |
 | star_strand_mode | Optional string for the Optimus (GEX) pipeline for performing STARsolo alignment on forward stranded, reverse stranded, or unstranded data; default is "Forward". | String |
 | count_exons | Optional boolean for the Optimus (GEX) pipeline indicating if the workflow should calculate exon counts **when in single-nucleus (sn_rna) mode**; if "true" in sc_rna mode, the workflow will return an error; default is "false". | Boolean |
-| gex_whitelist | Optional file containing the list of valid barcodes for 10x multiome GEX data; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt". | File |
+| gex_whitelist | Optional file containing the list of valid barcodes for 10x multiome GEX data; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" when run on GCP. | File |
 | soloMultiMappers | Optional string describing whether or not the Optimus (GEX) pipeline should run STARsolo with the `--soloMultiMappers` flag. | String |
 | atac_r1_fastq | Array of read 1 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
 | atac_r2_fastq | Array of barcodes FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md
index 382804e447..67a8ea0f7b 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/README.md
+++ b/website/docs/Pipelines/Optimus_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/Optimus_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [optimus_v6.4.0](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | February, 2024 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [optimus_v6.4.2](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | March, 2024 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 
 ![Optimus_diagram](Optimus_diagram.png)
@@ -85,6 +85,7 @@ The example configuration files also contain metadata for the reference files, d
 
 | Parameter name | Description | Optional attributes (when applicable) |
 | --- | --- | --- |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | N/A |
 | whitelist |  List of known CBs; the workflow automatically selects the [10x Genomics](https://www.10xgenomics.com/) whitelist that corresponds to the v2 or v3 chemistry based on the input `tenx_chemistry_version`. A custom whitelist can also be provided if the input data was generated with a chemistry different from 10x Genomics v2 or v3. To use a custom whitelist, set the input `ignore_r1_read_length` to "true". | N/A |
 | read_struct | String describing the structure of reads; the workflow automatically selects the [10x Genomics](https://www.10xgenomics.com/) read structure that corresponds to the v2 or v3 chemistry based on the input `tenx_chemistry_version`. A custom read structure can also be provided if the input data was generated with a chemistry different from 10x Genomics v2 or v3. To use a custom read structure, set the input `force_no_check` to "true". | N/A |
 | tar_star_reference | TAR file containing a species-specific reference genome and GTF; it is generated using the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl). | N/A |
diff --git a/website/docs/Pipelines/PairedTag_Pipeline/README.md b/website/docs/Pipelines/PairedTag_Pipeline/README.md
index cc0114a766..40d588fb58 100644
--- a/website/docs/Pipelines/PairedTag_Pipeline/README.md
+++ b/website/docs/Pipelines/PairedTag_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/PairedTag_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [PairedTag_v0.1.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
+| [PairedTag_v0.2.1](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
 
 
 ## Introduction to the Paired-Tag workflow
diff --git a/website/docs/Pipelines/SlideSeq_Pipeline/README.md b/website/docs/Pipelines/SlideSeq_Pipeline/README.md
index 0b59323acf..7cf8c08935 100644
--- a/website/docs/Pipelines/SlideSeq_Pipeline/README.md
+++ b/website/docs/Pipelines/SlideSeq_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/SlideSeq_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [SlideSeq v3.1.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Elizabeth Kiernan & Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
+| [SlideSeq v3.1.2](https://github.com/broadinstitute/warp/releases) | March, 2024 | Elizabeth Kiernan & Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ![SlideSeq_diagram](./slide-seq_diagram.png)
 
@@ -69,6 +69,7 @@ The Slide-seq workflow inputs are specified in JSON configuration files. Example
 | output_bam_basename | Optional string used for the output BAM file basename. | String |
 | count_exons | Optional boolean indicating if the workflow should calculate exon counts; default is set to “true” and produces an h5ad file containing both whole-gene counts and exon counts in an additional layer; when set to “false”, an h5ad file containing only whole-gene counts is produced. | Boolean |
 | bead_locations | Whitelist TSV file containing bead barcodes and XY coordinates on a single line for each bead; determined by sequencing prior to mRNA transfer and library preparation. | File |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
 
 #### Pseudogene handling
 
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
index 09acab0beb..1613d69876 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
+++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [MultiSampleSmartSeq2SingleNuclei_v1.3.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [MultiSampleSmartSeq2SingleNuclei_v1.3.2](https://github.com/broadinstitute/warp/releases) | March, 2024 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ![](./snSS2.png)
 
@@ -82,6 +82,7 @@ The table below details the Multi-snSS2 inputs. The pipeline is designed to take
 | species | Optional description of the species from which the cells were derived. | Array of strings |
 | input_name_metadata_field | Optional input describing, when applicable, the metadata field containing the `input_names`. | String |
 | input_id_metadata_field | Optional string describing, when applicable, the metadata field containing the `input_ids`. | String |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
 
 ## Multi-snSS2 tasks and tools
 
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
index 8ab56b15bd..a758e085cb 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
+++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
@@ -2,13 +2,13 @@
 sidebar_position: 2
 ---
 
-# Smart-seq2 Single Nucleus Multi-Sample v1.3.0 Publication Methods
+# Smart-seq2 Single Nucleus Multi-Sample v1.3.1 Publication Methods
 
 Below we provide an example methods section for a publication. For the complete pipeline documentation, see the [Smart-seq2 Single Nucleus Multi-Sample Overview](./README.md).
 
 ## Methods
 
-Data preprocessing and count matrix construction for a batch (or plate) were performed using the Smart-seq2 Single Nucleus Multi-Sample v1.3.0 Pipeline (RRID:SCR_021312) as well as Picard v.2.26.10 with default tool parameters unless otherwise specified. Genomic references are publicly available in the [Broad References](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/mm10/v0/single_nucleus?pageState=(%22StorageObjectListTable%22:(%22f%22:%22%255B%255D%22))&prefix=&forceOnObjectsSortingFiltering=false) Google Bucket and are also listed in the [example workflow configuration](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/smartseq2_single_nucleus_multisample/mouse_example.json) in GitHub. 
+Data preprocessing and count matrix construction for a batch (or plate) were performed using the Smart-seq2 Single Nucleus Multi-Sample v1.3.1 Pipeline (RRID:SCR_021312) as well as Picard v.2.26.10 with default tool parameters unless otherwise specified. Genomic references are publicly available in the [Broad References](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/mm10/v0/single_nucleus?pageState=(%22StorageObjectListTable%22:(%22f%22:%22%255B%255D%22))&prefix=&forceOnObjectsSortingFiltering=false) Google Bucket and are also listed in the [example workflow configuration](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/smartseq2_single_nucleus_multisample/mouse_example.json) in GitHub. 
 
 For each nucleus in the batch, paired-end FASTQ files were first trimmed to remove adapters using the fastq-mcf tool with a subsampling parameter of 200,000 reads. The trimmed FASTQ files were then aligned to the GENCODE GRCm38 mouse genome using STAR v.2.7.10a. To count the number of reads per gene, but not isoforms, the quantMode parameter was set to GeneCounts. Multi-mapped reads, and optical and PCR duplicates, were removed from the resulting aligned BAM using the Picard MarkDuplicates tool with REMOVE_DUPLICATES = true. Metrics were collected on the deduplicated BAM using Picard CollectMultipleMetrics with VALIDATION_STRINGENCY =SILENT.
 

From 76762df2f12e9077add4e9c42c2f9903e034e122 Mon Sep 17 00:00:00 2001
From: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com>
Date: Thu, 14 Mar 2024 11:05:34 -0400
Subject: [PATCH 003/186] Ph pd 2514 multiome on terra (#1237)

* ph logic to pass in docker images based on cloud provider

* determine which whitelist files to use

* update tests

* add parameter metadata

* add error handling in atac

* fix comment

* PR comments

* update image and add utils

* add import

---------

Co-authored-by: phendriksen100 <103142505+phendriksen100@users.noreply.github.com>
---
 pipelines/skylab/multiome/Multiome.wdl        | 36 ++++++++++--
 pipelines/skylab/multiome/atac.json           |  1 +
 pipelines/skylab/multiome/atac.wdl            | 57 ++++++++++++++-----
 .../Plumbing/10k_pbmc_downsampled.json        |  1 +
 .../test_inputs/Scientific/10k_pbmc.json      |  1 +
 tasks/skylab/FastqProcessing.wdl              |  9 +--
 tasks/skylab/H5adUtils.wdl                    | 10 ++--
 tasks/skylab/PairedTagUtils.wdl               |  8 +--
 8 files changed, 88 insertions(+), 35 deletions(-)

diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index 64aa671836..73ae9d9670 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -4,11 +4,13 @@ import "../../../pipelines/skylab/multiome/atac.wdl" as atac
 import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
 import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
 import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender
+import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Multiome {
     String pipeline_version = "3.2.2"
 
     input {
+        String cloud_provider
         String input_id
         String cloud_provider
 
@@ -26,7 +28,6 @@ workflow Multiome {
         Boolean ignore_r1_read_length = false
         String star_strand_mode = "Forward"
         Boolean count_exons = false
-        File gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt"
         String? soloMultiMappers
 
         # ATAC inputs
@@ -34,7 +35,6 @@ workflow Multiome {
         Array[File] atac_r1_fastq
         Array[File] atac_r2_fastq
         Array[File] atac_r3_fastq
-        
         # BWA tar reference
         File tar_bwa_reference
         # Chromosone sizes 
@@ -42,17 +42,42 @@ workflow Multiome {
         # Trimadapters input
         String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG"
         String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
-        # Whitelist
-        File atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt"
 
         # CellBender
         Boolean run_cellbender = false
 
     }
 
+    # Determine docker prefix based on cloud provider
+    String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
+    String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
+    String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
+
+    # Define docker images
+    String snap_atac_docker_image = "snapatac2:1.0.5-2.3.2-1709230223"
+
+    # Define all whitelist files
+    File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt"
+    File gcp_atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt"
+    File azure_gex_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_gex.txt"
+    File azure_atac_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_atac.txt"
+
+    # Determine which whitelist files to use based on cloud provider
+    File gex_whitelist = if cloud_provider == "gcp" then gcp_gex_whitelist else azure_gex_whitelist
+    File atac_whitelist = if cloud_provider == "gcp" then gcp_atac_whitelist else azure_atac_whitelist
+
+    # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error
+    if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
+        call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
+            input:
+                message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+        }
+    }
+
     # Call the Optimus workflow
     call optimus.Optimus as Optimus {
         input:
+            cloud_provider = cloud_provider,
             counting_mode = counting_mode,
             r1_fastq = gex_r1_fastq,
             r2_fastq = gex_r2_fastq,
@@ -76,6 +101,7 @@ workflow Multiome {
     # Call the ATAC workflow
     call atac.ATAC as Atac {
         input:
+            cloud_provider = cloud_provider,
             read1_fastq_gzipped = atac_r1_fastq,
             read2_fastq_gzipped = atac_r2_fastq,
             read3_fastq_gzipped = atac_r3_fastq,
@@ -89,6 +115,7 @@ workflow Multiome {
     }
     call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes {
         input:
+            docker_path = docker_prefix + snap_atac_docker_image,
             atac_h5ad = Atac.snap_metrics,
             gex_h5ad = Optimus.h5ad_output_file,
             gex_whitelist = gex_whitelist,
@@ -110,7 +137,6 @@ workflow Multiome {
                 hardware_preemptible_tries = 2,
                 hardware_zones = "us-central1-a us-central1-c",
                 nvidia_driver_version = "470.82.01"
-
         }
     }
 
diff --git a/pipelines/skylab/multiome/atac.json b/pipelines/skylab/multiome/atac.json
index a8b9465fdc..1e898edd48 100644
--- a/pipelines/skylab/multiome/atac.json
+++ b/pipelines/skylab/multiome/atac.json
@@ -4,6 +4,7 @@
   "ATAC.TrimAdapters.adapter_seq_read1": "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG",
   "ATAC.TrimAdapters.adapter_seq_read2": "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG",
   "ATAC.input_id": "scATAC",
+  "ATAC.cloud_provider":"gcp",
   "ATAC.tar_bwa_reference": "gs://fc-dd55e131-ef49-4d02-aa2a-20640daaae1e/submissions/8f0dd71a-b42f-4503-b839-3f146941758a/IndexRef/53a91851-1f6c-4ab9-af66-b338ffb28b5a/call-BwaMem2Index/GRCh38.primary_assembly.genome.bwamem2.fa.tar",
   "ATAC.preindex": "false"
 }
diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 0431ba3997..be597c1f62 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -3,6 +3,7 @@ version 1.0
 import "../../../tasks/skylab/MergeSortBam.wdl" as Merge
 import "../../../tasks/skylab/FastqProcessing.wdl" as FastqProcessing
 import "../../../tasks/skylab/PairedTagUtils.wdl" as AddBB
+import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow ATAC {
   meta {
@@ -18,6 +19,7 @@ workflow ATAC {
 
     # Output prefix/base name for all intermediate files and pipeline outputs
     String input_id
+    String cloud_provider
 
     # Option for running files with preindex
     Boolean preindex = false
@@ -43,6 +45,26 @@ workflow ATAC {
 
   String pipeline_version = "1.1.9"
 
+  # Determine docker prefix based on cloud provider
+  String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
+  String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
+  String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
+
+  # Docker image names
+  String warp_tools_2_0_0 = "warp-tools:2.0.0"
+  String cutadapt_docker = "cutadapt:1.0.0-4.4-1709146458"
+  String samtools_docker = "samtools-dist-bwa:3.0.0"
+  String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311"
+  String snap_atac_docker = "snapatac2:1.0.4-2.3.1"
+
+  # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error
+  if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
+    call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
+        input:
+            message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+    }
+  }
+
   parameter_meta {
     read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads"
     read2_fastq_gzipped: "read 2 FASTQ file as input for the pipeline, contains the cellular barcodes corresponding to the reads in the read1 FASTQ and read 3 FASTQ"
@@ -52,7 +74,6 @@ workflow ATAC {
     num_threads_bwa: "Number of threads for bwa-mem2 task (default: 128)"
     mem_size_bwa: "Memory size in GB for bwa-mem2 task (default: 512)"
     cpu_platform_bwa: "CPU platform for bwa-mem2 task (default: Intel Ice Lake)"
-  
  }
 
   call GetNumSplits {
@@ -69,7 +90,8 @@ workflow ATAC {
       barcodes_fastq = read2_fastq_gzipped,
       output_base_name = input_id,
       num_output_files = GetNumSplits.ranks_per_node_out,
-      whitelist = whitelist
+      whitelist = whitelist,
+      docker_path = docker_prefix + warp_tools_2_0_0
   }
 
   scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) {
@@ -79,7 +101,8 @@ workflow ATAC {
         read3_fastq = SplitFastq.fastq_R3_output_array[idx],
         output_base_name = input_id + "_" + idx,
         adapter_seq_read1 = adapter_seq_read1,
-        adapter_seq_read3 = adapter_seq_read3
+        adapter_seq_read3 = adapter_seq_read3,
+        docker_path = docker_prefix + cutadapt_docker
     }
   }
 
@@ -91,21 +114,24 @@ workflow ATAC {
         output_base_name = input_id,
         nthreads = num_threads_bwa, 
         mem_size = mem_size_bwa,
-        cpu_platform = cpu_platform_bwa
+        cpu_platform = cpu_platform_bwa,
+        docker_path = docker_prefix + samtools_docker
   }
 
   if (preindex) {
     call AddBB.AddBBTag as BBTag {
       input:
         bam = BWAPairedEndAlignment.bam_aligned_output,
-        input_id = input_id
+        input_id = input_id,
+        docker_path = docker_prefix + upstools_docker
     }
     call CreateFragmentFile as BB_fragment {
       input:
         bam = BBTag.bb_bam,
         chrom_sizes = chrom_sizes,
         annotations_gtf = annotations_gtf,
-        preindex = preindex
+        preindex = preindex,
+        docker_path = docker_prefix + snap_atac_docker
     }
   }
   if (!preindex) {
@@ -114,7 +140,8 @@ workflow ATAC {
         bam = BWAPairedEndAlignment.bam_aligned_output,
         chrom_sizes = chrom_sizes,
         annotations_gtf = annotations_gtf,
-        preindex = preindex
+        preindex = preindex,
+        docker_path = docker_prefix + snap_atac_docker
 
     }
   }
@@ -231,7 +258,7 @@ task TrimAdapters {
     # Runtime attributes/docker
     Int disk_size = ceil(2 * ( size(read1_fastq, "GiB") + size(read3_fastq, "GiB") )) + 200
     Int mem_size = 4
-    String docker_image = "us.gcr.io/broad-gotc-prod/cutadapt:1.0.0-4.4-1686752919"
+    String docker_path
   }
 
   parameter_meta {
@@ -242,7 +269,7 @@ task TrimAdapters {
     adapter_seq_read1: "cutadapt option for the sequence adapter for read 1 fastq"
     adapter_seq_read3: "cutadapt option for the sequence adapter for read 3 fastq"
     output_base_name: "base name to be used for the output of the task"
-    docker_image: "the docker image using cutadapt to be used (default:us.gcr.io/broad-gotc-prod/cutadapt:1.0.0-4.4-1686752919)"
+    docker_path: "The docker image path containing the runtime environment for this task"
     mem_size: "the size of memory used during trimming adapters"
     disk_size : "disk size used in trimming adapters step"
   }
@@ -269,7 +296,7 @@ task TrimAdapters {
 
   # use docker image for given tool cutadapat
   runtime {
-    docker: docker_image
+    docker: docker_path
     disks: "local-disk ${disk_size} HDD"
     memory: "${mem_size} GiB"
   }
@@ -290,7 +317,7 @@ task BWAPairedEndAlignment {
     String read_group_sample_name = "RGSN1"
     String suffix = "trimmed_adapters.fastq.gz"
     String output_base_name
-    String docker_image = "us.gcr.io/broad-gotc-prod/samtools-dist-bwa:2.0.0"
+    String docker_path
 
     # Runtime attributes
     Int disk_size = 2000
@@ -309,7 +336,7 @@ task BWAPairedEndAlignment {
     mem_size: "the size of memory used during alignment"
     disk_size : "disk size used in bwa alignment step"
     output_base_name: "basename to be used for the output of the task"
-    docker_image: "the docker image using BWA to be used (default: us.gcr.io/broad-gotc-prod/samtools-bwa-mem-2:1.0.0-2.2.1_x64-linux-1685469504)"
+    docker_path: "The docker image path containing the runtime environment for this task"
   }
 
   String bam_aligned_output_name = output_base_name + ".bam"
@@ -418,7 +445,7 @@ task BWAPairedEndAlignment {
   >>>
 
   runtime {
-    docker: docker_image
+    docker: docker_path
     disks: "local-disk ${disk_size} SSD"
     cpu: nthreads
     cpuPlatform: cpu_platform
@@ -442,6 +469,7 @@ task CreateFragmentFile {
     Int mem_size = 16
     Int nthreads = 1
     String cpuPlatform = "Intel Cascade Lake"
+    String docker_path
   }
 
   String bam_base_name = basename(bam, ".bam")
@@ -452,6 +480,7 @@ task CreateFragmentFile {
     chrom_sizes: "Text file containing chrom_sizes for genome build (i.e. hg38)."
     disk_size: "Disk size used in create fragment file step."
     mem_size: "The size of memory used in create fragment file."
+    docker_path: "The docker image path containing the runtime environment for this task"
   }
 
   command <<<
@@ -492,7 +521,7 @@ task CreateFragmentFile {
   >>>
 
   runtime {
-    docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1"
+    docker: docker_path
     disks: "local-disk ${disk_size} SSD"
     memory: "${mem_size} GiB"
     cpu: nthreads
diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
index bd9b7a1172..c4a7d6d5d7 100644
--- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
+++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
@@ -1,6 +1,7 @@
 {
   "Multiome.annotations_gtf":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf",
   "Multiome.input_id":"10k_PBMC_downsampled",
+  "Multiome.cloud_provider":"gcp",
   "Multiome.gex_r1_fastq":[
     "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R1_gex.fastq.gz"
   ],
diff --git a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json
index a5ddf2c947..3ca7b1d546 100644
--- a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json
+++ b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json
@@ -5,6 +5,7 @@
     "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_I1_001.fastq.gz"
   ],
   "Multiome.input_id":"10k_PBMC",
+  "Multiome.cloud_provider":"gcp",
   "Multiome.gex_r1_fastq":[
     "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_R1_001.fastq.gz",
     "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_R1_001.fastq.gz"
diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index 939d1e1e12..bd6f9b06b7 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -244,10 +244,7 @@ task FastqProcessATAC {
         String output_base_name
         File whitelist
         String barcode_index1 = basename(barcodes_fastq[0])
-
-        # [?] copied from corresponding optimus wdl for fastqprocessing
-        # using the latest build of warp-tools in GCR
-        String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+        String docker_path
 
         # Runtime attributes [?]
         Int mem_size = 5
@@ -273,7 +270,7 @@ task FastqProcessATAC {
         read_structure: "A string that specifies the barcode (C) positions in the Read 2 fastq"
         barcode_orientation: "A string that specifies the orientation of barcode needed for scATAC data. The default is FIRST_BP. Other options include LAST_BP, FIRST_BP_RC or LAST_BP_RC."
         whitelist: "10x genomics cell barcode whitelist for scATAC"
-        docker: "(optional) the docker image containing the runtime environment for this task"
+        docker_path: "The docker image path containing the runtime environment for this task"
         mem_size: "(optional) the amount of memory (MiB) to provision for this task"
         cpu: "(optional) the number of cpus to provision for this task"
         disk_size: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -362,7 +359,7 @@ task FastqProcessATAC {
     >>>
 
     runtime {
-        docker: docker
+        docker: docker_path
         cpu: cpu
         memory: "${mem_size} MiB"
         disks: "local-disk ${disk_size} HDD"
diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl
index 99ef957e4b..54a27de18f 100644
--- a/tasks/skylab/H5adUtils.wdl
+++ b/tasks/skylab/H5adUtils.wdl
@@ -184,7 +184,7 @@ task SingleNucleusOptimusH5adOutput {
 }
 
 task JoinMultiomeBarcodes {
-    input {
+  input {
     File atac_h5ad
     File atac_fragment
     File gex_h5ad
@@ -196,9 +196,9 @@ task JoinMultiomeBarcodes {
     Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(gex_h5ad, "MiB") + size(atac_fragment, "MiB")) * 3) + 10000
     Int disk =  ceil((size(atac_h5ad, "GiB") + size(gex_h5ad, "GiB") + size(atac_fragment, "GiB")) * 5) + 10
   }
-    String gex_base_name = basename(gex_h5ad, ".h5ad")
-    String atac_base_name = basename(atac_h5ad, ".h5ad")
-    String atac_fragment_base = basename(atac_fragment, ".tsv")
+  String gex_base_name = basename(gex_h5ad, ".h5ad")
+  String atac_base_name = basename(atac_h5ad, ".h5ad")
+  String atac_fragment_base = basename(atac_fragment, ".tsv")
 
   parameter_meta {
     atac_h5ad: "The resulting h5ad from the ATAC workflow."
@@ -277,7 +277,7 @@ task JoinMultiomeBarcodes {
   >>>
 
   runtime {
-    docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1-1700590229"
+    docker: docker_path
     disks: "local-disk ~{disk} HDD"
     memory: "${machine_mem_mb} MiB"
     cpu: nthreads
diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl
index 779ac4fe57..3abc7df45a 100644
--- a/tasks/skylab/PairedTagUtils.wdl
+++ b/tasks/skylab/PairedTagUtils.wdl
@@ -130,9 +130,7 @@ task AddBBTag {
     input {
         File bam
         String input_id
-
-        # using the latest build of upstools docker in GCR
-        String docker = "us.gcr.io/broad-gotc-prod/upstools:1.0.0-2023.03.03-1704300311"
+        String docker_path
 
         # Runtime attributes
         Int mem_size = 8
@@ -150,7 +148,7 @@ task AddBBTag {
     parameter_meta {
         bam: "BAM with aligned reads and barcode in the CB tag"
         input_id: "input ID"
-        docker: "(optional) the docker image containing the runtime environment for this task"
+        docker_path: "The docker image path containing the runtime environment for this task"
         mem_size: "(optional) the amount of memory (MiB) to provision for this task"
         cpu: "(optional) the number of cpus to provision for this task"
         disk_size: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -169,7 +167,7 @@ task AddBBTag {
     >>>
 
     runtime {
-        docker: docker
+        docker: docker_path
         cpu: cpu
         memory: "${mem_size} GiB"
         disks: "local-disk ${disk_size} HDD"

From a859091b8d0d99c8ce3afda48ced2dc5b0bd5861 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 14 Mar 2024 11:25:42 -0400
Subject: [PATCH 004/186] lost the docker_path in joinmultiomebarcode task

---
 tasks/skylab/H5adUtils.wdl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl
index 54a27de18f..f5e61243a5 100644
--- a/tasks/skylab/H5adUtils.wdl
+++ b/tasks/skylab/H5adUtils.wdl
@@ -195,6 +195,7 @@ task JoinMultiomeBarcodes {
     String cpuPlatform = "Intel Cascade Lake"
     Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(gex_h5ad, "MiB") + size(atac_fragment, "MiB")) * 3) + 10000
     Int disk =  ceil((size(atac_h5ad, "GiB") + size(gex_h5ad, "GiB") + size(atac_fragment, "GiB")) * 5) + 10
+    String docker_path
   }
   String gex_base_name = basename(gex_h5ad, ".h5ad")
   String atac_base_name = basename(atac_h5ad, ".h5ad")

From 831e2444d6e3d26b7156a565709f4ed4a10a4af0 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 14 Mar 2024 11:42:41 -0400
Subject: [PATCH 005/186] update TestMultiome.wdl

---
 verification/test-wdls/TestMultiome.wdl | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/verification/test-wdls/TestMultiome.wdl b/verification/test-wdls/TestMultiome.wdl
index 6da047efcc..1bc9953637 100644
--- a/verification/test-wdls/TestMultiome.wdl
+++ b/verification/test-wdls/TestMultiome.wdl
@@ -26,7 +26,6 @@ workflow TestMultiome {
       Boolean ignore_r1_read_length = false
       String star_strand_mode = "Forward"
       Boolean count_exons = false
-      File gex_whitelist = "gs://broad-gotc-test-storage/Multiome/input/737K-arc-v1_gex.txt"
       String? soloMultiMappers
 
       # ATAC inputs
@@ -43,8 +42,6 @@ workflow TestMultiome {
       # Trimadapters input
       String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG"
       String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
-      # Whitelist
-      File atac_whitelist = "gs://broad-gotc-test-storage/Multiome/input/737K-arc-v1_atac.txt"
 
       # These values will be determined and injected into the inputs by the scala test framework
       String truth_path
@@ -76,7 +73,6 @@ workflow TestMultiome {
         ignore_r1_read_length = ignore_r1_read_length,
         star_strand_mode = star_strand_mode,
         count_exons = count_exons,
-        gex_whitelist = gex_whitelist,
         atac_r1_fastq = atac_r1_fastq,
         atac_r2_fastq = atac_r2_fastq,
         atac_r3_fastq = atac_r3_fastq,
@@ -84,7 +80,6 @@ workflow TestMultiome {
         adapter_seq_read1 = adapter_seq_read1,
         adapter_seq_read3 = adapter_seq_read3,
         chrom_sizes = chrom_sizes,
-        atac_whitelist = atac_whitelist,
         run_cellbender = run_cellbender,
         soloMultiMappers = soloMultiMappers,
         cloud_provider = cloud_provider

From 53c7c2725d7b69b4b70bfd7a1498ffe389727e08 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 14 Mar 2024 13:09:53 -0400
Subject: [PATCH 006/186] revert cutadapt docker image tag in atac.wdl

---
 pipelines/skylab/multiome/atac.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index be597c1f62..4a36f1b95a 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -52,7 +52,7 @@ workflow ATAC {
 
   # Docker image names
   String warp_tools_2_0_0 = "warp-tools:2.0.0"
-  String cutadapt_docker = "cutadapt:1.0.0-4.4-1709146458"
+  String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919"
   String samtools_docker = "samtools-dist-bwa:3.0.0"
   String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311"
   String snap_atac_docker = "snapatac2:1.0.4-2.3.1"

From cbc01c38cbdeac372ac5df392a848042a56f3e19 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 14 Mar 2024 18:42:06 -0400
Subject: [PATCH 007/186] revert snapatac2 docker image tag in Multiome.wdl

---
 pipelines/skylab/multiome/Multiome.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index 236f5fd684..ebca6c2083 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -55,7 +55,7 @@ workflow Multiome {
     String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
 
     # Define docker images
-    String snap_atac_docker_image = "snapatac2:1.0.5-2.3.2-1709230223"
+    String snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229"
 
     # Define all whitelist files
     File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt"

From f3b0294b470ecf3a01e7e8f07c83fffe66b843a0 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 15 Mar 2024 09:50:43 -0400
Subject: [PATCH 008/186] try to fix changelog

---
 pipelines/skylab/paired_tag/PairedTag.changelog.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md
index 5eb6f52ac4..b97f823d6f 100644
--- a/pipelines/skylab/paired_tag/PairedTag.changelog.md
+++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md
@@ -1,9 +1,8 @@
 # 0.3.1
-2024-03-01 (Date of Last Commit)
+2024-03-02 (Date of Last Commit)
 * Updated the Optimus.wdl to run on Azure. This change does not affect the PairedTag pipeline.
 
 # 0.3.0
-
 2024-03-01 (Date of Last Commit)
 
 * Added the gene expression library-level metrics CSV as output of the Paired-tag pipeline; this is produced by the Optimus subworkflow

From 66c9082429bfc3fd0fb6edff71fd1999067a56f9 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 15 Mar 2024 09:54:47 -0400
Subject: [PATCH 009/186] try to fix changelog

---
 pipelines/skylab/paired_tag/PairedTag.changelog.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md
index b97f823d6f..0e4b60be07 100644
--- a/pipelines/skylab/paired_tag/PairedTag.changelog.md
+++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md
@@ -1,8 +1,10 @@
 # 0.3.1
 2024-03-02 (Date of Last Commit)
+
 * Updated the Optimus.wdl to run on Azure. This change does not affect the PairedTag pipeline.
 
 # 0.3.0
+
 2024-03-01 (Date of Last Commit)
 
 * Added the gene expression library-level metrics CSV as output of the Paired-tag pipeline; this is produced by the Optimus subworkflow

From e4fcd9918a8627b2614398ca6886c91ac2e90567 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 15 Mar 2024 13:35:39 -0400
Subject: [PATCH 010/186] remove duplicate cloud_provider input declaration

---
 pipelines/skylab/multiome/Multiome.wdl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index ebca6c2083..9d9f257d15 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -13,7 +13,6 @@ workflow Multiome {
     input {
         String cloud_provider
         String input_id
-        String cloud_provider
 
         # Optimus Inputs
         String counting_mode = "sn_rna"

From f3b97c81c2e5c1a6c724a6bf832ea0692cd75424 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 18 Mar 2024 09:51:26 -0400
Subject: [PATCH 011/186] try adding sas token to azure public bucket

---
 pipelines/skylab/optimus/Optimus.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index 689d99636b..feaa8d204a 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -76,8 +76,8 @@ workflow Optimus {
   # 10x parameters
   File gcp_whitelist_v2 = "gs://gcp-public-data--broad-references/RNA/resources/737k-august-2016.txt"
   File gcp_whitelist_v3 = "gs://gcp-public-data--broad-references/RNA/resources/3M-febrary-2018.txt"
-  File azure_whitelist_v2 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/737k-august-2016.txt"
-  File azure_whitelist_v3 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/3M-febrary-2018.txt"
+  File azure_whitelist_v2 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/737k-august-2016.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D"
+  File azure_whitelist_v3 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/3M-febrary-2018.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D"
 
   # Takes the first read1 FASTQ from the inputs to check for chemistry match
   File r1_single_fastq = r1_fastq[0]

From c5f2af5feaeaed845dd8a81644c75b2e7770b10e Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 18 Mar 2024 09:52:24 -0400
Subject: [PATCH 012/186] try adding sas token to azure public bucket

---
 pipelines/skylab/multiome/Multiome.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index 9d9f257d15..b933c43be4 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -59,8 +59,8 @@ workflow Multiome {
     # Define all whitelist files
     File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt"
     File gcp_atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt"
-    File azure_gex_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_gex.txt"
-    File azure_atac_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_atac.txt"
+    File azure_gex_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_gex.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D"
+    File azure_atac_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_atac.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D"
 
     # Determine which whitelist files to use based on cloud provider
     File gex_whitelist = if cloud_provider == "gcp" then gcp_gex_whitelist else azure_gex_whitelist

From c5f462a77d8059ce8b7436e068149604dbe14af2 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 18 Mar 2024 14:54:02 -0400
Subject: [PATCH 013/186] try files not strings

---
 tasks/skylab/FastqProcessing.wdl | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index bd6f9b06b7..de605692a2 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -236,9 +236,9 @@ task FastqProcessingSlidSeq {
 task FastqProcessATAC {
 
     input {
-        Array[String] read1_fastq
-        Array[String] read3_fastq
-        Array[String] barcodes_fastq
+        Array[File] read1_fastq
+        Array[File] read3_fastq
+        Array[File] barcodes_fastq
         String read_structure = "16C"
         String barcode_orientation = "FIRST_BP_RC"
         String output_base_name
@@ -295,9 +295,18 @@ task FastqProcessATAC {
         echo $read1_fastq_files
         # Make downsample fq for barcode orientation check of R2 barcodes
         mkdir /cromwell_root/input_fastq
-        gcloud storage cp $read1_fastq_files /cromwell_root/input_fastq
-        gcloud storage cp $read2_fastq_files /cromwell_root/input_fastq
-        gcloud storage cp $read3_fastq_files /cromwell_root/input_fastq
+        mv $read1_fastq_files /cromwell_root/input_fastq
+        mv $read2_fastq_files /cromwell_root/input_fastq
+        mv $read3_fastq_files /cromwell_root/input_fastq
+
+        #gcloud storage cp $read1_fastq_files /cromwell_root/input_fastq
+        #gcloud storage cp $read2_fastq_files /cromwell_root/input_fastq
+        #gcloud storage cp $read3_fastq_files /cromwell_root/input_fastq
+
+        # Use azcopy to copy files from Azure Blob Storage
+        #azcopy copy $read1_fastq_files /cromwell_root/input_fastq #--recursive --from-to=BlobLocal --blob-type=BlockBlob --sas-token="~{azure_sas_token}"
+        #azcopy copy $read2_fastq_files /cromwell_root/input_fastq #--recursive --from-to=BlobLocal --blob-type=BlockBlob --sas-token="~{azure_sas_token}"
+        #azcopy copy $read3_fastq_files /cromwell_root/input_fastq #--recursive --from-to=BlobLocal --blob-type=BlockBlob --sas-token="~{azure_sas_token}"
 
         path="/cromwell_root/input_fastq/"
         barcode_index="~{barcode_index1}"

From e70547920f9d132066fa8deb8498e26c1bcc53f3 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 18 Mar 2024 15:38:17 -0400
Subject: [PATCH 014/186] try files not strings

---
 tasks/skylab/FastqProcessing.wdl | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index de605692a2..c899901c3d 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -303,11 +303,6 @@ task FastqProcessATAC {
         #gcloud storage cp $read2_fastq_files /cromwell_root/input_fastq
         #gcloud storage cp $read3_fastq_files /cromwell_root/input_fastq
 
-        # Use azcopy to copy files from Azure Blob Storage
-        #azcopy copy $read1_fastq_files /cromwell_root/input_fastq #--recursive --from-to=BlobLocal --blob-type=BlockBlob --sas-token="~{azure_sas_token}"
-        #azcopy copy $read2_fastq_files /cromwell_root/input_fastq #--recursive --from-to=BlobLocal --blob-type=BlockBlob --sas-token="~{azure_sas_token}"
-        #azcopy copy $read3_fastq_files /cromwell_root/input_fastq #--recursive --from-to=BlobLocal --blob-type=BlockBlob --sas-token="~{azure_sas_token}"
-
         path="/cromwell_root/input_fastq/"
         barcode_index="~{barcode_index1}"
         file="${path}${barcode_index}"

From 11835ddcb4b72964f17deb5173815057cc1e1e9d Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 19 Mar 2024 08:56:16 -0400
Subject: [PATCH 015/186] remove cromwell root

---
 tasks/skylab/FastqProcessing.wdl | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index c899901c3d..4a4aad4e1a 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -294,16 +294,16 @@ task FastqProcessATAC {
 
         echo $read1_fastq_files
         # Make downsample fq for barcode orientation check of R2 barcodes
-        mkdir /cromwell_root/input_fastq
-        mv $read1_fastq_files /cromwell_root/input_fastq
-        mv $read2_fastq_files /cromwell_root/input_fastq
-        mv $read3_fastq_files /cromwell_root/input_fastq
+        mkdir input_fastq
+        mv $read1_fastq_files input_fastq/
+        mv $read2_fastq_files input_fastq/
+        mv $read3_fastq_files input_fastq/
 
         #gcloud storage cp $read1_fastq_files /cromwell_root/input_fastq
         #gcloud storage cp $read2_fastq_files /cromwell_root/input_fastq
         #gcloud storage cp $read3_fastq_files /cromwell_root/input_fastq
 
-        path="/cromwell_root/input_fastq/"
+        path="input_fastq/"
         barcode_index="~{barcode_index1}"
         file="${path}${barcode_index}"
         zcat "$file" | sed -n '2~4p' | shuf -n 1000 > downsample.fq
@@ -313,7 +313,7 @@ task FastqProcessATAC {
         for fastq in "${FASTQ2_ARRAY[@]}"
         do
             BASE=`basename $fastq`
-            BASE=`echo --R1 /cromwell_root/input_fastq/$BASE`
+            BASE=`echo --R1 input_fastq/$BASE`
             R1_FILES_CONCAT+="$BASE "
         done
         echo $R1_FILES_CONCAT
@@ -323,7 +323,7 @@ task FastqProcessATAC {
         for fastq in "${FASTQ1_ARRAY[@]}"
         do
             BASE=`basename $fastq`
-            BASE=`echo --R2 /cromwell_root/input_fastq/$BASE`
+            BASE=`echo --R2 /input_fastq/$BASE`
             R2_FILES_CONCAT+="$BASE "
         done
         echo $R2_FILES_CONCAT
@@ -333,7 +333,7 @@ task FastqProcessATAC {
         for fastq in "${FASTQ3_ARRAY[@]}"
         do
             BASE=`basename $fastq`
-            BASE=`echo --R3 /cromwell_root/input_fastq/$BASE`
+            BASE=`echo --R3 /input_fastq/$BASE`
             R3_FILES_CONCAT+="$BASE "
         done
         echo $R3_FILES_CONCAT
@@ -346,8 +346,8 @@ task FastqProcessATAC {
 
         # Call fastq process
         # outputs fastq files where the corrected barcode is in the read name
-        mkdir /cromwell_root/output_fastq
-        cd /cromwell_root/output_fastq
+        mkdir output_fastq/
+        cd /output_fastq
 
         fastqprocess \
         --num-output-files ~{num_output_files} \

From a82926083378d0a431dbfe46e9e4a33fa4caa8c2 Mon Sep 17 00:00:00 2001
From: kayleemathews <kmathews@broadinstitute.org>
Date: Tue, 19 Mar 2024 10:48:28 -0400
Subject: [PATCH 016/186] update docs

---
 website/docs/Pipelines/ATAC/README.md                         | 1 +
 website/docs/Pipelines/Multiome_Pipeline/README.md            | 4 +---
 website/docs/Pipelines/Optimus_Pipeline/README.md             | 2 +-
 website/docs/Pipelines/PairedTag_Pipeline/README.md           | 2 +-
 website/docs/Pipelines/SlideSeq_Pipeline/README.md            | 2 +-
 .../Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md | 2 +-
 .../multi_snss2.methods.md                                    | 4 ++--
 7 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md
index 547bbeb5ac..abe0d8c91c 100644
--- a/website/docs/Pipelines/ATAC/README.md
+++ b/website/docs/Pipelines/ATAC/README.md
@@ -50,6 +50,7 @@ The following describes the inputs of the ATAC workflow. For more details on how
 | read2_fastq_gzipped | Fastq inputs (array of compressed read 2 FASTQ files containing cellular barcodes). |
 | read3_fastq_gzipped | Fastq inputs (array of compressed read 3 FASTQ files). |
 | input_id | Output prefix/base name for all intermediate files and pipeline outputs. |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". |
 | preindex | Boolean used for paired-tag data and not applicable to ATAC data types; default is set to false. | 
 | tar_bwa_reference | BWA reference (tar file containing reference fasta and corresponding files). |
 | num_threads_bwa | Optional integer defining the number of CPUs per node for the BWA-mem alignment task (default: 128). |
diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md
index 085369d5a0..3f7e5a04f3 100644
--- a/website/docs/Pipelines/Multiome_Pipeline/README.md
+++ b/website/docs/Pipelines/Multiome_Pipeline/README.md
@@ -8,7 +8,7 @@ slug: /Pipelines/Multiome_Pipeline/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
 
-| [Multiome v3.3.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact the [WARP Pipeline Development team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [Multiome v3.3.1](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact the [WARP Pipeline Development team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ![Multiome_diagram](./multiome_diagram.png)
 
@@ -70,7 +70,6 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta
 | ignore_r1_read_length | Optional boolean for the Optimus (GEX) pipeline indicating if the pipeline should ignore barcode chemistry check; if "true", the workflow will not ensure the `10x_chemistry_version` input matches the chemistry in the read 1 FASTQ; default is "false". | Boolean |
 | star_strand_mode | Optional string for the Optimus (GEX) pipeline for performing STARsolo alignment on forward stranded, reverse stranded, or unstranded data; default is "Forward". | String |
 | count_exons | Optional boolean for the Optimus (GEX) pipeline indicating if the workflow should calculate exon counts **when in single-nucleus (sn_rna) mode**; if "true" in sc_rna mode, the workflow will return an error; default is "false". | Boolean |
-| gex_whitelist | Optional file containing the list of valid barcodes for 10x multiome GEX data; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" when run on GCP. | File |
 | soloMultiMappers | Optional string describing whether or not the Optimus (GEX) pipeline should run STARsolo with the `--soloMultiMappers` flag. | String |
 | atac_r1_fastq | Array of read 1 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
 | atac_r2_fastq | Array of barcodes FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
@@ -79,7 +78,6 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta
 | chrom_sizes | File containing the genome chromosome sizes; used to calculate ATAC fragment file metrics. | File |
 | adapter_seq_read1 | Optional string describing the adapter sequence for ATAC read 1 paired-end reads to be used during adapter trimming with Cutadapt; default is "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG". | String |
 | adapter_seq_read3 | Optional string describing the adapter sequence for ATAC read 2 paired-end reads to be used during adapter trimming with Cutadapt; default is "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG". | String |
-| atac_whitelist | Optional file containing the list of valid barcodes for 10x multiome ATAC adata; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt". | File |
 | run_cellbender | Optional boolean used to determine if the Optimus (GEX) pipeline should run CellBender on the output gene expression h5ad file, `h5ad_output_file_gex`; default is "false". | Boolean |
 
 #### Sample inputs for analyses in a Terra Workspace
diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md
index 843fc23220..8a79e553af 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/README.md
+++ b/website/docs/Pipelines/Optimus_Pipeline/README.md
@@ -8,7 +8,7 @@ slug: /Pipelines/Optimus_Pipeline/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
 
-| [optimus_v6.5.0](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | February, 2024 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [optimus_v6.5.1](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | March, 2024 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 
 ![Optimus_diagram](Optimus_diagram.png)
diff --git a/website/docs/Pipelines/PairedTag_Pipeline/README.md b/website/docs/Pipelines/PairedTag_Pipeline/README.md
index d90eb1f309..d0f5b42e47 100644
--- a/website/docs/Pipelines/PairedTag_Pipeline/README.md
+++ b/website/docs/Pipelines/PairedTag_Pipeline/README.md
@@ -8,7 +8,7 @@ slug: /Pipelines/PairedTag_Pipeline/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
 
-| [PairedTag_v0.3.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
+| [PairedTag_v0.3.1](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
 
 
 ## Introduction to the Paired-Tag workflow
diff --git a/website/docs/Pipelines/SlideSeq_Pipeline/README.md b/website/docs/Pipelines/SlideSeq_Pipeline/README.md
index 153a8656ae..c56f4064d3 100644
--- a/website/docs/Pipelines/SlideSeq_Pipeline/README.md
+++ b/website/docs/Pipelines/SlideSeq_Pipeline/README.md
@@ -8,7 +8,7 @@ slug: /Pipelines/SlideSeq_Pipeline/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
 
-| [SlideSeq v3.1.2](https://github.com/broadinstitute/warp/releases) | February, 2024 | Elizabeth Kiernan & Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
+| [SlideSeq v3.1.3](https://github.com/broadinstitute/warp/releases) | March, 2024 | Elizabeth Kiernan & Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ![SlideSeq_diagram](./slide-seq_diagram.png)
 
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
index 25a5426fe7..11983e5187 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
+++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [MultiSampleSmartSeq2SingleNuclei_v1.3.1](https://github.com/broadinstitute/warp/releases) | March, 2024 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [MultiSampleSmartSeq2SingleNuclei_v1.3.2](https://github.com/broadinstitute/warp/releases) | March, 2024 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 
 ![](./snSS2.png)
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
index a758e085cb..03133f3ce4 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
+++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
@@ -2,13 +2,13 @@
 sidebar_position: 2
 ---
 
-# Smart-seq2 Single Nucleus Multi-Sample v1.3.1 Publication Methods
+# Smart-seq2 Single Nucleus Multi-Sample v1.3.2 Publication Methods
 
 Below we provide an example methods section for a publication. For the complete pipeline documentation, see the [Smart-seq2 Single Nucleus Multi-Sample Overview](./README.md).
 
 ## Methods
 
-Data preprocessing and count matrix construction for a batch (or plate) were performed using the Smart-seq2 Single Nucleus Multi-Sample v1.3.1 Pipeline (RRID:SCR_021312) as well as Picard v.2.26.10 with default tool parameters unless otherwise specified. Genomic references are publicly available in the [Broad References](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/mm10/v0/single_nucleus?pageState=(%22StorageObjectListTable%22:(%22f%22:%22%255B%255D%22))&prefix=&forceOnObjectsSortingFiltering=false) Google Bucket and are also listed in the [example workflow configuration](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/smartseq2_single_nucleus_multisample/mouse_example.json) in GitHub. 
+Data preprocessing and count matrix construction for a batch (or plate) were performed using the Smart-seq2 Single Nucleus Multi-Sample v1.3.2 Pipeline (RRID:SCR_021312) as well as Picard v.2.26.10 with default tool parameters unless otherwise specified. Genomic references are publicly available in the [Broad References](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/mm10/v0/single_nucleus?pageState=(%22StorageObjectListTable%22:(%22f%22:%22%255B%255D%22))&prefix=&forceOnObjectsSortingFiltering=false) Google Bucket and are also listed in the [example workflow configuration](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/smartseq2_single_nucleus_multisample/mouse_example.json) in GitHub. 
 
 For each nucleus in the batch, paired-end FASTQ files were first trimmed to remove adapters using the fastq-mcf tool with a subsampling parameter of 200,000 reads. The trimmed FASTQ files were then aligned to the GENCODE GRCm38 mouse genome using STAR v.2.7.10a. To count the number of reads per gene, but not isoforms, the quantMode parameter was set to GeneCounts. Multi-mapped reads, and optical and PCR duplicates, were removed from the resulting aligned BAM using the Picard MarkDuplicates tool with REMOVE_DUPLICATES = true. Metrics were collected on the deduplicated BAM using Picard CollectMultipleMetrics with VALIDATION_STRINGENCY =SILENT.
 

From 68321e06f78f9e038e27baa07d3d3642f16b6cb7 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 19 Mar 2024 10:57:04 -0400
Subject: [PATCH 017/186] quote whitelist

---
 tasks/skylab/FastqProcessing.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index 4a4aad4e1a..017ff02d8a 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -338,7 +338,7 @@ task FastqProcessATAC {
         done
         echo $R3_FILES_CONCAT
 
-        python3 /warptools/scripts/dynamic-barcode-orientation.py downsample.fq ~{whitelist} best_match.txt
+        python3 /warptools/scripts/dynamic-barcode-orientation.py downsample.fq "~{whitelist}" best_match.txt
         
         cat best_match.txt
         barcode_choice=$(<best_match.txt)

From 1ea1c174155c4446a80ca6a6aa9f7df39d2f6224 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 19 Mar 2024 11:29:57 -0400
Subject: [PATCH 018/186] cd into relative output_fastq directory

---
 tasks/skylab/FastqProcessing.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index 017ff02d8a..7bb6dc42fd 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -347,7 +347,7 @@ task FastqProcessATAC {
         # Call fastq process
         # outputs fastq files where the corrected barcode is in the read name
         mkdir output_fastq/
-        cd /output_fastq
+        cd output_fastq/
 
         fastqprocess \
         --num-output-files ~{num_output_files} \

From 96ca1609943c0f828cea9710d3ecc3a9ac222f39 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 19 Mar 2024 11:52:38 -0400
Subject: [PATCH 019/186] dirs

---
 tasks/skylab/FastqProcessing.wdl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index 7bb6dc42fd..90cd0732fb 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -323,7 +323,7 @@ task FastqProcessATAC {
         for fastq in "${FASTQ1_ARRAY[@]}"
         do
             BASE=`basename $fastq`
-            BASE=`echo --R2 /input_fastq/$BASE`
+            BASE=`echo --R2 input_fastq/$BASE`
             R2_FILES_CONCAT+="$BASE "
         done
         echo $R2_FILES_CONCAT
@@ -333,7 +333,7 @@ task FastqProcessATAC {
         for fastq in "${FASTQ3_ARRAY[@]}"
         do
             BASE=`basename $fastq`
-            BASE=`echo --R3 /input_fastq/$BASE`
+            BASE=`echo --R3 input_fastq/$BASE`
             R3_FILES_CONCAT+="$BASE "
         done
         echo $R3_FILES_CONCAT
@@ -371,8 +371,8 @@ task FastqProcessATAC {
     }
 
     output {
-        Array[File] fastq_R1_output_array = glob("/cromwell_root/output_fastq/fastq_R1_*")
-        Array[File] fastq_R3_output_array = glob("/cromwell_root/output_fastq/fastq_R3_*")
+        Array[File] fastq_R1_output_array = glob("output_fastq/fastq_R1_*")
+        Array[File] fastq_R3_output_array = glob("output_fastq/fastq_R3_*")
     }
 }
 

From bd24093da53f2d4249dc3df4e39b7d860f4b7b3e Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 19 Mar 2024 13:01:06 -0400
Subject: [PATCH 020/186] dirs

---
 .../multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json | 3 +--
 tasks/skylab/FastqProcessing.wdl                            | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
index c4a7d6d5d7..6e44d0c7c1 100644
--- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
+++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
@@ -24,6 +24,5 @@
   "Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake",
   "Multiome.Atac.num_threads_bwa":"16",
   "Multiome.Atac.mem_size_bwa":"64", 
-  "Multiome.soloMultiMappers":"Uniform",
-  "Multiome.cloud_provider":"gcp"
+  "Multiome.soloMultiMappers":"Uniform"
 }
diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index 90cd0732fb..90011dfa8a 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -352,9 +352,9 @@ task FastqProcessATAC {
         fastqprocess \
         --num-output-files ~{num_output_files} \
         --sample-id "~{output_base_name}" \
-        $R1_FILES_CONCAT \
-        $R2_FILES_CONCAT \
-        $R3_FILES_CONCAT \
+        ../$R1_FILES_CONCAT \
+        ../$R2_FILES_CONCAT \
+        ../$R3_FILES_CONCAT \
         --white-list "~{whitelist}" \
         --output-format "FASTQ" \
         --barcode-orientation $barcode_choice \

From 68c0e8924388d68d96e03d555140d75058b69882 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 19 Mar 2024 18:46:59 -0400
Subject: [PATCH 021/186] dirs

---
 tasks/skylab/FastqProcessing.wdl | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index 90011dfa8a..c7ae558cf2 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -346,15 +346,13 @@ task FastqProcessATAC {
 
         # Call fastq process
         # outputs fastq files where the corrected barcode is in the read name
-        mkdir output_fastq/
-        cd output_fastq/
 
         fastqprocess \
         --num-output-files ~{num_output_files} \
         --sample-id "~{output_base_name}" \
-        ../$R1_FILES_CONCAT \
-        ../$R2_FILES_CONCAT \
-        ../$R3_FILES_CONCAT \
+        $R1_FILES_CONCAT \
+        $R2_FILES_CONCAT \
+        $R3_FILES_CONCAT \
         --white-list "~{whitelist}" \
         --output-format "FASTQ" \
         --barcode-orientation $barcode_choice \
@@ -371,8 +369,8 @@ task FastqProcessATAC {
     }
 
     output {
-        Array[File] fastq_R1_output_array = glob("output_fastq/fastq_R1_*")
-        Array[File] fastq_R3_output_array = glob("output_fastq/fastq_R3_*")
+        Array[File] fastq_R1_output_array = glob("fastq_R1_*")
+        Array[File] fastq_R3_output_array = glob("fastq_R3_*")
     }
 }
 

From 22510d0cba5dd914bbbf05c592bd36c70e119e5d Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Wed, 20 Mar 2024 12:36:25 -0400
Subject: [PATCH 022/186] add quotes to whitelist

---
 tasks/skylab/StarAlign.wdl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index 69a6851ec7..253040b820 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -299,7 +299,7 @@ task STARsoloFastq {
         --genomeDir genome_reference \
         --readFilesIn "~{sep=',' r2_fastq}" "~{sep=',' r1_fastq}" \
         --readFilesCommand "gunzip -c" \
-        --soloCBwhitelist ~{white_list} \
+        --soloCBwhitelist "~{white_list}" \
         --soloUMIlen $UMILen --soloCBlen $CBLen \
         --soloFeatures $COUNTING_MODE \
         --clipAdapterType CellRanger4 \
@@ -325,7 +325,7 @@ task STARsoloFastq {
             --genomeDir genome_reference \
             --readFilesIn "~{sep=',' r2_fastq}" "~{sep=',' r1_fastq}" \
             --readFilesCommand "gunzip -c" \
-            --soloCBwhitelist ~{white_list} \
+            --soloCBwhitelist "~{white_list}" \
             --soloUMIlen $UMILen --soloCBlen $CBLen \
             --soloFeatures $COUNTING_MODE  \
             --clipAdapterType CellRanger4 \
@@ -347,7 +347,7 @@ task STARsoloFastq {
             --genomeDir genome_reference \
             --readFilesIn "~{sep=',' r2_fastq}" "~{sep=',' r1_fastq}" \
             --readFilesCommand "gunzip -c" \
-            --soloCBwhitelist ~{white_list} \
+            --soloCBwhitelist "~{white_list}" \
             --soloUMIlen $UMILen --soloCBlen $CBLen \
             --soloFeatures $COUNTING_MODE \
             --clipAdapterType CellRanger4 \

From 208d14b5777145c13382401ee48a2da91be1e717 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 21 Mar 2024 08:56:46 -0400
Subject: [PATCH 023/186] mkdir cromwell_root

---
 tasks/skylab/StarAlign.wdl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index 253040b820..5874ff7035 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -375,6 +375,8 @@ task STARsoloFastq {
     touch Summary_sn_rna.csv
     touch UMIperCellSorted_sn_rna.txt
 
+    mkdir /cromwell_root
+
 
     if [[ "~{counting_mode}" == "sc_rna" ]]
     then

From cb9bbdbfb07c047eab016a13f785d720458e052a Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 21 Mar 2024 11:39:15 -0400
Subject: [PATCH 024/186] try vm family

---
 pipelines/skylab/multiome/atac.wdl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 4a36f1b95a..5899243683 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -450,6 +450,7 @@ task BWAPairedEndAlignment {
     cpu: nthreads
     cpuPlatform: cpu_platform
     memory: "${mem_size} GiB"
+    vm_size: "Standard_E64ds_v4"
   }
 
   output {

From b32c3fbe03875cf4776d3619013b6ba4f06ba5ac Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 21 Mar 2024 11:51:03 -0400
Subject: [PATCH 025/186] try vm family

---
 pipelines/skylab/multiome/atac.wdl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 5899243683..21a4d6c9b6 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -234,6 +234,7 @@ task GetNumSplits {
     cpu: nthreads
     cpuPlatform: cpu_platform
     memory: "${mem_size} GiB"
+    vm_size: "Standard_E64ds_v4"
   }
 
   output {

From 252bec6bc5778fe947487ecb90c11944bc33b61b Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 21 Mar 2024 11:55:10 -0400
Subject: [PATCH 026/186] Standard_M128s

---
 pipelines/skylab/multiome/atac.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 21a4d6c9b6..8a9535cbb1 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -234,7 +234,7 @@ task GetNumSplits {
     cpu: nthreads
     cpuPlatform: cpu_platform
     memory: "${mem_size} GiB"
-    vm_size: "Standard_E64ds_v4"
+    vm_size: "Standard_M128s"
   }
 
   output {
@@ -451,7 +451,7 @@ task BWAPairedEndAlignment {
     cpu: nthreads
     cpuPlatform: cpu_platform
     memory: "${mem_size} GiB"
-    vm_size: "Standard_E64ds_v4"
+    vm_size: "Standard_M128s"
   }
 
   output {

From 5b6d3d31e1745cde2c4756ba02562f42e03520e7 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Wed, 27 Mar 2024 09:31:04 -0400
Subject: [PATCH 027/186] try not to use cromwell root

---
 tasks/skylab/StarAlign.wdl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index 5874ff7035..b71ae1eb5d 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -396,8 +396,11 @@ task STARsoloFastq {
       then
         SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw"
         echo "SoloDirectory is $SoloDirectory"
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{}  echo mv {} /cromwell_root/
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{}  echo mv {} /cromwell_root/
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+        echo "list matrix files in $SoloDirectory"
+        ls "$SoloDirectory"/*.mtx
+        mv "Solo.out/GeneFull_Ex50pAS/raw/*.mtx" matrix.mtx
         mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv
         mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv
         mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats

From 4024e61beaffeff543b37699d9d1ef99c8ca45e2 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Wed, 27 Mar 2024 15:17:34 -0400
Subject: [PATCH 028/186] try not to use cromwell root

---
 tasks/skylab/StarAlign.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index b71ae1eb5d..01122bc77d 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -400,7 +400,7 @@ task STARsoloFastq {
         #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
         echo "list matrix files in $SoloDirectory"
         ls "$SoloDirectory"/*.mtx
-        mv "Solo.out/GeneFull_Ex50pAS/raw/*.mtx" matrix.mtx
+        mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" matrix.mtx
         mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv
         mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv
         mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats

From f6827c303c79e98be4cca1d78ba234aabd87dffa Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Wed, 27 Mar 2024 15:53:11 -0400
Subject: [PATCH 029/186] try using logic

---
 pipelines/skylab/multiome/atac.wdl | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 8a9535cbb1..bf09d3564d 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -115,7 +115,8 @@ workflow ATAC {
         nthreads = num_threads_bwa, 
         mem_size = mem_size_bwa,
         cpu_platform = cpu_platform_bwa,
-        docker_path = docker_prefix + samtools_docker
+        docker_path = docker_prefix + samtools_docker,
+        cloud_provider = cloud_provider
   }
 
   if (preindex) {
@@ -319,6 +320,7 @@ task BWAPairedEndAlignment {
     String suffix = "trimmed_adapters.fastq.gz"
     String output_base_name
     String docker_path
+    String cloud_provider
 
     # Runtime attributes
     Int disk_size = 2000
@@ -338,6 +340,7 @@ task BWAPairedEndAlignment {
     disk_size : "disk size used in bwa alignment step"
     output_base_name: "basename to be used for the output of the task"
     docker_path: "The docker image path containing the runtime environment for this task"
+    cloud_provider: "The cloud provider for the pipeline."
   }
 
   String bam_aligned_output_name = output_base_name + ".bam"
@@ -436,13 +439,27 @@ task BWAPairedEndAlignment {
     # rename file to this
     mv final.sorted.bam ~{bam_aligned_output_name}
         
+    echo "the present working dir"
+    pwd
+
     # save output logs for bwa-mem2
     mkdir output_logs
     mv *txt output_logs
-    tar -zcvf /cromwell_root/output_distbwa_log.tar.gz output_logs  
-    
-    # move bam file to /cromwell_root
-    mv ~{bam_aligned_output_name} /cromwell_root
+
+    if [ "~{cloud_provider}" == "gcp" ]; then
+        tar -zcvf /cromwell_root/output_distbwa_log.tar.gz output_logs
+    else
+        tar -zcvf /cromwell-executions/output_distbwa_log.tar.gz output_logs
+    fi
+
+    # move bam file to the root of cromwell
+    # if the cloud provider is azure, move the file to /cromwell-executions
+    # if the cloud provider is gcp, move the file to /cromwell_root
+    if [ "~{cloud_provider}" == "gcp" ]; then
+      mv ~{bam_aligned_output_name} /cromwell_root
+    else
+      mv ~{bam_aligned_output_name} /cromwell-executions
+    fi
   >>>
 
   runtime {

From 32a83fc358f8aa779428141a70de51a0b4c5e0c6 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 28 Mar 2024 11:45:34 -0400
Subject: [PATCH 030/186] try using logic

---
 pipelines/skylab/multiome/atac.wdl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index bf09d3564d..ecd2ef51b2 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -444,21 +444,23 @@ task BWAPairedEndAlignment {
 
     # save output logs for bwa-mem2
     mkdir output_logs
-    mv *txt output_logs
+    mv *.txt output_logs
 
     if [ "~{cloud_provider}" == "gcp" ]; then
-        tar -zcvf /cromwell_root/output_distbwa_log.tar.gz output_logs
+        tar -zcvf output_distbwa_log.tar.gz output_logs
+        mv output_distbwa_log.tar.gz ../
     else
-        tar -zcvf /cromwell-executions/output_distbwa_log.tar.gz output_logs
+        tar -zcvf output_distbwa_log.tar.gz output_logs
+        mv output_distbwa_log.tar.gz ../
     fi
 
     # move bam file to the root of cromwell
     # if the cloud provider is azure, move the file to /cromwell-executions
     # if the cloud provider is gcp, move the file to /cromwell_root
     if [ "~{cloud_provider}" == "gcp" ]; then
-      mv ~{bam_aligned_output_name} /cromwell_root
+      mv ~{bam_aligned_output_name} ../
     else
-      mv ~{bam_aligned_output_name} /cromwell-executions
+      mv ~{bam_aligned_output_name} ../
     fi
   >>>
 

From 4b7a903062f1db4728b08998ae3fd12918c524ad Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 28 Mar 2024 15:09:21 -0400
Subject: [PATCH 031/186] update snapatac2 docker

---
 pipelines/skylab/multiome/atac.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index ecd2ef51b2..a6bed6d787 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -55,7 +55,7 @@ workflow ATAC {
   String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919"
   String samtools_docker = "samtools-dist-bwa:3.0.0"
   String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311"
-  String snap_atac_docker = "snapatac2:1.0.4-2.3.1"
+  String snap_atac_docker = "snapatac2:1.0.5-2.3.2-1709230223"
 
   # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error
   if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {

From aa1d23be57cd5b764d81849be41cb9682c57756c Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 28 Mar 2024 16:11:22 -0400
Subject: [PATCH 032/186] remove mkdir cromwell root

---
 tasks/skylab/StarAlign.wdl | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index 01122bc77d..2bff800626 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -375,9 +375,6 @@ task STARsoloFastq {
     touch Summary_sn_rna.csv
     touch UMIperCellSorted_sn_rna.txt
 
-    mkdir /cromwell_root
-
-
     if [[ "~{counting_mode}" == "sc_rna" ]]
     then
       SoloDirectory="Solo.out/Gene/raw"

From d826b0a0c6bef6ae5ac30da72669ebe8551f575a Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 29 Mar 2024 08:04:07 -0400
Subject: [PATCH 033/186] snap docker

---
 pipelines/skylab/multiome/Multiome.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index b933c43be4..2cd2af2988 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -54,7 +54,7 @@ workflow Multiome {
     String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
 
     # Define docker images
-    String snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229"
+    String snap_atac_docker_image = "snapatac2:1.0.5-2.3.2-1709230223"
 
     # Define all whitelist files
     File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt"

From a57225d9c36ce08bd9cf2678ea01e41bb78c5e0c Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 29 Mar 2024 09:18:41 -0400
Subject: [PATCH 034/186] fix snap docker

---
 pipelines/skylab/multiome/Multiome.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index 2cd2af2988..b933c43be4 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -54,7 +54,7 @@ workflow Multiome {
     String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
 
     # Define docker images
-    String snap_atac_docker_image = "snapatac2:1.0.5-2.3.2-1709230223"
+    String snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229"
 
     # Define all whitelist files
     File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt"

From 9012509b3b859cd356535fe4a64b6112e295f8f3 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 2 Apr 2024 13:32:53 -0400
Subject: [PATCH 035/186] fix starsolo fastq for other flavors of optimus

---
 tasks/skylab/StarAlign.wdl | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index 2bff800626..d2148ad128 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -379,8 +379,11 @@ task STARsoloFastq {
     then
       SoloDirectory="Solo.out/Gene/raw"
       echo "SoloDirectory is $SoloDirectory"
-      find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{}  echo mv {} /cromwell_root/
-      find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+      #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{}  echo mv {} /cromwell_root/
+      #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+      echo "list matrix files in $SoloDirectory"
+      ls "$SoloDirectory"/*.mtx
+      mv $SoloDirectory/matrix.mtx matrix.mtx
       mv "Solo.out/Gene/raw/barcodes.tsv" barcodes.tsv
       mv "Solo.out/Gene/raw/features.tsv" features.tsv
       mv "Solo.out/Gene/CellReads.stats" CellReads.stats
@@ -397,7 +400,7 @@ task STARsoloFastq {
         #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
         echo "list matrix files in $SoloDirectory"
         ls "$SoloDirectory"/*.mtx
-        mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" matrix.mtx
+        mv $SoloDirectory/matrix.mtx matrix.mtx
         mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv
         mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv
         mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats
@@ -407,12 +410,18 @@ task STARsoloFastq {
       else
         SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw"
         echo "SoloDirectory is $SoloDirectory"
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+        echo "list matrix files in $SoloDirectory"
+        ls "$SoloDirectory"/*.mtx
+        mv $SoloDirectory/matrix.mtx matrix.mtx
         SoloDirectory="Solo.out/Gene/raw"
         echo "SoloDirectory is $SoloDirectory"
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx";  echo mv {} "/cromwell_root/$new_name"'
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"'
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx";  echo mv {} "/cromwell_root/$new_name"'
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"'
+        echo "list matrix files in $SoloDirectory"
+        ls "$SoloDirectory"/*.mtx
+        mv $SoloDirectory/matrix.mtx matrix_sn_rna.mtx
         mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv
         mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv
         mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats

From b2e42b3af97321def046c23b6aa105498eafbf67 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 2 Apr 2024 14:26:25 -0400
Subject: [PATCH 036/186] merge conflicts

---
 pipelines/skylab/multiome/Multiome.changelog.md | 2 +-
 pipelines/skylab/multiome/Multiome.wdl          | 2 +-
 pipelines/skylab/optimus/Optimus.changelog.md   | 2 +-
 pipelines/skylab/optimus/Optimus.wdl            | 2 +-
 pipelines/skylab/paired_tag/PairedTag.wdl       | 2 +-
 pipelines/skylab/slideseq/SlideSeq.wdl          | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md
index 3ee0584b4a..e2e5fcec2e 100644
--- a/pipelines/skylab/multiome/Multiome.changelog.md
+++ b/pipelines/skylab/multiome/Multiome.changelog.md
@@ -1,4 +1,4 @@
-# 3.4.1
+# 3.4.2
 2024-04-01 (Date of Last Commit)
 
 * Updated the Optimus.wdl to run on Azure. This change does not affect the Multiome pipeline.
diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index e07ff239e8..9fd708cc66 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Multiome {
 
-    String pipeline_version = "3.4.1"
+    String pipeline_version = "3.4.2"
 
     input {
         String cloud_provider
diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md
index bce0f726f7..76dbb6637c 100644
--- a/pipelines/skylab/optimus/Optimus.changelog.md
+++ b/pipelines/skylab/optimus/Optimus.changelog.md
@@ -1,4 +1,4 @@
-# 6.6.1
+# 6.6.2
 2024-04-01 (Date of Last Commit)
 * Updated the Optimus.wdl to run on Azure.
 
diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index 64997c513d..0f4b96a2b3 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -68,7 +68,7 @@ workflow Optimus {
   # version of this pipeline
 
 
-  String pipeline_version = "6.6.1"
+  String pipeline_version = "6.6.2"
 
 
   # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl
index 242a942e76..8c7450abc2 100644
--- a/pipelines/skylab/paired_tag/PairedTag.wdl
+++ b/pipelines/skylab/paired_tag/PairedTag.wdl
@@ -5,7 +5,7 @@ import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
 import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
 import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing
 workflow PairedTag {
-    String pipeline_version = "0.4.1"
+    String pipeline_version = "0.4.2"
 
     input {
         String input_id
diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index a9fd0e25ec..04aed4f979 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow SlideSeq {
 
-    String pipeline_version = "3.1.4"
+    String pipeline_version = "3.1.5"
 
     input {
         Array[File] r1_fastq

From ab644328d9b7baf62b9eaf4e1af315ded570bd0d Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 2 Apr 2024 14:38:16 -0400
Subject: [PATCH 037/186] merge conflicts

---
 pipelines/skylab/slideseq/SlideSeq.wdl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index 04aed4f979..8005922895 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -50,6 +50,7 @@ workflow SlideSeq {
     String picard_cloud_docker = "picard-cloud:2.26.10"
     String warp_tools_docker_2_0_1 = "warp-tools:2.0.1"
     String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985"
+    String star_merge_docker = "star-merge-npz:1.1"
 
     String ubuntu_docker = "ubuntu_16_0_4:latest"
     String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/"
@@ -148,7 +149,7 @@ workflow SlideSeq {
             features = STARsoloFastqSlideSeq.features,
             matrix = STARsoloFastqSlideSeq.matrix,
             input_id = input_id,
-            warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2
+            star_merge_docker_path = docker_prefix + star_merge_docker
     }
     if ( !count_exons ) {
         call H5adUtils.OptimusH5adGeneration as SlideseqH5adGeneration{
@@ -173,7 +174,7 @@ workflow SlideSeq {
                 features = STARsoloFastqSlideSeq.features_sn_rna,
                 matrix = STARsoloFastqSlideSeq.matrix_sn_rna,
                 input_id = input_id,
-                warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2
+                star_merge_docker_path = docker_prefix + star_merge_docker
         }
         call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{
             input:

From 537f3301c56c78f0b978ae3821e432c4e630877b Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Wed, 3 Apr 2024 09:04:35 -0400
Subject: [PATCH 038/186] fix StarAlign.wdl

---
 tasks/skylab/StarAlign.wdl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index b4e75e5565..9002223f7e 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -530,9 +530,12 @@ task MergeStarOutput {
     mkdir matrix
     #Using cp because mv isn't moving
     pwd
-    cp /cromwell_root/~{input_id}.uniform.mtx ./matrix/matrix.mtx
+    ls -lR
+    cp ~{input_id}.uniform.mtx ./matrix/matrix.mtx
     cp ~{barcodes_single} ./matrix/barcodes.tsv
     cp ~{features_single} ./matrix/features.tsv
+    echo "doing another ls"
+    ls -lR
 
     tar -zcvf ~{input_id}.mtx_files.tar ./matrix/*
 

From 1a0a3406aa0e3345c958600cdcc469eeda6b2a0a Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Wed, 3 Apr 2024 14:13:42 -0400
Subject: [PATCH 039/186] put whitelists in quotes

---
 tasks/skylab/CheckInputs.wdl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tasks/skylab/CheckInputs.wdl b/tasks/skylab/CheckInputs.wdl
index 89b99c7798..57fbcaad1a 100644
--- a/tasks/skylab/CheckInputs.wdl
+++ b/tasks/skylab/CheckInputs.wdl
@@ -119,10 +119,10 @@ task checkOptimusInput {
       then
       if [[ "~{cloud_provider}" == "gcp" ]]
       then
-        WHITELIST=~{gcp_whitelist_v2}
+        WHITELIST="~{gcp_whitelist_v2}"
       elif [[ "~{cloud_provider}" == "azure" ]]
       then
-        WHITELIST=~{azure_whitelist_v2}
+        WHITELIST="~{azure_whitelist_v2}"
       else
         pass="false"
         echo "ERROR: Cloud provider must be either gcp or azure"
@@ -134,10 +134,10 @@ task checkOptimusInput {
       then
       if [[ "~{cloud_provider}" == "gcp" ]]
       then
-        WHITELIST=~{gcp_whitelist_v3}
+        WHITELIST="~{gcp_whitelist_v3}"
       elif [[ "~{cloud_provider}" == "azure" ]]
       then
-        WHITELIST=~{azure_whitelist_v3}
+        WHITELIST="~{azure_whitelist_v3}"
       else
         pass="false"
         echo "ERROR: Cloud provider must be either gcp or azure"

From bfcd70084ad63d912e43275d47a70ee0172a5548 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <45041478+sahakiann@users.noreply.github.com>
Date: Mon, 8 Apr 2024 11:52:41 -0400
Subject: [PATCH 040/186] PD-2516: Update Paired-Tag to run in Azure and GCP
 (#1212)

* PD-2516: Update PairedTag to run in Azure and GCP

* json formatting

* update file location for GCP vs. Azure and documentation accordingly

* merge conflicts after rebase, update pipeline changelog and readme version

* more fixes after rebase

* more fixes after rebase

* more fixes after rebase

* fix readme

* adding sas tokens

* fixing womtools error

* update pipeline change logs and versions

---------

Co-authored-by: npetrill <npetrill@broadinstitute.org>
Co-authored-by: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com>
---
 .../skylab/multiome/Multiome.changelog.md     |  5 ++
 pipelines/skylab/multiome/Multiome.wdl        |  2 +-
 pipelines/skylab/multiome/atac.changelog.md   |  5 ++
 pipelines/skylab/multiome/atac.wdl            |  3 +-
 .../skylab/paired_tag/PairedTag.changelog.md  |  5 ++
 pipelines/skylab/paired_tag/PairedTag.wdl     | 49 +++++++++++++++----
 .../Plumbing/10k_pbmc_downsampled.json        | 11 +++--
 .../test_inputs/Scientific/10k_pbmc.json      | 11 +++--
 ...iSampleSmartSeq2SingleNucleus.changelog.md |  6 +++
 .../MultiSampleSmartSeq2SingleNucleus.wdl     |  3 +-
 tasks/skylab/PairedTagUtils.wdl               |  9 ++--
 .../Pipelines/PairedTag_Pipeline/README.md    |  4 +-
 12 files changed, 84 insertions(+), 29 deletions(-)

diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md
index 8e2cc66d4c..7704a65ae5 100644
--- a/pipelines/skylab/multiome/Multiome.changelog.md
+++ b/pipelines/skylab/multiome/Multiome.changelog.md
@@ -1,3 +1,8 @@
+# 3.4.3
+2024-04-08 (Date of Last Commit)
+
+* Updated the PairedTag.wdl to run on Azure. This change does not affect the Multiome pipeline.
+
 # 3.4.2
 2024-04-01 (Date of Last Commit)
 
diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index 9fd708cc66..8bfd9c7222 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Multiome {
 
-    String pipeline_version = "3.4.2"
+    String pipeline_version = "3.4.3"
 
     input {
         String cloud_provider
diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md
index ef74303072..67db095802 100644
--- a/pipelines/skylab/multiome/atac.changelog.md
+++ b/pipelines/skylab/multiome/atac.changelog.md
@@ -1,3 +1,8 @@
+# 1.2.3
+2024-04-08 (Date of Last Commit)
+
+*  Updated the PairedTag.wdl to run on Azure. This change does not affect the ATAC pipeline.
+
 # 1.2.2
 2024-04-02 (Date of Last Commit)
 
diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index a7846f0e4f..b286144756 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -43,7 +43,8 @@ workflow ATAC {
     String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
   }
 
-  String pipeline_version = "1.2.2"
+  String pipeline_version = "1.2.3"
+
 
   # Determine docker prefix based on cloud provider
   String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md
index 09950e498c..b763a378a7 100644
--- a/pipelines/skylab/paired_tag/PairedTag.changelog.md
+++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md
@@ -1,3 +1,8 @@
+# 0.5.2
+2024-04-08 (Date of Last Commit)
+
+* Updated the PairedTag.wdl to run in Azure 
+
 # 0.5.1
 2024-04-04 (Date of Last Commit)
 
diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl
index a4de0f85d7..83b9f351be 100644
--- a/pipelines/skylab/paired_tag/PairedTag.wdl
+++ b/pipelines/skylab/paired_tag/PairedTag.wdl
@@ -2,10 +2,12 @@ version 1.0
 
 import "../../../pipelines/skylab/multiome/atac.wdl" as atac
 import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
-import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
 import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing
+import "../../../tasks/broad/Utilities.wdl" as utils
+
 workflow PairedTag {
-    String pipeline_version = "0.5.1"
+
+    String pipeline_version = "0.5.2"
 
     input {
         String input_id
@@ -24,7 +26,7 @@ workflow PairedTag {
         Boolean ignore_r1_read_length = false
         String star_strand_mode = "Forward"
         Boolean count_exons = false
-        File gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt"
+        File gex_whitelist = if cloud_provider == "gcp" then "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" else "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_gex.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D"
 
         # ATAC inputs
         # Array of input fastq files
@@ -38,11 +40,34 @@ workflow PairedTag {
         String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG"
         String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
         # Whitelist
-        File atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt"
+        File atac_whitelist = if cloud_provider == "gcp" then "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt" else "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_atac.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D"
 
         # PairedTag
         Boolean preindex
+
+        # Expected to be either 'gcp' or 'azure'
+        String cloud_provider
+    }
+
+    # All docker images that are needed for tasks in this workflow
+    String upstools_docker = "upstools:1.2.0-2023.03.03-1704723060"
+    String snapatac_docker = "snapatac2:1.0.4-2.3.1-1700590229"
+
+    # Prefixes based on cloud env
+    String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
+    String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
+
+    # choose docker prefix based on cloud_provider input
+    String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
+
+    # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error
+    if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
+        call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
+            input:
+                message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+        }
     }
+
     # Call the Optimus workflow
     call optimus.Optimus as Optimus {
         input:
@@ -62,10 +87,9 @@ workflow PairedTag {
             ignore_r1_read_length = ignore_r1_read_length,
             star_strand_mode = star_strand_mode,
             count_exons = count_exons,
+            cloud_provider = cloud_provider,
     }
 
-    # Call the ATAC workflow
-        # Call the ATAC workflow
     scatter (idx in range(length(atac_r1_fastq))) {
         call Demultiplexing.PairedTagDemultiplex as demultiplex {
             input:
@@ -74,9 +98,12 @@ workflow PairedTag {
               barcodes_fastq = atac_r2_fastq[idx],
               input_id = input_id,
               whitelist = atac_whitelist,
-              preindex = preindex
+              preindex = preindex,
+              docker_path = docker_prefix + upstools_docker
         }
-    }      
+    }
+
+    # Call the ATAC workflow
     call atac.ATAC as Atac_preindex {
         input:
             read1_fastq_gzipped = demultiplex.fastq1,
@@ -89,14 +116,16 @@ workflow PairedTag {
             whitelist = atac_whitelist,
             adapter_seq_read1 = adapter_seq_read1,
             adapter_seq_read3 = adapter_seq_read3,
-            preindex = preindex
+            preindex = preindex,
+            cloud_provider = cloud_provider,
     }
 
     if (preindex) {
         call Demultiplexing.ParseBarcodes as ParseBarcodes {
             input:
               atac_h5ad = Atac_preindex.snap_metrics,
-              atac_fragment = Atac_preindex.fragment_file
+              atac_fragment = Atac_preindex.fragment_file,
+              docker_path = docker_prefix + snapatac_docker,
         }
     }      
 
diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json
index e46f86c366..869012fcb6 100644
--- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json
+++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json
@@ -16,9 +16,10 @@
   "PairedTag.atac_r3_fastq":[
     "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R3_atac.fastq.gz"
   ],
-  "PairedTag.ref_genome_fasta":"gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa",
-  "PairedTag.tar_bwa_reference":"gs://gcp-public-data--broad-references/hg38/v0/bwa/v2_2_1/bwa-mem2-2.2.1-Human-GENCODE-build-GRCh38.tar",
-  "PairedTag.tar_star_reference":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_star2.7.10a-Human-GENCODE-build-GRCh38-43.tar",
-  "PairedTag.chrom_sizes":"gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes",
-  "PairedTag.preindex":"false"
+  "PairedTag.ref_genome_fasta": "gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa",
+  "PairedTag.tar_bwa_reference": "gs://gcp-public-data--broad-references/hg38/v0/bwa/v2_2_1/bwa-mem2-2.2.1-Human-GENCODE-build-GRCh38.tar",
+  "PairedTag.tar_star_reference": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_star2.7.10a-Human-GENCODE-build-GRCh38-43.tar",
+  "PairedTag.chrom_sizes": "gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes",
+  "PairedTag.preindex": "false",
+  "PairedTag.cloud_provider": "gcp"
 }
diff --git a/pipelines/skylab/paired_tag/test_inputs/Scientific/10k_pbmc.json b/pipelines/skylab/paired_tag/test_inputs/Scientific/10k_pbmc.json
index 888439d2a6..0cbf338449 100644
--- a/pipelines/skylab/paired_tag/test_inputs/Scientific/10k_pbmc.json
+++ b/pipelines/skylab/paired_tag/test_inputs/Scientific/10k_pbmc.json
@@ -25,9 +25,10 @@
     "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L001_R3_001.fastq.gz",
     "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L002_R3_001.fastq.gz"
   ],
-  "PairedTag.ref_genome_fasta":"gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa",
-  "PairedTag.tar_bwa_reference":"gs://gcp-public-data--broad-references/hg38/v0/bwa/v2_2_1/bwa-mem2-2.2.1-Human-GENCODE-build-GRCh38.tar",
-  "PairedTag.tar_star_reference":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_star2.7.10a-Human-GENCODE-build-GRCh38-43.tar",
-  "PairedTag.chrom_sizes":"gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes",
-  "PairedTag.preindex":"false"
+  "PairedTag.ref_genome_fasta": "gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa",
+  "PairedTag.tar_bwa_reference": "gs://gcp-public-data--broad-references/hg38/v0/bwa/v2_2_1/bwa-mem2-2.2.1-Human-GENCODE-build-GRCh38.tar",
+  "PairedTag.tar_star_reference": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_star2.7.10a-Human-GENCODE-build-GRCh38-43.tar",
+  "PairedTag.chrom_sizes": "gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes",
+  "PairedTag.preindex": "false",
+  "PairedTag.cloud_provider": "gcp"
 }
\ No newline at end of file
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
index d6ae9dd60f..d3bd1ba5a1 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
@@ -1,3 +1,8 @@
+# 1.3.5
+2024-04-08 (Date of Last Commit)
+
+* Updated the PairedTag.wdl to run on Azure. This change does not affect the MultiSampleSmartSeq2SingleNucleus pipeline.
+ 
 # 1.3.4
 2024-04-02 (Date of Last Commit)
 
@@ -14,6 +19,7 @@
 * Added cell metrics to the library-level metrics CSV; this does not impact the Single-nucleus Multi Sample Smartseq pipeline
 * Updated the docker for the MergeStarOutput task to include STARsolo v2.7.11a and custom scripts to create a uniform matrix file and scripts to collect library-level metrics from STARsolo output
 * Modified the MergeStarOutput to call a custom script for creating a uniform matrix file (mtx) from individual shard mtx files and to create a filtered matrix from the uniform matrix with STARsolo
+
 # 1.3.1
 2024-02-28 (Date of Last Commit)
 
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
index 77da68f74a..3ed1b6a220 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
@@ -57,7 +57,8 @@ workflow MultiSampleSmartSeq2SingleNucleus {
   }
 
   # Version of this pipeline
-  String pipeline_version = "1.3.4"
+
+  String pipeline_version = "1.3.5"
 
   if (false) {
      String? none = "None"
diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl
index 7fcd867474..d3754c55f3 100644
--- a/tasks/skylab/PairedTagUtils.wdl
+++ b/tasks/skylab/PairedTagUtils.wdl
@@ -7,7 +7,7 @@ task PairedTagDemultiplex {
         String input_id
         Boolean preindex
         File whitelist
-        String docker = "us.gcr.io/broad-gotc-prod/upstools:1.2.0-2023.03.03-1704723060"
+        String docker_path
         Int cpu = 1
         Int disk_size = ceil(2 * (size(read1_fastq, "GiB") + size(read3_fastq, "GiB") + size(barcodes_fastq, "GiB") )) + 400
         Int preemptible = 3
@@ -23,7 +23,7 @@ task PairedTagDemultiplex {
         preindex: "Boolean for whether data has a sample barcode that needs to be demultiplexed"
         whitelist: "Atac whitelist for 10x multiome data"
         input_id: "Input ID to demarcate sample"
-        docker: "(optional) the docker image containing the runtime environment for this task"
+        docker_path: "the docker image containing the runtime environment for this task"
         mem_size: "(optional) the amount of memory (MiB) to provision for this task"
         cpu: "(optional) the number of cpus to provision for this task"
         disk_size: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -112,7 +112,7 @@ task PairedTagDemultiplex {
     >>>
     
     runtime {
-        docker: docker
+        docker: docker_path
         cpu: cpu
         memory: "${mem_size} GiB"
         disks: "local-disk ${disk_size} HDD"
@@ -185,6 +185,7 @@ task ParseBarcodes {
         File atac_fragment
         Int nthreads = 1
         String cpuPlatform = "Intel Cascade Lake"
+        String docker_path
     }
 
     String atac_base_name = basename(atac_h5ad, ".h5ad")
@@ -254,7 +255,7 @@ task ParseBarcodes {
   >>>
 
   runtime {
-      docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1-1700590229"
+      docker: docker_path
       disks: "local-disk ~{disk} HDD"
       memory: "${machine_mem_mb} MiB"
       cpu: nthreads
diff --git a/website/docs/Pipelines/PairedTag_Pipeline/README.md b/website/docs/Pipelines/PairedTag_Pipeline/README.md
index f7c2d16469..2d45e01ec4 100644
--- a/website/docs/Pipelines/PairedTag_Pipeline/README.md
+++ b/website/docs/Pipelines/PairedTag_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/PairedTag_Pipeline/README
 
 |                          Pipeline Version                           | Date Updated | Documentation Author | Questions or Feedback |
 |:-------------------------------------------------------------------:| :---: | :----: | :--------------: |
-| [PairedTag_v0.4.2](https://github.com/broadinstitute/warp/releases) | February, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
+| [PairedTag_v0.5.2](https://github.com/broadinstitute/warp/releases) | April, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
 
 
 ## Introduction to the Paired-Tag workflow
@@ -118,7 +118,7 @@ The Paired-Tag workflow calls two WARP subworkflows and an additional task which
 | cell_calls_gex | `<input_id>_gex.emptyDrops` | TSV file containing the EmptyDrops results when the Optimus workflow is run in sc_rna mode. |
 | h5ad_output_file_gex | `<input_id>_gex.h5ad` | h5ad (Anndata) file containing the raw cell-by-gene count matrix, gene metrics, cell metrics, and global attributes. See the [Optimus Count Matrix Overview](../Optimus_Pipeline/Loom_schema.md) for more details. |
 | library_metrics | `<input_id>_library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. |
-
+| cloud_provider  | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String      |
 
 ## Versioning and testing
 

From d657038f059f88b2c84c5eaf3362777cf3855b9d Mon Sep 17 00:00:00 2001
From: Farzaneh Khajouei <fkhajoue@broadinstitute.org>
Date: Thu, 11 Apr 2024 14:08:07 -0500
Subject: [PATCH 041/186] Fk pd 2513 reblock gvcf (#1261)

* added azure docker and updated ReblockGVCF to support gcp and azure

* updated verification test wdl

* updated UltimaGenomicsWholeGenomeGermline wdl and changelog

* Updated Changelog on BroadInternalUltimaGenomics

* updated tasks to use the new docker

* updated additional changelogs

* changed path for import utils.wdl

* update other wdls because of qc wdl change

* update other wdls because of qc wdl change

* update test wdls

* still need to update many input jsons

* Update ExomeGermlineSingleSample.changelog.md

* Update UltimaGenomicsWholeGenomeGermline.changelog.md

---------

Co-authored-by: npetrill <npetrill@broadinstitute.org>
Co-authored-by: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com>
---
 .../arrays/single_sample/Arrays.changelog.md  |  5 +++++
 .../broad/arrays/single_sample/Arrays.wdl     |  2 +-
 .../reblocking/ReblockGVCF.changelog.md       |  5 +++++
 .../reblocking/ReblockGVCF.exome.inputs.json  |  3 ++-
 .../reblocking/ReblockGVCF.wdl                | 21 ++++++++++++++++---
 .../Plumbing/G96830.NA12878.index.json        |  3 ++-
 .../test_inputs/Plumbing/G96830.NA12878.json  |  3 ++-
 .../test_inputs/Plumbing/NA12878.ultima.json  |  3 ++-
 .../test_inputs/Plumbing/RP-929.NA12878.json  |  3 ++-
 .../Scientific/C1963.CHMI_CHMI3_Nex1.json     |  3 ++-
 .../test_inputs/Scientific/C862.NA19238.json  |  3 ++-
 .../test_inputs/Scientific/D5327.NA12878.json |  3 ++-
 .../test_inputs/Scientific/D5327.NA12891.json |  3 ++-
 .../Scientific/G94794.CHMI_CHMI3_WGS2.json    |  3 ++-
 .../Scientific/G94982.NA12878.json            |  3 ++-
 .../Scientific/G94982.NA12891.json            |  3 ++-
 .../Scientific/G94982.NA12892.json            |  3 ++-
 .../Scientific/G96830.NA12878.json            |  3 ++-
 .../test_inputs/Scientific/NA12878.bge.json   |  3 ++-
 .../Scientific/NA12878.ultima.json            |  3 ++-
 .../Scientific/RP-1535.NA17-308.json          |  3 ++-
 .../Scientific/RP-518.NA12878.json            |  3 ++-
 .../exome/Plumbing/RP-929.NA12878.json        |  3 ++-
 .../Scientific/C1963.CHMI_CHMI3_Nex1.json     |  3 ++-
 .../exome/Scientific/C862.NA19238.json        |  3 ++-
 .../exome/Scientific/D5327.NA12878.json       |  3 ++-
 .../exome/Scientific/D5327.NA12891.json       |  3 ++-
 .../exome/Scientific/D5327.NA12892.json       |  3 ++-
 .../exome/Scientific/RP-1535.NA17-308.json    |  3 ++-
 .../wgs/Plumbing/G96830.NA12878.json          |  3 ++-
 .../wgs/Plumbing/NA12878.ultima.json          |  3 ++-
 .../Scientific/G94794.CHMI_CHMI3_WGS2.json    |  3 ++-
 .../wgs/Scientific/G94982.NA12878.json        |  3 ++-
 .../wgs/Scientific/G94982.NA12891.json        |  3 ++-
 .../wgs/Scientific/G94982.NA12892.json        |  3 ++-
 .../wgs/Scientific/G96830.NA12878.json        |  3 ++-
 .../wgs/Scientific/NA12878.ultima.json        |  3 ++-
 .../wgs/Scientific/RP-518.NA12878.json        |  3 ++-
 .../ExomeGermlineSingleSample.changelog.md    |  5 +++++
 .../exome/ExomeGermlineSingleSample.wdl       | 21 +++++++++++++++++--
 .../test_inputs/Plumbing/RP-929.NA12878.json  |  3 ++-
 .../Scientific/C1963.CHMI_CHMI3_Nex1.json     |  3 ++-
 .../test_inputs/Scientific/C862.NA19238.json  |  3 ++-
 .../test_inputs/Scientific/D5327.NA12878.json |  3 ++-
 .../test_inputs/Scientific/D5327.NA12891.json |  3 ++-
 .../test_inputs/Scientific/D5327.NA12892.json |  3 ++-
 .../Scientific/RP-1535.NA17-308.json          |  3 ++-
 ...maGenomicsWholeGenomeGermline.changelog.md |  7 ++++++-
 .../UltimaGenomicsWholeGenomeGermline.wdl     |  5 +++--
 ...oleGenomeGermlineSingleSample.changelog.md |  5 +++++
 .../wgs/WholeGenomeGermlineSingleSample.wdl   |  7 +++++--
 ...mple.inputs.plumbing.masked_reference.json |  1 +
 .../test_inputs/Plumbing/G96830.NA12878.json  |  3 ++-
 .../Plumbing/dragen_mode_best_results.json    |  3 ++-
 .../dragen_mode_functional_equivalence.json   |  3 ++-
 .../Scientific/G94794.CHMI_CHMI3_WGS2.json    |  3 ++-
 ...4982.NA12878.dragen_mode_best_results.json |  3 ++-
 ...78.dragen_mode_functional_equivalence.json |  3 ++-
 .../Scientific/G94982.NA12878.json            |  3 ++-
 .../Scientific/G94982.NA12891.json            |  3 ++-
 .../Scientific/G94982.NA12892.json            |  3 ++-
 .../Scientific/G96830.NA12878.json            |  3 ++-
 .../Scientific/RP-518.NA12878.json            |  3 ++-
 .../VariantCalling.changelog.md               |  5 +++++
 .../variant_calling/VariantCalling.wdl        | 21 ++++++++++++++++---
 .../test_inputs/Plumbing/G96830.NA12878.json  |  3 ++-
 .../test_inputs/Plumbing/RP-929.NA12878.json  |  3 ++-
 .../exome/Plumbing/RP-929.NA12878.json        |  3 ++-
 .../wgs/Plumbing/G96830.NA12878.json          |  3 ++-
 ...maGenomicsWholeGenomeCramOnly.changelog.md |  5 +++++
 .../UltimaGenomicsWholeGenomeCramOnly.wdl     |  2 +-
 .../IlluminaGenotypingArray.changelog.md      |  5 +++++
 .../illumina/IlluminaGenotypingArray.wdl      |  2 +-
 .../BroadInternalArrays.changelog.md          |  5 +++++
 .../single_sample/BroadInternalArrays.wdl     |  2 +-
 .../BroadInternalUltimaGenomics.changelog.md  |  5 +++++
 .../BroadInternalUltimaGenomics.wdl           |  2 +-
 .../BroadInternalRNAWithUMIs.changelog.md     |  5 +++++
 .../rna_seq/BroadInternalRNAWithUMIs.wdl      |  2 +-
 .../broad/qc/CheckFingerprint.changelog.md    |  5 +++++
 pipelines/broad/qc/CheckFingerprint.wdl       |  2 +-
 .../exome/ExomeReprocessing.changelog.md      |  5 +++++
 .../reprocessing/exome/ExomeReprocessing.wdl  |  5 ++++-
 .../ExternalExomeReprocessing.changelog.md    |  5 +++++
 .../exome/ExternalExomeReprocessing.wdl       |  7 +++++--
 ...ternalWholeGenomeReprocessing.changelog.md |  5 +++++
 .../wgs/ExternalWholeGenomeReprocessing.wdl   |  7 +++++--
 .../wgs/WholeGenomeReprocessing.changelog.md  |  5 +++++
 .../wgs/WholeGenomeReprocessing.wdl           |  7 +++++--
 tasks/broad/GermlineVariantDiscovery.wdl      |  4 ++--
 tasks/broad/Qc.wdl                            |  4 ++--
 .../TestExomeGermlineSingleSample.wdl         |  4 +++-
 verification/test-wdls/TestReblockGVCF.wdl    |  4 +++-
 verification/test-wdls/TestVariantCalling.wdl |  4 +++-
 .../TestWholeGenomeGermlineSingleSample.wdl   |  4 +++-
 95 files changed, 299 insertions(+), 91 deletions(-)

diff --git a/pipelines/broad/arrays/single_sample/Arrays.changelog.md b/pipelines/broad/arrays/single_sample/Arrays.changelog.md
index aede89d29b..cf42113ba8 100644
--- a/pipelines/broad/arrays/single_sample/Arrays.changelog.md
+++ b/pipelines/broad/arrays/single_sample/Arrays.changelog.md
@@ -1,3 +1,8 @@
+# 2.6.24
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and Qc.wdl to allow multi-cloud dockers; this does not affect this pipeline.
+
 # 2.6.23
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/arrays/single_sample/Arrays.wdl b/pipelines/broad/arrays/single_sample/Arrays.wdl
index 461e07aa8d..75e52e5c90 100644
--- a/pipelines/broad/arrays/single_sample/Arrays.wdl
+++ b/pipelines/broad/arrays/single_sample/Arrays.wdl
@@ -23,7 +23,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Arrays {
 
-  String pipeline_version = "2.6.23"
+  String pipeline_version = "2.6.24"
 
   input {
     String chip_well_barcode
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md
index b06cdec265..7bb0abdfbb 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md
@@ -1,3 +1,8 @@
+# 2.2.0
+2024-04-08 (Date of Last Commit)
+
+* Updated ReblockGVCF.wdl to run in Azure.
+
 # 2.1.12
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.exome.inputs.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.exome.inputs.json
index 58f7ac8dcd..b4e84a89a2 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.exome.inputs.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.exome.inputs.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/ExomeGermlineSingleSample/truth/plumbing/master/RP-929.NA12878/NA12878_PLUMBING.rb.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider":"gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl
index 3ef03fba17..f9a14011dc 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl
@@ -2,10 +2,11 @@ version 1.0
 
 import "../../../../../../tasks/broad/GermlineVariantDiscovery.wdl" as Calling
 import "../../../../../../tasks/broad/Qc.wdl" as QC
+import "../../../../../../tasks/broad/Utilities.wdl" as utils
 
 workflow ReblockGVCF {
 
-  String pipeline_version = "2.1.12"
+  String pipeline_version = "2.2.0"
 
 
   input {
@@ -20,9 +21,22 @@ workflow ReblockGVCF {
     String? annotations_to_remove_command
     Boolean? move_filters_to_genotypes
     String gvcf_file_extension = ".g.vcf.gz"
+    String cloud_provider
   }
 
   String gvcf_basename = basename(gvcf, gvcf_file_extension)
+  # docker images
+  String gatk_docker_gcp = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+  String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:latest"
+  String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure
+
+  # make sure either gcp or azure is supplied as cloud_provider input
+  if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
+    call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
+      input:
+        message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+    }
+  }
 
   call Calling.Reblock as Reblock {
     input:
@@ -35,7 +49,8 @@ workflow ReblockGVCF {
       annotations_to_keep_command = annotations_to_keep_command,
       annotations_to_remove_command = annotations_to_remove_command,
       move_filters_to_genotypes = move_filters_to_genotypes,
-      output_vcf_filename = gvcf_basename + ".rb.g.vcf.gz"
+      output_vcf_filename = gvcf_basename + ".rb.g.vcf.gz",
+      docker_path = gatk_docker
   }
 
     # Validate the (g)VCF output of HaplotypeCaller
@@ -51,7 +66,7 @@ workflow ReblockGVCF {
         calling_intervals_defined = defined(calling_interval_list),
         is_gvcf = true,
         extra_args = "--no-overlaps",
-        gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+        docker_path = gatk_docker
     }
 
   output {
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.index.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.index.json
index aa862a064f..2ea7652b7e 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.index.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.index.json
@@ -3,6 +3,7 @@
     "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/wgs/plumbing/input/G96830.NA12878/index_in_different_location/NA12878_PLUMBING.g.vcf.gz.tbi",
     "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
     "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-    "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+    "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+    "ReblockGVCF.cloud_provider": "gcp"
   }
   
\ No newline at end of file
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.json
index 76086ae169..81d7cc66ee 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/NA12878.ultima.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/NA12878.ultima.json
index 33b71d9875..1e903059bf 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/NA12878.ultima.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/NA12878.ultima.json
@@ -6,5 +6,6 @@
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
   "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
   "ReblockGVCF.tree_score_cutoff": 0.2,
-  "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS"
+  "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/RP-929.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/RP-929.NA12878.json
index 5bd0ce00af..b1717905be 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/RP-929.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/RP-929.NA12878.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/plumbing/input/RP-929.NA12878/NA12878_PLUMBING.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json
index b7dea8da45..757f468933 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/C1963.CHMI_CHMI3_Nex1/CHMI_CHMI3_Nex1.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C862.NA19238.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C862.NA19238.json
index c2a496da55..3198fdf70d 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C862.NA19238.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C862.NA19238.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/C862.NA19238/NA19238.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12878.json
index e5791f69bb..626f8fb268 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12878.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/D5327.NA12878/NA12878.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12891.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12891.json
index 28fe2ca47f..35b71a1271 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12891.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12891.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/D5327.NA12891/NA12891.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json
index 33eabdc0c5..53554e2d84 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12878.json
index 5518401aee..8e1d594362 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12878.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12891.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12891.json
index 67cd0891c3..561e7dfea4 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12891.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12891.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12892.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12892.json
index 84acd3b6eb..c8ae0e0e8f 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12892.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12892.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G96830.NA12878.json
index 2ff9d8a64a..881ce23794 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G96830.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G96830.NA12878.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.bge.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.bge.json
index 5e99cbce58..459a89bc2f 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.bge.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.bge.json
@@ -7,5 +7,6 @@
   "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
   "ReblockGVCF.annotations_to_remove_command": "--format-annotations-to-remove PRI",
   "ReblockGVCF.move_filters_to_genotypes": true,
-  "ReblockGVCF.gvcf_file_extension": ".gvcf.gz"
+  "ReblockGVCF.gvcf_file_extension": ".gvcf.gz",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.ultima.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.ultima.json
index 4dd0f918da..ac12ce5429 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.ultima.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.ultima.json
@@ -6,5 +6,6 @@
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
   "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
   "ReblockGVCF.tree_score_cutoff": 0.2,
-  "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS"
+  "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-1535.NA17-308.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-1535.NA17-308.json
index 9a2ad60cf6..5bed19c39a 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-1535.NA17-308.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-1535.NA17-308.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/RP-1535.NA17-308/NA17-308.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-518.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-518.NA12878.json
index b3fbe04a0d..8136913847 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-518.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-518.NA12878.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Plumbing/RP-929.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Plumbing/RP-929.NA12878.json
index 5bd0ce00af..b1717905be 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Plumbing/RP-929.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Plumbing/RP-929.NA12878.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/plumbing/input/RP-929.NA12878/NA12878_PLUMBING.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C1963.CHMI_CHMI3_Nex1.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C1963.CHMI_CHMI3_Nex1.json
index b7dea8da45..757f468933 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C1963.CHMI_CHMI3_Nex1.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C1963.CHMI_CHMI3_Nex1.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/C1963.CHMI_CHMI3_Nex1/CHMI_CHMI3_Nex1.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C862.NA19238.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C862.NA19238.json
index c2a496da55..3198fdf70d 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C862.NA19238.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C862.NA19238.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/C862.NA19238/NA19238.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12878.json
index e5791f69bb..626f8fb268 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12878.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/D5327.NA12878/NA12878.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12891.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12891.json
index 28fe2ca47f..35b71a1271 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12891.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12891.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/D5327.NA12891/NA12891.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12892.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12892.json
index 9235c26a47..f5e1898ba6 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12892.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12892.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/D5327.NA12892/NA12892.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/RP-1535.NA17-308.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/RP-1535.NA17-308.json
index 9a2ad60cf6..5bed19c39a 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/RP-1535.NA17-308.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/RP-1535.NA17-308.json
@@ -3,5 +3,6 @@
   "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/RP-1535.NA17-308/NA17-308.g.vcf.gz.tbi",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/G96830.NA12878.json
index 76086ae169..81d7cc66ee 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/G96830.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/G96830.NA12878.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/NA12878.ultima.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/NA12878.ultima.json
index 33b71d9875..1e903059bf 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/NA12878.ultima.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/NA12878.ultima.json
@@ -6,5 +6,6 @@
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
   "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
   "ReblockGVCF.tree_score_cutoff": 0.2,
-  "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS"
+  "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94794.CHMI_CHMI3_WGS2.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94794.CHMI_CHMI3_WGS2.json
index 33eabdc0c5..53554e2d84 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94794.CHMI_CHMI3_WGS2.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94794.CHMI_CHMI3_WGS2.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12878.json
index 5518401aee..8e1d594362 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12878.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12891.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12891.json
index 67cd0891c3..561e7dfea4 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12891.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12891.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12892.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12892.json
index 84acd3b6eb..c8ae0e0e8f 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12892.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12892.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G96830.NA12878.json
index 2ff9d8a64a..881ce23794 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G96830.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G96830.NA12878.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/NA12878.ultima.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/NA12878.ultima.json
index 4dd0f918da..ac12ce5429 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/NA12878.ultima.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/NA12878.ultima.json
@@ -6,5 +6,6 @@
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
   "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
   "ReblockGVCF.tree_score_cutoff": 0.2,
-  "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS"
+  "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/RP-518.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/RP-518.NA12878.json
index b3fbe04a0d..8136913847 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/RP-518.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/RP-518.NA12878.json
@@ -4,5 +4,6 @@
   "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
   "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
   "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
-  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict"
+  "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
+  "ReblockGVCF.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
index e0c9f8af81..acfffcef76 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
@@ -1,3 +1,8 @@
+# 3.1.20
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline  
+
 # 3.1.19
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
index 7bbc434227..f5efc80b60 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
@@ -40,11 +40,12 @@ import "../../../../../../tasks/broad/BamProcessing.wdl" as Processing
 import "../../../../../../tasks/broad/BamToCram.wdl" as ToCram
 import "../../../../../../pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl" as ToGvcf
 import "../../../../../../structs/dna_seq/DNASeqStructs.wdl"
+import "../../../../../../tasks/broad/Utilities.wdl" as utils
 
 # WORKFLOW DEFINITION
 workflow ExomeGermlineSingleSample {
 
-  String pipeline_version = "3.1.19"
+  String pipeline_version = "3.1.20"
 
 
   input {
@@ -62,6 +63,21 @@ workflow ExomeGermlineSingleSample {
 
     Boolean skip_reblocking = false
     Boolean provide_bam_output = false
+
+    String cloud_provider
+  }
+
+  # docker images
+  String gatk_docker_gcp = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+  String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:latest"
+  String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure
+
+  # make sure either gcp or azure is supplied as cloud_provider input
+  if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
+    call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
+      input:
+        message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+    }
   }
 
   # Not overridable:
@@ -141,7 +157,8 @@ workflow ExomeGermlineSingleSample {
       base_file_name = sample_and_unmapped_bams.base_file_name,
       final_vcf_base_name = final_gvcf_base_name,
       agg_preemptible_tries = papi_settings.agg_preemptible_tries,
-      skip_reblocking = skip_reblocking
+      skip_reblocking = skip_reblocking,
+      cloud_provider = cloud_provider
   }
 
   call QC.CollectHsMetrics as CollectHsMetrics {
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Plumbing/RP-929.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Plumbing/RP-929.NA12878.json
index a2f7bbfb29..17c06f79b6 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Plumbing/RP-929.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Plumbing/RP-929.NA12878.json
@@ -57,5 +57,6 @@
 
   "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false,
   "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false,
-  "ExomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true
+  "ExomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true,
+  "ExomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json
index 1c4ba00d72..163e2f8265 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json
@@ -63,5 +63,6 @@
   },
 
   "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false,
-  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false
+  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false,
+  "ExomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C862.NA19238.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C862.NA19238.json
index f884c22730..c90ddcf59f 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C862.NA19238.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C862.NA19238.json
@@ -71,5 +71,6 @@
   },
 
   "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false,
-  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false
+  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false,
+  "ExomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12878.json
index 79b98889b0..a302f38a4f 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12878.json
@@ -56,5 +56,6 @@
   },
 
   "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false,
-  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false
+  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false,
+  "ExomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12891.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12891.json
index 72722de383..945d7bb79c 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12891.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12891.json
@@ -56,5 +56,6 @@
   },
 
   "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false,
-  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false
+  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false,
+  "ExomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12892.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12892.json
index 028be345a3..67ee0a8bd0 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12892.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12892.json
@@ -57,5 +57,6 @@
   },
 
   "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false,
-  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false
+  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false,
+  "ExomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/RP-1535.NA17-308.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/RP-1535.NA17-308.json
index ab6c472216..1d8834a98e 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/RP-1535.NA17-308.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/RP-1535.NA17-308.json
@@ -73,5 +73,6 @@
   },
 
   "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false,
-  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false
+  "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false,
+  "ExomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
index 9370fb1fa6..388d75b7fb 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
@@ -1,3 +1,8 @@
+# 1.0.17
+2024-04-08 (Date of Last Commit)
+
+* Changed ReblockGVCF.wdl to be multicloud
+
 # 1.0.16
 2024-03-26 (Date of Last Commit)
 
@@ -91,4 +96,4 @@
 2022-05-05 (Date of Last Commit)
 
 * Initial Release of UltimaGenomicsWholeGenomeGermline pipeline.
-* The UltimaGenomicsWholeGenomeGermline pipeline is an open-source, cloud-optimized workflow created for processing Ultima Genomics Whole Genome Sequenced Germline samples. Overall, the workflow aligns reads to the genome, marks duplicates, calls variants, and calculates quality metrics to produce a CRAM, CRAI, GVCF, filtered VCF, and quality metrics.
\ No newline at end of file
+* The UltimaGenomicsWholeGenomeGermline pipeline is an open-source, cloud-optimized workflow created for processing Ultima Genomics Whole Genome Sequenced Germline samples. Overall, the workflow aligns reads to the genome, marks duplicates, calls variants, and calculates quality metrics to produce a CRAM, CRAI, GVCF, filtered VCF, and quality metrics.
diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl
index 997686a999..a404f5d561 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl
@@ -50,7 +50,7 @@ workflow UltimaGenomicsWholeGenomeGermline {
     filtering_model_no_gt_name: "String describing the optional filtering model; default set to rf_model_ignore_gt_incl_hpol_runs"
   }
 
-  String pipeline_version = "1.0.16"
+  String pipeline_version = "1.0.17"
 
 
   References references = alignment_references.references
@@ -196,7 +196,8 @@ workflow UltimaGenomicsWholeGenomeGermline {
       ref_fasta = alignment_references.references.ref_fasta,
       ref_fasta_index = alignment_references.references.ref_fasta_index,
       tree_score_cutoff = vcf_post_processing.remove_low_tree_score_sites_cutoff,
-      annotations_to_keep_command = vcf_post_processing.annotations_to_keep_command_for_reblocking
+      annotations_to_keep_command = vcf_post_processing.annotations_to_keep_command_for_reblocking,
+      cloud_provider = "gcp"
   }
 
   # Outputs that will be retained when execution is complete
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
index f1f4d4b0e7..747a7030a1 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
@@ -1,3 +1,8 @@
+# 3.1.21
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. Added a new required workflow input, `cloud_provider`.
+
 # 3.1.20
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
index 2883780473..48af86c619 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
@@ -40,7 +40,7 @@ import "../../../../../../structs/dna_seq/DNASeqStructs.wdl"
 workflow WholeGenomeGermlineSingleSample {
 
 
-  String pipeline_version = "3.1.20"
+  String pipeline_version = "3.1.21"
 
 
   input {
@@ -68,6 +68,8 @@ workflow WholeGenomeGermlineSingleSample {
     Boolean use_bwa_mem = true
     Boolean allow_empty_ref_alt = false
     Boolean use_dragen_hard_filtering = false
+
+    String cloud_provider
   }
 
   if (dragen_functional_equivalence_mode && dragen_maximum_quality_mode) {
@@ -192,7 +194,8 @@ workflow WholeGenomeGermlineSingleSample {
       final_vcf_base_name = final_gvcf_base_name,
       agg_preemptible_tries = papi_settings.agg_preemptible_tries,
       use_gatk3_haplotype_caller = use_gatk3_haplotype_caller_,
-      use_dragen_hard_filtering = use_dragen_hard_filtering_
+      use_dragen_hard_filtering = use_dragen_hard_filtering_,
+      cloud_provider = cloud_provider
   }
 
   if (provide_bam_output) {
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json
index a2f8532cf7..309e93f9bd 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json
@@ -50,6 +50,7 @@
   "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz",
   "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz.tbi",
   "WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list",
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp",
 
   "WholeGenomeGermlineSingleSample.papi_settings": {
     "preemptible_tries": 3,
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json
index 772ee521b8..321ecbcc02 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json
@@ -58,5 +58,6 @@
 
   "WholeGenomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true,
   "WholeGenomeGermlineSingleSample.CollectWgsMetrics.read_length": 250,
-  "WholeGenomeGermlineSingleSample.CollectRawWgsMetrics.read_length": 250
+  "WholeGenomeGermlineSingleSample.CollectRawWgsMetrics.read_length": 250,
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_best_results.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_best_results.json
index 96f903e80d..a06a620b6c 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_best_results.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_best_results.json
@@ -62,5 +62,6 @@
   },
 
   "WholeGenomeGermlineSingleSample.dragen_maximum_quality_mode": true,
-  "WholeGenomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true
+  "WholeGenomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true,
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
\ No newline at end of file
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_functional_equivalence.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_functional_equivalence.json
index 50b81f310b..928deacdb5 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_functional_equivalence.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_functional_equivalence.json
@@ -63,5 +63,6 @@
   },
 
   "WholeGenomeGermlineSingleSample.dragen_functional_equivalence_mode": true,
-  "WholeGenomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true
+  "WholeGenomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true,
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
\ No newline at end of file
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json
index 8371849045..33374a597f 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json
@@ -73,5 +73,6 @@
   "WholeGenomeGermlineSingleSample.papi_settings": {
     "preemptible_tries": 3,
     "agg_preemptible_tries": 3
-  }
+  },
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_best_results.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_best_results.json
index 94f90073c8..c625c8b4c3 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_best_results.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_best_results.json
@@ -83,5 +83,6 @@
   },
 
   "WholeGenomeGermlineSingleSample.dragen_maximum_quality_mode": true,
-  "WholeGenomeGermlineSingleSample.BamToGvcf.HaplotypeCallerGATK4.memory_multiplier":2
+  "WholeGenomeGermlineSingleSample.BamToGvcf.HaplotypeCallerGATK4.memory_multiplier":2,
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_functional_equivalence.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_functional_equivalence.json
index c4b9608f29..271675b702 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_functional_equivalence.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_functional_equivalence.json
@@ -82,5 +82,6 @@
   },
 
   "WholeGenomeGermlineSingleSample.dragen_functional_equivalence_mode": true,
-  "WholeGenomeGermlineSingleSample.BamToGvcf.HaplotypeCallerGATK4.memory_multiplier":2
+  "WholeGenomeGermlineSingleSample.BamToGvcf.HaplotypeCallerGATK4.memory_multiplier":2,
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.json
index 344e66dd9a..96cac538de 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.json
@@ -73,5 +73,6 @@
   "WholeGenomeGermlineSingleSample.papi_settings": {
     "preemptible_tries": 3,
     "agg_preemptible_tries": 3
-  }
+  },
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12891.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12891.json
index 650c41990f..eeccd9275b 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12891.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12891.json
@@ -76,5 +76,6 @@
   "WholeGenomeGermlineSingleSample.papi_settings": {
     "preemptible_tries": 3,
     "agg_preemptible_tries": 3
-  }
+  },
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12892.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12892.json
index 9372e66905..5558036b60 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12892.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12892.json
@@ -74,5 +74,6 @@
   "WholeGenomeGermlineSingleSample.papi_settings": {
     "preemptible_tries": 3,
     "agg_preemptible_tries": 3
-  }
+  },
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G96830.NA12878.json
index 7f5e219d59..b4e3b1574a 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G96830.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G96830.NA12878.json
@@ -73,5 +73,6 @@
   "WholeGenomeGermlineSingleSample.papi_settings": {
     "preemptible_tries": 3,
     "agg_preemptible_tries": 3
-  }
+  },
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/RP-518.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/RP-518.NA12878.json
index 2032139bad..035b62a322 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/RP-518.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/RP-518.NA12878.json
@@ -50,5 +50,6 @@
   "WholeGenomeGermlineSingleSample.papi_settings": {
     "preemptible_tries": 3,
     "agg_preemptible_tries": 3
-  }
+  },
+  "WholeGenomeGermlineSingleSample.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md
index ee3a4be465..e0752ba664 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md
@@ -1,3 +1,8 @@
+# 2.1.19
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. Added a new required workflow input, `cloud_provider` ("gcp" or "azure"), which selects the GATK docker image.
+
 # 2.1.18
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index 1de2cb2361..e703fd99d6 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -9,7 +9,7 @@ import "../../../../../tasks/broad/DragenTasks.wdl" as DragenTasks
 workflow VariantCalling {
 
 
-  String pipeline_version = "2.1.18"
+  String pipeline_version = "2.1.19"
 
 
   input {
@@ -36,6 +36,20 @@ workflow VariantCalling {
     Boolean use_gatk3_haplotype_caller = false
     Boolean skip_reblocking = false
     Boolean use_dragen_hard_filtering = false
+    String cloud_provider
+  }
+
+  # docker images
+  String gatk_docker_gcp = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+  String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:latest"
+  String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure
+
+  # make sure either gcp or azure is supplied as cloud_provider input
+  if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
+    call Utils.ErrorWithMessage as ErrorMessageIncorrectInput {
+      input:
+        message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+    }
   }
 
   parameter_meta {
@@ -158,7 +172,8 @@ workflow VariantCalling {
         ref_fasta = ref_fasta,
         ref_fasta_index = ref_fasta_index,
         ref_dict = ref_dict,
-        output_vcf_filename = basename(MergeVCFs.output_vcf, ".g.vcf.gz") + ".rb.g.vcf.gz"
+        output_vcf_filename = basename(MergeVCFs.output_vcf, ".g.vcf.gz") + ".rb.g.vcf.gz",
+        docker_path = gatk_docker
     }
   }
 
@@ -183,7 +198,7 @@ workflow VariantCalling {
       calling_interval_list = calling_interval_list,
       is_gvcf = make_gvcf,
       extra_args = if (skip_reblocking == false) then "--no-overlaps" else "",
-      gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0",
+      docker_path = gatk_docker,
       preemptible_tries = agg_preemptible_tries
   }
 
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/G96830.NA12878.json
index 4e4be85272..c13ceb45f8 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/G96830.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/G96830.NA12878.json
@@ -17,5 +17,6 @@
   "VariantCalling.haplotype_scatter_count": 10,
   "VariantCalling.break_bands_at_multiples_of": 100000,
   "VariantCalling.agg_preemptible_tries": 3,
-  "VariantCalling.use_gatk3_haplotype_caller": true
+  "VariantCalling.use_gatk3_haplotype_caller": true,
+  "VariantCalling.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/RP-929.NA12878.json b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/RP-929.NA12878.json
index 1e89ca58f5..78f6c994e7 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/RP-929.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/RP-929.NA12878.json
@@ -17,5 +17,6 @@
   "VariantCalling.haplotype_scatter_count": 10,
   "VariantCalling.break_bands_at_multiples_of": 0,
   "VariantCalling.agg_preemptible_tries": 3,
-  "VariantCalling.use_gatk3_haplotype_caller": false
+  "VariantCalling.use_gatk3_haplotype_caller": false,
+  "VariantCalling.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/exome/Plumbing/RP-929.NA12878.json b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/exome/Plumbing/RP-929.NA12878.json
index 1e89ca58f5..78f6c994e7 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/exome/Plumbing/RP-929.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/exome/Plumbing/RP-929.NA12878.json
@@ -17,5 +17,6 @@
   "VariantCalling.haplotype_scatter_count": 10,
   "VariantCalling.break_bands_at_multiples_of": 0,
   "VariantCalling.agg_preemptible_tries": 3,
-  "VariantCalling.use_gatk3_haplotype_caller": false
+  "VariantCalling.use_gatk3_haplotype_caller": false,
+  "VariantCalling.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/wgs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/wgs/Plumbing/G96830.NA12878.json
index 4e4be85272..c13ceb45f8 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/wgs/Plumbing/G96830.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/wgs/Plumbing/G96830.NA12878.json
@@ -17,5 +17,6 @@
   "VariantCalling.haplotype_scatter_count": 10,
   "VariantCalling.break_bands_at_multiples_of": 100000,
   "VariantCalling.agg_preemptible_tries": 3,
-  "VariantCalling.use_gatk3_haplotype_caller": true
+  "VariantCalling.use_gatk3_haplotype_caller": true,
+  "VariantCalling.cloud_provider": "gcp"
 }
diff --git a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md
index 53cdb52510..aabda5be46 100644
--- a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md
+++ b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md
@@ -1,3 +1,8 @@
+# 1.0.17
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.  
+
 # 1.0.16
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl
index 9139aef12c..17d4fecfb8 100644
--- a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl
+++ b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl
@@ -43,7 +43,7 @@ workflow UltimaGenomicsWholeGenomeCramOnly {
     save_bam_file: "If true, then save intermeidate ouputs used by germline pipeline (such as the output BAM) otherwise they won't be kept as outputs."
   }
 
-  String pipeline_version = "1.0.16"
+  String pipeline_version = "1.0.17"
 
   References references = alignment_references.references
 
diff --git a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md
index e404639206..a698100417 100644
--- a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md
+++ b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md
@@ -1,3 +1,8 @@
+# 1.12.18
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.  
+
 # 1.12.17
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl
index 75a8dc1d7b..2443bc8bcb 100644
--- a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl
+++ b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl
@@ -21,7 +21,7 @@ import "../../../../tasks/broad/Qc.wdl" as Qc
 
 workflow IlluminaGenotypingArray {
 
-  String pipeline_version = "1.12.17"
+  String pipeline_version = "1.12.18"
 
   input {
     String sample_alias
diff --git a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md
index ffe7eece0c..e31bff0008 100644
--- a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md
+++ b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md
@@ -1,3 +1,8 @@
+# 1.1.8
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.  
+
 # 1.1.7
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl
index 3dd62b09ae..b7bf1c183e 100644
--- a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl
+++ b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl
@@ -9,7 +9,7 @@ workflow BroadInternalArrays {
         description: "Push outputs of Arrays.wdl to TDR dataset table ArraysOutputsTable."
     }
 
-    String pipeline_version = "1.1.7"
+    String pipeline_version = "1.1.8"
 
     input {
         # inputs to wrapper task
diff --git a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md
index 645e25f8fa..ce366234a3 100644
--- a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md
+++ b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md
@@ -1,3 +1,8 @@
+# 1.0.18
+2024-04-08 (Date of Last Commit)
+
+* Updated ReblockGVCF.wdl to run in Azure.
+
 # 1.0.17
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl
index fbd0ef4b53..946c9196dd 100644
--- a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl
+++ b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl
@@ -6,7 +6,7 @@ import "../../../../../../../pipelines/broad/qc/CheckFingerprint.wdl" as FP
 
 workflow BroadInternalUltimaGenomics {
 
-  String pipeline_version = "1.0.17"
+  String pipeline_version = "1.0.18"
 
   input {
   
diff --git a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md
index b455d24e9f..cfb9d14ae1 100644
--- a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md
+++ b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md
@@ -1,3 +1,8 @@
+# 1.0.30
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.  
+
 # 1.0.29
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl
index 95edae4bb7..d4f5316e89 100644
--- a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl
+++ b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl
@@ -7,7 +7,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils
 
 workflow BroadInternalRNAWithUMIs {
 
-  String pipeline_version = "1.0.29"
+  String pipeline_version = "1.0.30"
 
   input {
     # input needs to be either "hg19" or "hg38"
diff --git a/pipelines/broad/qc/CheckFingerprint.changelog.md b/pipelines/broad/qc/CheckFingerprint.changelog.md
index fd7517251c..a8ed8c3e4d 100644
--- a/pipelines/broad/qc/CheckFingerprint.changelog.md
+++ b/pipelines/broad/qc/CheckFingerprint.changelog.md
@@ -1,3 +1,8 @@
+# 1.0.17
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.  
+
 # 1.0.16
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/qc/CheckFingerprint.wdl b/pipelines/broad/qc/CheckFingerprint.wdl
index 2dbe67b878..0338466c3b 100644
--- a/pipelines/broad/qc/CheckFingerprint.wdl
+++ b/pipelines/broad/qc/CheckFingerprint.wdl
@@ -24,7 +24,7 @@ import "../../../tasks/broad/Qc.wdl" as Qc
 
 workflow CheckFingerprint {
 
-  String pipeline_version = "1.0.16"
+  String pipeline_version = "1.0.17"
 
   input {
     File? input_vcf
diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md
index a5c4f30605..0cee3decbe 100644
--- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md
@@ -1,3 +1,8 @@
+# 3.1.20
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. Added a new required workflow input, `cloud_provider`.
+
 # 3.1.19
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
index 11cc7ef033..0f4fadb666 100644
--- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
@@ -7,7 +7,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl"
 workflow ExomeReprocessing {
 
 
-  String pipeline_version = "3.1.19"
+  String pipeline_version = "3.1.20"
 
   input {
     File? input_cram
@@ -32,6 +32,8 @@ workflow ExomeReprocessing {
     File target_interval_list
     File bait_interval_list
     String bait_set_name
+
+    String cloud_provider
   }
 
   call ToUbams.CramToUnmappedBams {
@@ -64,6 +66,7 @@ workflow ExomeReprocessing {
       target_interval_list = target_interval_list,
       bait_interval_list = bait_interval_list,
       bait_set_name = bait_set_name,
+      cloud_provider = cloud_provider
   }
 
   output {
diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
index 0312d1bea5..d7bbf05bdc 100644
--- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
@@ -1,3 +1,8 @@
+# 3.1.22
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. Added a new required workflow input, `cloud_provider`.
+
 # 3.1.21
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
index 7fc309a1e0..3ff6daaa8b 100644
--- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
@@ -5,7 +5,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
 
 workflow ExternalExomeReprocessing {
 
-  String pipeline_version = "3.1.21"
+  String pipeline_version = "3.1.22"
 
 
   input {
@@ -34,6 +34,8 @@ workflow ExternalExomeReprocessing {
     String destination_cloud_path
     String vault_token_path
     String google_account_vault_path
+
+    String cloud_provider
   }
 
   call ExomeReprocessing.ExomeReprocessing {
@@ -53,7 +55,8 @@ workflow ExternalExomeReprocessing {
       fingerprint_genotypes_index = fingerprint_genotypes_index,
       cram_ref_fasta = cram_ref_fasta,
       cram_ref_fasta_index = cram_ref_fasta_index,
-      papi_settings = papi_settings
+      papi_settings = papi_settings,
+      cloud_provider = cloud_provider
   }
 
   call Copy.CopyFilesFromCloudToCloud {
diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md
index 71b139eb3e..57fce7e75e 100644
--- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md
@@ -1,3 +1,8 @@
+# 2.1.22
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. Added a new required workflow input, `cloud_provider`.
+
 # 2.1.21
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
index 609c70bc09..9776ce06d5 100644
--- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
@@ -6,7 +6,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
 workflow ExternalWholeGenomeReprocessing {
 
 
-  String pipeline_version = "2.1.21"
+  String pipeline_version = "2.1.22"
 
   input {
     File? input_cram
@@ -33,6 +33,8 @@ workflow ExternalWholeGenomeReprocessing {
     String destination_cloud_path
     String vault_token_path
     String google_account_vault_path
+
+    String cloud_provider
   }
 
   call WholeGenomeReprocessing.WholeGenomeReprocessing {
@@ -51,7 +53,8 @@ workflow ExternalWholeGenomeReprocessing {
     fingerprint_genotypes_index = fingerprint_genotypes_index,
     papi_settings = papi_settings,
     wgs_coverage_interval_list = wgs_coverage_interval_list,
-    scatter_settings = scatter_settings
+    scatter_settings = scatter_settings,
+    cloud_provider = cloud_provider
   }
 
   call Copy.CopyFilesFromCloudToCloud {
diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md
index fa7dd2579d..f32bf69607 100644
--- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md
@@ -1,3 +1,8 @@
+# 3.1.21
+2024-04-08 (Date of Last Commit)
+
+* Updated tasks GermlineVariantDiscovery.wdl and Qc.wdl to allow multi-cloud dockers; this does not affect this pipeline.
+
 # 3.1.20
 2024-03-26 (Date of Last Commit)
 
diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
index ac48aab3ed..cd4afd70b5 100644
--- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
@@ -6,7 +6,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl"
 
 workflow WholeGenomeReprocessing {
 
-  String pipeline_version = "3.1.20"
+  String pipeline_version = "3.1.21"
 
   input {
     File? input_cram
@@ -29,6 +29,8 @@ workflow WholeGenomeReprocessing {
     File? fingerprint_genotypes_index
 
     File wgs_coverage_interval_list
+
+    String cloud_provider
   }
 
   call ToUbams.CramToUnmappedBams {
@@ -57,7 +59,8 @@ workflow WholeGenomeReprocessing {
       fingerprint_genotypes_file = fingerprint_genotypes_file,
       fingerprint_genotypes_index = fingerprint_genotypes_index,
       papi_settings = papi_settings,
-      wgs_coverage_interval_list = wgs_coverage_interval_list
+      wgs_coverage_interval_list = wgs_coverage_interval_list,
+      cloud_provider = cloud_provider
   }
 
   output {
diff --git a/tasks/broad/GermlineVariantDiscovery.wdl b/tasks/broad/GermlineVariantDiscovery.wdl
index 0e3c8f2e6e..7294f2d0b5 100644
--- a/tasks/broad/GermlineVariantDiscovery.wdl
+++ b/tasks/broad/GermlineVariantDiscovery.wdl
@@ -203,7 +203,7 @@ task Reblock {
     File ref_fasta
     File ref_fasta_index
     String output_vcf_filename
-    String docker_image = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+    String docker_path
     Int additional_disk = 20
     String? annotations_to_keep_command
     String? annotations_to_remove_command
@@ -240,7 +240,7 @@ task Reblock {
     disks: "local-disk " + disk_size + " HDD"
     bootDiskSizeGb: 15
     preemptible: 3
-    docker: docker_image
+    docker: docker_path
   }
 
   output {
diff --git a/tasks/broad/Qc.wdl b/tasks/broad/Qc.wdl
index dfc6581f43..58c94f46e9 100644
--- a/tasks/broad/Qc.wdl
+++ b/tasks/broad/Qc.wdl
@@ -622,7 +622,7 @@ task ValidateVCF {
     Int preemptible_tries = 3
     Boolean is_gvcf = true
     String? extra_args
-    String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+    String docker_path
     Int machine_mem_mb = 7000
   }
 
@@ -657,7 +657,7 @@ task ValidateVCF {
       ~{extra_args}
   }
   runtime {
-    docker: gatk_docker
+    docker: docker_path
     preemptible: preemptible_tries
     memory: machine_mem_mb + " MiB"
     bootDiskSizeGb: 15
diff --git a/verification/test-wdls/TestExomeGermlineSingleSample.wdl b/verification/test-wdls/TestExomeGermlineSingleSample.wdl
index e6324a420c..59110d09be 100644
--- a/verification/test-wdls/TestExomeGermlineSingleSample.wdl
+++ b/verification/test-wdls/TestExomeGermlineSingleSample.wdl
@@ -28,6 +28,7 @@ workflow TestExomeGermlineSingleSample {
     Boolean update_truth
     String vault_token_path
     String google_account_vault_path
+    String cloud_provider
   }
 
   meta {
@@ -46,7 +47,8 @@ workflow TestExomeGermlineSingleSample {
       target_interval_list         = target_interval_list,
       bait_interval_list           = bait_interval_list,
       bait_set_name                = bait_set_name,
-      provide_bam_output           = provide_bam_output
+      provide_bam_output           = provide_bam_output,
+      cloud_provider               = cloud_provider
   }
 
   # Collect all of the pipeline outputs into a single Array[String]]
diff --git a/verification/test-wdls/TestReblockGVCF.wdl b/verification/test-wdls/TestReblockGVCF.wdl
index f34e22f1b7..01607636c7 100644
--- a/verification/test-wdls/TestReblockGVCF.wdl
+++ b/verification/test-wdls/TestReblockGVCF.wdl
@@ -27,6 +27,7 @@ workflow TestReblockGVCF {
       Boolean update_truth
       String vault_token_path
       String google_account_vault_path
+      String cloud_provider
     }
 
     meta {
@@ -45,7 +46,8 @@ workflow TestReblockGVCF {
         annotations_to_keep_command = annotations_to_keep_command,
         annotations_to_remove_command = annotations_to_remove_command,
         move_filters_to_genotypes = move_filters_to_genotypes,
-        gvcf_file_extension = gvcf_file_extension
+        gvcf_file_extension = gvcf_file_extension,
+        cloud_provider = cloud_provider
     }
 
     
diff --git a/verification/test-wdls/TestVariantCalling.wdl b/verification/test-wdls/TestVariantCalling.wdl
index b2c3b29273..3054e0a1b9 100644
--- a/verification/test-wdls/TestVariantCalling.wdl
+++ b/verification/test-wdls/TestVariantCalling.wdl
@@ -39,6 +39,7 @@ workflow TestVariantCalling {
       Boolean update_truth
       String vault_token_path
       String google_account_vault_path
+      String cloud_provider
     }
 
     meta {
@@ -69,7 +70,8 @@ workflow TestVariantCalling {
         make_bamout = make_bamout,
         use_gatk3_haplotype_caller = use_gatk3_haplotype_caller,
         skip_reblocking = skip_reblocking,
-        use_dragen_hard_filtering = use_dragen_hard_filtering
+        use_dragen_hard_filtering = use_dragen_hard_filtering,
+        cloud_provider = cloud_provider
   
     }
 
diff --git a/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl b/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl
index d3f775dcc7..16b54c3876 100644
--- a/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl
+++ b/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl
@@ -32,6 +32,7 @@ workflow TestWholeGenomeGermlineSingleSample {
     Boolean use_bwa_mem = true
     Boolean allow_empty_ref_alt = false
     Boolean use_dragen_hard_filtering = false
+    String cloud_provider
 
     # These values will be determined and injected into the inputs by the scala test framework
     String truth_path
@@ -66,7 +67,8 @@ workflow TestWholeGenomeGermlineSingleSample {
       perform_bqsr                       = perform_bqsr,
       use_bwa_mem                        = use_bwa_mem,
       allow_empty_ref_alt                = allow_empty_ref_alt,
-      use_dragen_hard_filtering          = use_dragen_hard_filtering
+      use_dragen_hard_filtering          = use_dragen_hard_filtering,
+      cloud_provider                     = cloud_provider
   }
 
   # Collect all of the pipeline outputs into a single Array[String]

From 0e38aa89df4a757da9f50457db67515a4610af5f Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 18 Apr 2024 13:19:27 -0400
Subject: [PATCH 042/186] edited wdl and changelog for ToA support

---
 pipelines/skylab/snm3C/snm3C.changelog.md |  4 ++++
 pipelines/skylab/snm3C/snm3C.wdl          | 21 ++++++++++++++-------
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.changelog.md b/pipelines/skylab/snm3C/snm3C.changelog.md
index afd28595c3..4ee5f7f128 100644
--- a/pipelines/skylab/snm3C/snm3C.changelog.md
+++ b/pipelines/skylab/snm3C/snm3C.changelog.md
@@ -1,3 +1,7 @@
+# 4.0.1
+2024-04-18 (Date of Last Commit)
+* Updated the snM3C wdl to run on Azure. This change does not affect the snM3C pipeline.
+
 # 4.0.0
 2024-03-15 (Date of Last Commit)
 * Reconstructed code and merged tasks to optimize pipeline and reduce cost 
diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 0aa726d1b4..3b14bb8fd2 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -11,6 +11,7 @@ workflow snm3C {
         File tarred_index_files
         File genome_fa
         File chromosome_sizes
+        String cloud_provider
 
         String r1_adapter = "AGATCGGAAGAGCACACGTCTGAAC"
         String r2_adapter = "AGATCGGAAGAGCGTCGTGTAGGGA"
@@ -23,11 +24,17 @@ workflow snm3C {
         Int num_downstr_bases = 2
         Int compress_level = 5
         Int batch_number
-        String docker = "us.gcr.io/broad-gotc-prod/m3c-yap-hisat:2.4"
     }
+    # Determine docker prefix based on cloud provider
+    String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
+    String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
+    String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
+
+    String snm3C_docker_image = "m3c-yap-hisat:2.4"
+
 
     # version of the pipeline
-    String pipeline_version = "4.0.0"
+    String pipeline_version = "4.0.1"
 
     call Demultiplexing {
         input:
@@ -35,7 +42,7 @@ workflow snm3C {
             fastq_input_read2 = fastq_input_read2,
             random_primer_indexes = random_primer_indexes,
             plate_id = plate_id,
-            docker = docker,
+            docker = docker_prefix + snm3C_docker_image,
             batch_number = batch_number
     }
 
@@ -54,7 +61,7 @@ workflow snm3C {
                 r2_left_cut = r2_left_cut,
                 r2_right_cut = r2_right_cut,
                 plate_id = plate_id,
-                docker = docker
+                docker = docker_prefix + snm3C_docker_image
         }
 
         call Hisat_single_end as Hisat_single_end {
@@ -63,7 +70,7 @@ workflow snm3C {
                 tarred_index_files = tarred_index_files,
                 genome_fa = genome_fa,
                 plate_id = plate_id,
-                docker = docker
+                docker = docker_prefix + snm3C_docker_image
         }
 
         call Merge_sort_analyze as Merge_sort_analyze {
@@ -76,7 +83,7 @@ workflow snm3C {
                compress_level = compress_level,
                chromosome_sizes = chromosome_sizes,
                plate_id = plate_id,
-               docker = docker
+               docker = docker_prefix + snm3C_docker_image
         }
     }
 
@@ -91,7 +98,7 @@ workflow snm3C {
             allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats,
             unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
             plate_id = plate_id,
-            docker = docker
+            docker = docker_prefix + snm3C_docker_image
     }
 
     meta {

From 2d7e5fc26f8b11a8700b8afce0f41ec45852151f Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 18 Apr 2024 13:20:35 -0400
Subject: [PATCH 043/186] added cloud provider input to test inputs json

---
 pipelines/skylab/snm3C/test_inputs/Plumbing/miseq_M16_G13.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/test_inputs/Plumbing/miseq_M16_G13.json b/pipelines/skylab/snm3C/test_inputs/Plumbing/miseq_M16_G13.json
index af100dea41..fcacdc5069 100644
--- a/pipelines/skylab/snm3C/test_inputs/Plumbing/miseq_M16_G13.json
+++ b/pipelines/skylab/snm3C/test_inputs/Plumbing/miseq_M16_G13.json
@@ -19,5 +19,6 @@
   "snm3C.batch_number": 2,
   "snm3C.Hisat_paired_end.cpu_platform" : "Intel Cascade Lake",
   "snm3C.Hisat_single_end.cpu_platform" : "Intel Cascade Lake",
-  "snm3C.Merge_sort_analyze.cpu_platform" : "Intel Cascade Lake"
+  "snm3C.Merge_sort_analyze.cpu_platform" : "Intel Cascade Lake",
+  "snm3C.cloud_provider" : "gcp"
 }

From ed008c6c6273ca04b354abcf1f895021c9af8945 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 18 Apr 2024 13:24:46 -0400
Subject: [PATCH 044/186] added util class to log error if unsupported cloud
 provider used for cloud_provider input

---
 pipelines/skylab/snm3C/snm3C.wdl | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 3b14bb8fd2..e1c69183d2 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -1,4 +1,6 @@
 version 1.0
+import "../../../tasks/broad/Utilities.wdl" as utils
+
 
 workflow snm3C {
 
@@ -31,7 +33,13 @@ workflow snm3C {
     String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
 
     String snm3C_docker_image = "m3c-yap-hisat:2.4"
-
+    # make sure either gcp or azr is supplied as cloud_provider input
+    if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
+        call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
+        input:
+            message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+        }
+    }
 
     # version of the pipeline
     String pipeline_version = "4.0.1"

From 1f6559b8ff7fc73ff8b3b6157b34958c19b2b283 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 18 Apr 2024 14:31:51 -0400
Subject: [PATCH 045/186] added cloud provider parameter to test json

---
 .../skylab/snm3C/test_inputs/Scientific/novaseq_M16_G13.json   | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/test_inputs/Scientific/novaseq_M16_G13.json b/pipelines/skylab/snm3C/test_inputs/Scientific/novaseq_M16_G13.json
index 0709e99fb9..e53437328d 100644
--- a/pipelines/skylab/snm3C/test_inputs/Scientific/novaseq_M16_G13.json
+++ b/pipelines/skylab/snm3C/test_inputs/Scientific/novaseq_M16_G13.json
@@ -19,5 +19,6 @@
   "snm3C.batch_number": 2,
   "snm3C.Hisat_paired_end.cpu_platform" : "Intel Cascade Lake",
   "snm3C.Hisat_single_end.cpu_platform" : "Intel Cascade Lake",
-  "snm3C.Merge_sort_analyze.cpu_platform" : "Intel Cascade Lake"
+  "snm3C.Merge_sort_analyze.cpu_platform" : "Intel Cascade Lake",
+  "snm3C.cloud_provider" : "gcp"
 }

From 45e918089a55fff507811658aba418852379b76b Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 18 Apr 2024 14:45:44 -0400
Subject: [PATCH 046/186] refactored task inputs

---
 pipelines/skylab/snm3C/snm3C.wdl | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index e1c69183d2..eb321f1d8c 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -9,11 +9,11 @@ workflow snm3C {
         Array[File] fastq_input_read2
         File random_primer_indexes
         String plate_id
+        String cloud_provider
         # mapping inputs
         File tarred_index_files
         File genome_fa
         File chromosome_sizes
-        String cloud_provider
 
         String r1_adapter = "AGATCGGAAGAGCACACGTCTGAAC"
         String r2_adapter = "AGATCGGAAGAGCGTCGTGTAGGGA"
@@ -50,7 +50,6 @@ workflow snm3C {
             fastq_input_read2 = fastq_input_read2,
             random_primer_indexes = random_primer_indexes,
             plate_id = plate_id,
-            docker = docker_prefix + snm3C_docker_image,
             batch_number = batch_number
     }
 
@@ -135,7 +134,7 @@ task Demultiplexing {
     File random_primer_indexes
     String plate_id
     Int batch_number
-    String docker
+    String docker = docker_prefix + snm3C_docker_image
 
     Int disk_size = 1000
     Int mem_size = 10
@@ -245,7 +244,7 @@ task Hisat_paired_end {
         File genome_fa
         File chromosome_sizes
         String plate_id
-        String docker
+        String docker = docker_prefix + snm3C_docker_image
 
         String r1_adapter
         String r2_adapter

From 1b376dfee475b017fc5706f0c8434b587915b020 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 18 Apr 2024 15:20:47 -0400
Subject: [PATCH 047/186] change to inputs

---
 pipelines/skylab/snm3C/snm3C.wdl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index eb321f1d8c..22dcead23f 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -50,7 +50,9 @@ workflow snm3C {
             fastq_input_read2 = fastq_input_read2,
             random_primer_indexes = random_primer_indexes,
             plate_id = plate_id,
-            batch_number = batch_number
+            batch_number = batch_number,
+            docker = docker_prefix + snm3C_docker_image
+
     }
 
     scatter(tar in Demultiplexing.tarred_demultiplexed_fastqs) {
@@ -134,7 +136,7 @@ task Demultiplexing {
     File random_primer_indexes
     String plate_id
     Int batch_number
-    String docker = docker_prefix + snm3C_docker_image
+    String docker
 
     Int disk_size = 1000
     Int mem_size = 10
@@ -244,7 +246,7 @@ task Hisat_paired_end {
         File genome_fa
         File chromosome_sizes
         String plate_id
-        String docker = docker_prefix + snm3C_docker_image
+        String docker
 
         String r1_adapter
         String r2_adapter

From 3c588e3084745090db25534eb8456d49002c635c Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 18 Apr 2024 15:29:54 -0400
Subject: [PATCH 048/186] change to docker path generation

---
 pipelines/skylab/snm3C/snm3C.wdl | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 22dcead23f..1e08eb723c 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -28,11 +28,10 @@ workflow snm3C {
         Int batch_number
     }
     # Determine docker prefix based on cloud provider
-    String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
-    String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
-    String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
+    String gcr_docker = "us.gcr.io/broad-gotc-prod/m3c-yap-hisat:2.4"
+    String acr_docker = "dsppipelinedev.azurecr.io/m3c-yap-hisat:2.4"
+    String snm3c_docker = if cloud_provider == "gcp" then gcr_docker else acr_docker
 
-    String snm3C_docker_image = "m3c-yap-hisat:2.4"
     # make sure either gcp or azr is supplied as cloud_provider input
     if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
         call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
@@ -51,7 +50,7 @@ workflow snm3C {
             random_primer_indexes = random_primer_indexes,
             plate_id = plate_id,
             batch_number = batch_number,
-            docker = docker_prefix + snm3C_docker_image
+            docker = snm3c_docker
 
     }
 
@@ -70,7 +69,7 @@ workflow snm3C {
                 r2_left_cut = r2_left_cut,
                 r2_right_cut = r2_right_cut,
                 plate_id = plate_id,
-                docker = docker_prefix + snm3C_docker_image
+                docker = snm3c_docker
         }
 
         call Hisat_single_end as Hisat_single_end {
@@ -79,7 +78,7 @@ workflow snm3C {
                 tarred_index_files = tarred_index_files,
                 genome_fa = genome_fa,
                 plate_id = plate_id,
-                docker = docker_prefix + snm3C_docker_image
+                docker = snm3c_docker
         }
 
         call Merge_sort_analyze as Merge_sort_analyze {
@@ -92,7 +91,7 @@ workflow snm3C {
                compress_level = compress_level,
                chromosome_sizes = chromosome_sizes,
                plate_id = plate_id,
-               docker = docker_prefix + snm3C_docker_image
+               docker = snm3c_docker
         }
     }
 
@@ -107,7 +106,7 @@ workflow snm3C {
             allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats,
             unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
             plate_id = plate_id,
-            docker = docker_prefix + snm3C_docker_image
+            docker = snm3c_docker
     }
 
     meta {

From dd7c16759b1c4c4aefb37bbad80400e548ffb80b Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 18 Apr 2024 15:47:36 -0400
Subject: [PATCH 049/186] changes made to test wdl

---
 verification/test-wdls/Testsnm3C.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/verification/test-wdls/Testsnm3C.wdl b/verification/test-wdls/Testsnm3C.wdl
index 7409e08311..ec54ae128e 100644
--- a/verification/test-wdls/Testsnm3C.wdl
+++ b/verification/test-wdls/Testsnm3C.wdl
@@ -36,7 +36,7 @@ workflow Testsnm3C {
       String vault_token_path
       String google_account_vault_path
 
-      String docker = "us.gcr.io/broad-gotc-prod/m3c-yap-hisat:2.4"
+      String cloud_provider
     }
 
     meta {
@@ -63,7 +63,7 @@ workflow Testsnm3C {
         num_downstr_bases = num_downstr_bases,
         compress_level = compress_level,
         batch_number = batch_number,
-        docker = docker
+        cloud_provider = cloud_provider
     }
 
     

From 0f6ce91520446eaa6daa6d2d9e462488e95d6cb0 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Fri, 19 Apr 2024 11:51:51 -0400
Subject: [PATCH 050/186] updated reference to docker images for consistency
 with other azurized wdls

---
 pipelines/skylab/snm3C/snm3C.wdl | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 1e08eb723c..44a0293c33 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -27,9 +27,11 @@ workflow snm3C {
         Int compress_level = 5
         Int batch_number
     }
+    #docker images
+    String m3c_yap_hisat_docker = "m3c-yap-hisat:2.4"
     # Determine docker prefix based on cloud provider
-    String gcr_docker = "us.gcr.io/broad-gotc-prod/m3c-yap-hisat:2.4"
-    String acr_docker = "dsppipelinedev.azurecr.io/m3c-yap-hisat:2.4"
+    String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/m3c-yap-hisat:2.4"
+    String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
     String snm3c_docker = if cloud_provider == "gcp" then gcr_docker else acr_docker
 
     # make sure either gcp or azr is supplied as cloud_provider input
@@ -50,7 +52,7 @@ workflow snm3C {
             random_primer_indexes = random_primer_indexes,
             plate_id = plate_id,
             batch_number = batch_number,
-            docker = snm3c_docker
+            docker = docker_prefix + m3c_yap_hisat_docker
 
     }
 
@@ -69,7 +71,7 @@ workflow snm3C {
                 r2_left_cut = r2_left_cut,
                 r2_right_cut = r2_right_cut,
                 plate_id = plate_id,
-                docker = snm3c_docker
+                docker = docker_prefix + m3c_yap_hisat_docker
         }
 
         call Hisat_single_end as Hisat_single_end {
@@ -78,7 +80,7 @@ workflow snm3C {
                 tarred_index_files = tarred_index_files,
                 genome_fa = genome_fa,
                 plate_id = plate_id,
-                docker = snm3c_docker
+                docker = docker_prefix + m3c_yap_hisat_docker
         }
 
         call Merge_sort_analyze as Merge_sort_analyze {
@@ -91,7 +93,7 @@ workflow snm3C {
                compress_level = compress_level,
                chromosome_sizes = chromosome_sizes,
                plate_id = plate_id,
-               docker = snm3c_docker
+               docker = docker_prefix + m3c_yap_hisat_docker
         }
     }
 
@@ -106,7 +108,7 @@ workflow snm3C {
             allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats,
             unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
             plate_id = plate_id,
-            docker = snm3c_docker
+            docker = docker_prefix + m3c_yap_hisat_docker
     }
 
     meta {

From d0b8f05216ca4c8a39b21907d30e029347b3144b Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Fri, 19 Apr 2024 11:52:31 -0400
Subject: [PATCH 051/186] fix to docker image reference

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 44a0293c33..22c15ba2c1 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -30,7 +30,7 @@ workflow snm3C {
     #docker images
     String m3c_yap_hisat_docker = "m3c-yap-hisat:2.4"
     # Determine docker prefix based on cloud provider
-    String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/m3c-yap-hisat:2.4"
+    String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
     String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
     String snm3c_docker = if cloud_provider == "gcp" then gcr_docker else acr_docker
 

From 790f39e4466b8eaa7a43d5f37269d769f53a62a3 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Fri, 19 Apr 2024 16:18:04 -0400
Subject: [PATCH 052/186] fix to docker prefix call

---
 pipelines/skylab/snm3C/snm3C.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 22c15ba2c1..8d18cc2d35 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -32,7 +32,7 @@ workflow snm3C {
     # Determine docker prefix based on cloud provider
     String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
     String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
-    String snm3c_docker = if cloud_provider == "gcp" then gcr_docker else acr_docker
+    String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
 
     # make sure either gcp or azr is supplied as cloud_provider input
     if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
@@ -80,7 +80,7 @@ workflow snm3C {
                 tarred_index_files = tarred_index_files,
                 genome_fa = genome_fa,
                 plate_id = plate_id,
-                docker = docker_prefix + m3c_yap_hisat_docker
+                docker = snm3c_docker + m3c_yap_hisat_docker
         }
 
         call Merge_sort_analyze as Merge_sort_analyze {

From ea3530c16abe5c5f34c6848d3e0696395f1ad6c4 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Mon, 22 Apr 2024 09:23:07 -0400
Subject: [PATCH 053/186] updated var name

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 8d18cc2d35..5e209fb6a9 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -80,7 +80,7 @@ workflow snm3C {
                 tarred_index_files = tarred_index_files,
                 genome_fa = genome_fa,
                 plate_id = plate_id,
-                docker = snm3c_docker + m3c_yap_hisat_docker
+                docker = docker_prefix + m3c_yap_hisat_docker
         }
 
         call Merge_sort_analyze as Merge_sort_analyze {

From dc85702fa3f48c15fa3485e1b9bf1571711e8fd1 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 29 Apr 2024 11:35:20 -0400
Subject: [PATCH 054/186] replace hard-coded cromwell_root with variable based
 on cloud env

---
 pipelines/skylab/snm3C/snm3C.wdl | 92 ++++++++++++++++++--------------
 1 file changed, 51 insertions(+), 41 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 5e209fb6a9..e628c39091 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -33,6 +33,7 @@ workflow snm3C {
     String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
     String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
     String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
+    String cromwell_root_dir = if cloud_provider == "gcp" then "/cromwell_root" else "/cromwell-executions"
 
     # make sure either gcp or azr is supplied as cloud_provider input
     if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
@@ -52,8 +53,8 @@ workflow snm3C {
             random_primer_indexes = random_primer_indexes,
             plate_id = plate_id,
             batch_number = batch_number,
-            docker = docker_prefix + m3c_yap_hisat_docker
-
+            docker = docker_prefix + m3c_yap_hisat_docker,
+            cromwell_root_dir = cromwell_root_dir
     }
 
     scatter(tar in Demultiplexing.tarred_demultiplexed_fastqs) {
@@ -71,7 +72,8 @@ workflow snm3C {
                 r2_left_cut = r2_left_cut,
                 r2_right_cut = r2_right_cut,
                 plate_id = plate_id,
-                docker = docker_prefix + m3c_yap_hisat_docker
+                docker = docker_prefix + m3c_yap_hisat_docker,
+                cromwell_root_dir = cromwell_root_dir
         }
 
         call Hisat_single_end as Hisat_single_end {
@@ -80,7 +82,8 @@ workflow snm3C {
                 tarred_index_files = tarred_index_files,
                 genome_fa = genome_fa,
                 plate_id = plate_id,
-                docker = docker_prefix + m3c_yap_hisat_docker
+                docker = docker_prefix + m3c_yap_hisat_docker,
+                cromwell_root_dir = cromwell_root_dir
         }
 
         call Merge_sort_analyze as Merge_sort_analyze {
@@ -93,7 +96,8 @@ workflow snm3C {
                compress_level = compress_level,
                chromosome_sizes = chromosome_sizes,
                plate_id = plate_id,
-               docker = docker_prefix + m3c_yap_hisat_docker
+               docker = docker_prefix + m3c_yap_hisat_docker,
+               cromwell_root_dir = cromwell_root_dir
         }
     }
 
@@ -108,7 +112,8 @@ workflow snm3C {
             allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats,
             unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
             plate_id = plate_id,
-            docker = docker_prefix + m3c_yap_hisat_docker
+            docker = docker_prefix + m3c_yap_hisat_docker,
+            cromwell_root_dir = cromwell_root_dir
     }
 
     meta {
@@ -138,6 +143,7 @@ task Demultiplexing {
     String plate_id
     Int batch_number
     String docker
+    String cromwell_root_dir
 
     Int disk_size = 1000
     Int mem_size = 10
@@ -169,7 +175,7 @@ task Demultiplexing {
     import os
 
     # Parsing stats.txt file
-    stats_file_path = '/cromwell_root/~{plate_id}.stats.txt'
+    stats_file_path = '~{cromwell_root_dir}/~{plate_id}.stats.txt'
     adapter_counts = {}
     with open(stats_file_path, 'r') as file:
         content = file.read()
@@ -181,7 +187,7 @@ task Demultiplexing {
         adapter_counts[adapter_name] = trimmed_count
 
     # Removing fastq files with trimmed reads greater than 30
-    directory_path = '/cromwell_root'
+    directory_path = '~{cromwell_root_dir}'  # quoted so the interpolated path is a Python string literal
     threshold = 10000000
 
     for filename in os.listdir(directory_path):
@@ -248,6 +254,7 @@ task Hisat_paired_end {
         File chromosome_sizes
         String plate_id
         String docker
+        String cromwell_root_dir
 
         String r1_adapter
         String r2_adapter
@@ -315,7 +322,7 @@ task Hisat_paired_end {
           # sort 
           start=$(date +%s)
           echo "Run sort r1"
-          zcat /cromwell_root/batch*/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
+          zcat ~{cromwell_root_dir}/batch*/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run sort r1: $elapsed seconds"
@@ -323,7 +330,7 @@ task Hisat_paired_end {
           # sort 
           start=$(date +%s)
           echo "Run sort r2"
-          zcat /cromwell_root/batch*/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
+          zcat ~{cromwell_root_dir}/batch*/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run sort r2: $elapsed seconds"
@@ -353,7 +360,7 @@ task Hisat_paired_end {
           # hisat run
           start=$(date +%s)
           echo "Run hisat"
-          hisat-3n /cromwell_root/$genome_fa_basename \
+          hisat-3n ~{cromwell_root_dir}/$genome_fa_basename \
           -q \
           -1 ${sample_id}-R1_trimmed.fq.gz \
           -2 ${sample_id}-R2_trimmed.fq.gz \
@@ -385,7 +392,7 @@ task Hisat_paired_end {
           elapsed=$((end - start)) 
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
           
-          rm /cromwell_root/batch*/${sample_id}-R1.fq.gz /cromwell_root/batch*/${sample_id}-R2.fq.gz
+          rm ~{cromwell_root_dir}/batch*/${sample_id}-R1.fq.gz ~{cromwell_root_dir}/batch*/${sample_id}-R2.fq.gz
           rm ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq
           rm ${sample_id}-R1_trimmed.fq.gz ${sample_id}-R2_trimmed.fq.gz
           rm ${sample_id}.hisat3n_dna.unsort.bam ${sample_id}.hisat3n_dna.multi_aligned.bam
@@ -498,6 +505,7 @@ task Hisat_single_end {
         File tarred_index_files
         String plate_id
         String docker
+        String cromwell_root_dir
 
         Int disk_size = 1000 
         Int mem_size = 64  
@@ -541,8 +549,8 @@ task Hisat_single_end {
         echo "Elapsed time to untar split_fq_tar: $elapsed seconds"
       
         # make directories 
-        mkdir -p /cromwell_root/merged_sort_bams
-        mkdir -p /cromwell_root/read_overlap
+        mkdir -p ~{cromwell_root_dir}/merged_sort_bams
+        mkdir -p ~{cromwell_root_dir}/read_overlap
    
         # define lists of r1 and r2 fq files
         R1_files=($(ls | grep "\.hisat3n_dna.split_reads.R1.fastq"))
@@ -557,7 +565,7 @@ task Hisat_single_end {
           start=$(date +%s) 
    
           # hisat on R1 single end
-          hisat-3n /cromwell_root/$genome_fa_basename \
+          hisat-3n ~{cromwell_root_dir}/$genome_fa_basename \
           -q \
           -U ${BASE}.hisat3n_dna.split_reads.R1.fastq \
           -S ${BASE}.hisat3n_dna.split_reads.R1.sam --directional-mapping-reverse --base-change C,T \
@@ -579,7 +587,7 @@ task Hisat_single_end {
          echo "Running hisat on sample_id_R2" $BASE
 
          # hisat on R2 single end
-         hisat-3n /cromwell_root/$genome_fa_basename \
+         hisat-3n ~{cromwell_root_dir}/$genome_fa_basename \
          -q \
          -U ${BASE}.hisat3n_dna.split_reads.R2.fastq \
          -S ${BASE}.hisat3n_dna.split_reads.R2.sam --directional-mapping --base-change C,T \
@@ -622,7 +630,7 @@ task Hisat_single_end {
          # remove_overlap_read_parts
          echo "call remove_overlap_read_parts" 
          start=$(date +%s) 
-         python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path=os.path.join(os.path.sep,"cromwell_root","'"$BASE"'.name_sorted.filtered.bam"),out_bam_path=os.path.join(os.path.sep,"cromwell_root","'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam"))'  
+         python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path=os.path.join(os.path.sep,~{cromwell_root_dir},"'"$BASE"'.name_sorted.filtered.bam"),out_bam_path=os.path.join(os.path.sep,~{cromwell_root_dir},"'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam"))'
          end=$(date +%s) 
          elapsed=$((end - start))  
          echo "Elapsed time to run remove overlap $elapsed seconds"
@@ -708,6 +716,7 @@ task Merge_sort_analyze {
         File paired_end_unique_tar
         File read_overlap_tar
         String docker
+        String cromwell_root_dir
 
         #input for allcools bam-to-allc
         File genome_fa
@@ -769,9 +778,9 @@ task Merge_sort_analyze {
       fi
 
       # make directories
-      mkdir /cromwell_root/output_bams
-      mkdir /cromwell_root/temp
-      mkdir /cromwell_root/allc-${mcg_context}
+      mkdir ~{cromwell_root_dir}/output_bams
+      mkdir ~{cromwell_root_dir}temp
+      mkdir ~{cromwell_root_dir}allc-${mcg_context}
       
       task() {
         local file=$1
@@ -802,16 +811,16 @@ task Merge_sort_analyze {
         start=$(date +%s)  
         echo "Call Picard remove duplicates"
         name=${sample_id}.hisat3n_dna.all_reads.deduped
-        picard MarkDuplicates I=${sample_id}.hisat3n_dna.all_reads.pos_sort.bam O=/cromwell_root/output_bams/${name}.bam \
-        M=/cromwell_root/output_bams/${name}.matrix.txt \
-        REMOVE_DUPLICATES=true TMP_DIR=/cromwell_root/temp
+        picard MarkDuplicates I=${sample_id}.hisat3n_dna.all_reads.pos_sort.bam O=~{cromwell_root_dir}/output_bams/${name}.bam \
+        M=~{cromwell_root_dir}/output_bams/${name}.matrix.txt \
+        REMOVE_DUPLICATES=true TMP_DIR=~{cromwell_root_dir}/temp
         end=$(date +%s) 
         elapsed=$((end - start))  
         echo "Elapsed time to run picard $elapsed seconds"
         
         start=$(date +%s)  
         echo "Call samtools index"
-        samtools index /cromwell_root/output_bams/${name}.bam
+        samtools index ~{cromwell_root_dir}/output_bams/${name}.bam
         end=$(date +%s) 
         elapsed=$((end - start)) 
         echo "Elapsed time to samtools index $elapsed seconds" 
@@ -826,8 +835,8 @@ task Merge_sort_analyze {
         start=$(date +%s)  
         echo "Call allcools bam-to-allc from deduped.bams" 
         /opt/conda/bin/allcools bam-to-allc \
-        --bam_path /cromwell_root/output_bams/${name}.bam \
-        --reference_fasta /cromwell_root/reference/~{genome_base} \
+        --bam_path ~{cromwell_root_dir}/output_bams/${name}.bam \
+        --reference_fasta ~{cromwell_root_dir}/reference/~{genome_base} \
         --output_path "${sample_id}.allc.tsv.gz" \
         --num_upstr_bases ~{num_upstr_bases} \
         --num_downstr_bases ~{num_downstr_bases} \
@@ -842,7 +851,7 @@ task Merge_sort_analyze {
         echo "Call allcools extract-all" 
         allcools extract-allc --strandness merge \
         --allc_path ${sample_id}.allc.tsv.gz \
-        --output_prefix /cromwell_root/allc-${mcg_context}/${sample_id} \
+        --output_prefix ~{cromwell_root_dir}/allc-${mcg_context}/${sample_id} \
         --mc_contexts ${mcg_context} \
         --chrom_size_path ~{chromosome_sizes}
         end=$(date +%s) 
@@ -852,8 +861,8 @@ task Merge_sort_analyze {
         echo "Remove some bams"
         rm ${sample_id}.hisat3n_dna.all_reads.bam
         rm ${sample_id}.hisat3n_dna.all_reads.pos_sort.bam
-        rm /cromwell_root/${sample_id}.hisat3n_dna.split_reads.read_overlap.bam
-        rm /cromwell_root/${sample_id}.hisat3n_dna.unique_aligned.bam
+        rm ~{cromwell_root_dir}/${sample_id}.hisat3n_dna.split_reads.read_overlap.bam
+        rm ~{cromwell_root_dir}/${sample_id}.hisat3n_dna.unique_aligned.bam
       }
  
       # run 4 instances of task in parallel 
@@ -908,8 +917,8 @@ task Merge_sort_analyze {
       tar -cf - *.allc.tsv.gz | pigz > ~{plate_id}.allc.tsv.tar.gz
       tar -cf - *.allc.tsv.gz.tbi | pigz > ~{plate_id}.allc.tbi.tar.gz
       tar -cf -  *.allc.tsv.gz.count.csv | pigz > ~{plate_id}.allc.count.tar.gz
-      tar -cf -  /cromwell_root/allc-${mcg_context}/*.gz | pigz > ~{plate_id}.extract-allc.tar.gz
-      tar -cf -  /cromwell_root/allc-${mcg_context}/*.tbi | pigz > ~{plate_id}.extract-allc_tbi.tar.gz
+      tar -cf -  ~{cromwell_root_dir}allc-${mcg_context}/*.gz | pigz > ~{plate_id}.extract-allc.tar.gz
+      tar -cf -  ~{cromwell_root_dir}/allc-${mcg_context}/*.tbi | pigz > ~{plate_id}.extract-allc_tbi.tar.gz
     >>>
 
     runtime {
@@ -946,6 +955,7 @@ task Summary {
         Array[File] allc_uniq_reads_stats
         Array[File] unique_reads_cgn_extraction_tbi
         String plate_id
+        String cromwell_root_dir
 
         String docker
         Int disk_size = 80
@@ -956,10 +966,10 @@ task Summary {
     command <<<
         set -euo pipefail
 
-        mkdir /cromwell_root/fastq
-        mkdir /cromwell_root/bam
-        mkdir /cromwell_root/allc
-        mkdir /cromwell_root/hic
+        mkdir ~{cromwell_root_dir}/fastq
+        mkdir ~{cromwell_root_dir}/bam
+        mkdir ~{cromwell_root_dir}/allc
+        mkdir ~{cromwell_root_dir}/hic
 
         extract_and_remove() {
             if [ $# -eq 0 ];
@@ -982,12 +992,12 @@ task Summary {
         extract_and_remove ~{sep=' ' allc_uniq_reads_stats}
         extract_and_remove ~{sep=' ' unique_reads_cgn_extraction_tbi}
 
-        mv *.trimmed.stats.txt /cromwell_root/fastq
-        mv *.hisat3n_dna_summary.txt *.hisat3n_dna_split_reads_summary.R1.txt *.hisat3n_dna_split_reads_summary.R2.txt /cromwell_root/bam
-        mv output_bams/*.hisat3n_dna.all_reads.deduped.matrix.txt /cromwell_root/bam
-        mv *.hisat3n_dna.all_reads.contact_stats.csv /cromwell_root/hic
-        mv *.allc.tsv.gz.count.csv /cromwell_root/allc
-        mv cromwell_root/allc-CGN/*.allc.tsv.gz.tbi /cromwell_root/allc
+        mv *.trimmed.stats.txt ~{cromwell_root_dir}/fastq
+        mv *.hisat3n_dna_summary.txt *.hisat3n_dna_split_reads_summary.R1.txt *.hisat3n_dna_split_reads_summary.R2.txt ~{cromwell_root_dir}/bam
+        mv output_bams/*.hisat3n_dna.all_reads.deduped.matrix.txt ~{cromwell_root_dir}/bam
+        mv *.hisat3n_dna.all_reads.contact_stats.csv ~{cromwell_root_dir}/hic
+        mv *.allc.tsv.gz.count.csv ~{cromwell_root_dir}/allc
+        mv ~{cromwell_root_dir}/allc-CGN/*.allc.tsv.gz.tbi ~{cromwell_root_dir}/allc
 
         python3 -c 'from cemba_data.hisat3n import *;snm3c_summary()'
         mv MappingSummary.csv.gz ~{plate_id}_MappingSummary.csv.gz

From be700227245282ff69692faf65873c158b51ab45 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 29 Apr 2024 11:55:06 -0400
Subject: [PATCH 055/186] wrap param name in quotations

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index e628c39091..3abe45ac29 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -187,7 +187,7 @@ task Demultiplexing {
         adapter_counts[adapter_name] = trimmed_count
 
     # Removing fastq files with trimmed reads greater than 30
-    directory_path = ~{cromwell_root_dir}
+    directory_path = '~{cromwell_root_dir}'
     threshold = 10000000
 
     for filename in os.listdir(directory_path):

From 73b2e90278beda985fa72a4efc3b0a38dfe893a0 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 29 Apr 2024 14:29:02 -0400
Subject: [PATCH 056/186] provide absolute path

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 3abe45ac29..b757719f7a 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -165,7 +165,7 @@ task Demultiplexing {
     -p ~{plate_id}-{name}-R2.fq.gz \
     r1.fastq.gz \
     r2.fastq.gz \
-    > ~{plate_id}.stats.txt
+    > ~{cromwell_root_dir}/~{plate_id}.stats.txt
 
     # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
     rm *-unknown-R{1,2}.fq.gz

From 0df475b2ab992ab2644e969cddb725f794e72145 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 29 Apr 2024 14:55:47 -0400
Subject: [PATCH 057/186] write out files using ls

---
 pipelines/skylab/snm3C/snm3C.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index b757719f7a..d5f7f78a6a 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -165,8 +165,8 @@ task Demultiplexing {
     -p ~{plate_id}-{name}-R2.fq.gz \
     r1.fastq.gz \
     r2.fastq.gz \
-    > ~{cromwell_root_dir}/~{plate_id}.stats.txt
-
+    > ~{plate_id}.stats.txt
+    ls -lh
     # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
     rm *-unknown-R{1,2}.fq.gz
 

From 8297ab24694235d5a0cff95c8dad757dc541f906 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Wed, 1 May 2024 12:55:57 -0400
Subject: [PATCH 058/186] add print for working dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index d5f7f78a6a..0af7533eed 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -154,6 +154,10 @@ task Demultiplexing {
   command <<<
     set -euo pipefail
 
+    ls -lR
+    pwd
+
+
     # Cat files for each r1, r2
     cat ~{sep=' ' fastq_input_read1} > r1.fastq.gz
     cat ~{sep=' ' fastq_input_read2} > r2.fastq.gz
@@ -166,7 +170,7 @@ task Demultiplexing {
     r1.fastq.gz \
     r2.fastq.gz \
     > ~{plate_id}.stats.txt
-    ls -lh
+
     # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
     rm *-unknown-R{1,2}.fq.gz
 

From 7a255c38aeb05f2134ac2ac9e2bad36f94e8fa1f Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Wed, 1 May 2024 14:20:15 -0400
Subject: [PATCH 059/186] provide absolute paths for demultiplexing

---
 pipelines/skylab/snm3C/snm3C.wdl | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 0af7533eed..a22b66a7f0 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -159,8 +159,8 @@ task Demultiplexing {
 
 
     # Cat files for each r1, r2
-    cat ~{sep=' ' fastq_input_read1} > r1.fastq.gz
-    cat ~{sep=' ' fastq_input_read2} > r2.fastq.gz
+    cat ~{sep=' ' fastq_input_read1} > ~{cromwell_root_dir}/r1.fastq.gz
+    cat ~{sep=' ' fastq_input_read2} > ~{cromwell_root_dir}/r2.fastq.gz
 
     # Run cutadapt
     /opt/conda/bin/cutadapt -Z -e 0.01 --no-indels -j 8 \
@@ -169,10 +169,10 @@ task Demultiplexing {
     -p ~{plate_id}-{name}-R2.fq.gz \
     r1.fastq.gz \
     r2.fastq.gz \
-    > ~{plate_id}.stats.txt
+    > ~{cromwell_root_dir}/~{plate_id}.stats.txt
 
     # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
-    rm *-unknown-R{1,2}.fq.gz
+    rm ~{cromwell_root_dir}/*-unknown-R{1,2}.fq.gz
 
     python3 <<CODE
     import re
@@ -215,15 +215,15 @@ task Demultiplexing {
     folder_index=1
 
     # Define lists of r1 and r2 fq files
-    R1_files=($(ls | grep "\-R1.fq.gz"))
-    R2_files=($(ls | grep "\-R2.fq.gz"))
+    R1_files=($(ls ~{cromwell_root_dir} | grep "\-R1.fq.gz"))
+    R2_files=($(ls ~{cromwell_root_dir} | grep "\-R2.fq.gz"))
 
     # Distribute the FASTQ files and create TAR files
     for file in "${R1_files[@]}"; do
         sample_id=$(basename "$file" "-R1.fq.gz")
         r2_file="${sample_id}-R2.fq.gz"
-        mv $file batch$((folder_index))/$file
-        mv $r2_file batch$((folder_index))/$r2_file
+        mv ~{cromwell_root_dir}/$file batch$((folder_index))/$file
+        mv ~{cromwell_root_dir}/$r2_file batch$((folder_index))/$r2_file
         # Increment the counter
         folder_index=$(( (folder_index % $batch_number) + 1 ))
     done
@@ -231,7 +231,7 @@ task Demultiplexing {
     # Tar up files per batch
     echo "TAR files"
     for i in $(seq 1 "${batch_number}"); do
-        tar -cf - batch${i}/*.fq.gz | pigz > ~{plate_id}.${i}.cutadapt_output_files.tar.gz
+        tar -cf - ~{cromwell_root_dir}/batch${i}/*.fq.gz | pigz > ~{cromwell_root_dir}/~{plate_id}.${i}.cutadapt_output_files.tar.gz
     done
     echo "TAR files created successfully."
   >>>

From 49b84a3e20fa30707717e308b3f6320cef87005b Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 2 May 2024 13:17:21 -0400
Subject: [PATCH 060/186] testing

---
 pipelines/skylab/snm3C/snm3C.wdl | 156 ++++++++++++++++---------------
 1 file changed, 80 insertions(+), 76 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index a22b66a7f0..6493d31661 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -152,88 +152,92 @@ task Demultiplexing {
   }
 
   command <<<
+    echo "TEST"
+
+
     set -euo pipefail
 
     ls -lR
     pwd
 
 
-    # Cat files for each r1, r2
-    cat ~{sep=' ' fastq_input_read1} > ~{cromwell_root_dir}/r1.fastq.gz
-    cat ~{sep=' ' fastq_input_read2} > ~{cromwell_root_dir}/r2.fastq.gz
-
-    # Run cutadapt
-    /opt/conda/bin/cutadapt -Z -e 0.01 --no-indels -j 8 \
-    -g file:~{random_primer_indexes} \
-    -o ~{plate_id}-{name}-R1.fq.gz \
-    -p ~{plate_id}-{name}-R2.fq.gz \
-    r1.fastq.gz \
-    r2.fastq.gz \
-    > ~{cromwell_root_dir}/~{plate_id}.stats.txt
-
-    # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
-    rm ~{cromwell_root_dir}/*-unknown-R{1,2}.fq.gz
-
-    python3 <<CODE
-    import re
-    import os
-
-    # Parsing stats.txt file
-    stats_file_path = '~{cromwell_root_dir}/~{plate_id}.stats.txt'
-    adapter_counts = {}
-    with open(stats_file_path, 'r') as file:
-        content = file.read()
-
-    adapter_matches = re.findall(r'=== First read: Adapter (\w+) ===\n\nSequence: .+; Type: .+; Length: \d+; Trimmed: (\d+) times', content)
-    for adapter_match in adapter_matches:
-        adapter_name = adapter_match[0]
-        trimmed_count = int(adapter_match[1])
-        adapter_counts[adapter_name] = trimmed_count
-
-    # Removing fastq files with trimmed reads greater than 30
-    directory_path = '~{cromwell_root_dir}'
-    threshold = 10000000
-
-    for filename in os.listdir(directory_path):
-        if filename.endswith('.fq.gz'):
-            file_path = os.path.join(directory_path, filename)
-            adapter_name = re.search(r'A(\d+)-R', filename)
-            if adapter_name:
-                adapter_name = 'A' + adapter_name.group(1)
-                if adapter_name in adapter_counts and adapter_counts[adapter_name] > threshold:
-                    os.remove(file_path)
-                    print(f'Removed file: {filename}')
-    CODE
-
-    # Batch the fastq files into folders of batch_number size
-    batch_number=~{batch_number}
-    for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
-        mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
-    done
-
-    # Counter for the folder index
-    folder_index=1
-
-    # Define lists of r1 and r2 fq files
-    R1_files=($(ls ~{cromwell_root_dir} | grep "\-R1.fq.gz"))
-    R2_files=($(ls ~{cromwell_root_dir} | grep "\-R2.fq.gz"))
-
-    # Distribute the FASTQ files and create TAR files
-    for file in "${R1_files[@]}"; do
-        sample_id=$(basename "$file" "-R1.fq.gz")
-        r2_file="${sample_id}-R2.fq.gz"
-        mv ~{cromwell_root_dir}/$file batch$((folder_index))/$file
-        mv ~{cromwell_root_dir}/$r2_file batch$((folder_index))/$r2_file
-        # Increment the counter
-        folder_index=$(( (folder_index % $batch_number) + 1 ))
-    done
-
-    # Tar up files per batch
-    echo "TAR files"
-    for i in $(seq 1 "${batch_number}"); do
-        tar -cf - ~{cromwell_root_dir}/batch${i}/*.fq.gz | pigz > ~{cromwell_root_dir}/~{plate_id}.${i}.cutadapt_output_files.tar.gz
-    done
-    echo "TAR files created successfully."
+#    # Cat files for each r1, r2
+#    cat ~{sep=' ' fastq_input_read1} > ~{cromwell_root_dir}/r1.fastq.gz
+#    cat ~{sep=' ' fastq_input_read2} > ~{cromwell_root_dir}/r2.fastq.gz
+#
+#    # Run cutadapt
+#    /opt/conda/bin/cutadapt -Z -e 0.01 --no-indels -j 8 \
+#    -g file:~{random_primer_indexes} \
+#    -o ~{plate_id}-{name}-R1.fq.gz \
+#    -p ~{plate_id}-{name}-R2.fq.gz \
+#    r1.fastq.gz \
+#    r2.fastq.gz \
+#    > ~{cromwell_root_dir}/~{plate_id}.stats.txt
+#
+#    # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
+#    rm ~{cromwell_root_dir}/*-unknown-R{1,2}.fq.gz
+#
+#    python3 <<CODE
+#    import re
+#    import os
+#
+#    # Parsing stats.txt file
+#    stats_file_path = '~{cromwell_root_dir}/~{plate_id}.stats.txt'
+#    adapter_counts = {}
+#    with open(stats_file_path, 'r') as file:
+#        content = file.read()
+#
+#    adapter_matches = re.findall(r'=== First read: Adapter (\w+) ===\n\nSequence: .+; Type: .+; Length: \d+; Trimmed: (\d+) times', content)
+#    for adapter_match in adapter_matches:
+#        adapter_name = adapter_match[0]
+#        trimmed_count = int(adapter_match[1])
+#        adapter_counts[adapter_name] = trimmed_count
+#
+#    # Removing fastq files with trimmed reads greater than 30
+#    directory_path = '~{cromwell_root_dir}'
+#    threshold = 10000000
+#
+#    for filename in os.listdir(directory_path):
+#        if filename.endswith('.fq.gz'):
+#            file_path = os.path.join(directory_path, filename)
+#            adapter_name = re.search(r'A(\d+)-R', filename)
+#            if adapter_name:
+#                adapter_name = 'A' + adapter_name.group(1)
+#                if adapter_name in adapter_counts and adapter_counts[adapter_name] > threshold:
+#                    os.remove(file_path)
+#                    print(f'Removed file: {filename}')
+#    CODE
+#
+#    # Batch the fastq files into folders of batch_number size
+#    batch_number=~{batch_number}
+#    for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
+#        mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
+#    done
+#
+#    # Counter for the folder index
+#    folder_index=1
+#
+#    # Define lists of r1 and r2 fq files
+#    R1_files=($(ls ~{cromwell_root_dir} | grep "\-R1.fq.gz"))
+#    R2_files=($(ls ~{cromwell_root_dir} | grep "\-R2.fq.gz"))
+#
+#    # Distribute the FASTQ files and create TAR files
+#    for file in "${R1_files[@]}"; do
+#        sample_id=$(basename "$file" "-R1.fq.gz")
+#        r2_file="${sample_id}-R2.fq.gz"
+#        mv ~{cromwell_root_dir}/$file batch$((folder_index))/$file
+#        mv ~{cromwell_root_dir}/$r2_file batch$((folder_index))/$r2_file
+#        # Increment the counter
+#        folder_index=$(( (folder_index % $batch_number) + 1 ))
+#    done
+#
+#    # Tar up files per batch
+#    echo "TAR files"
+#    for i in $(seq 1 "${batch_number}"); do
+#        tar -cf - ~{cromwell_root_dir}/batch${i}/*.fq.gz | pigz > ~{cromwell_root_dir}/~{plate_id}.${i}.cutadapt_output_files.tar.gz
+#    done
+#    echo "TAR files created successfully."
+
   >>>
 
   runtime {

From 0f8c6e376c5ef3ef8ec239fabaf3312922050f1c Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 2 May 2024 13:33:47 -0400
Subject: [PATCH 061/186] more testing

---
 pipelines/skylab/snm3C/snm3C.wdl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 6493d31661..5a2b56786d 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -156,7 +156,7 @@ task Demultiplexing {
 
 
     set -euo pipefail
-
+    touch test.txt
     ls -lR
     pwd
 
@@ -249,8 +249,9 @@ task Demultiplexing {
   }
 
   output {
-    Array[File] tarred_demultiplexed_fastqs = glob("*.tar.gz")
-    File stats = "~{plate_id}.stats.txt"
+    #Array[File] tarred_demultiplexed_fastqs = glob("*.tar.gz")
+    #File stats = "~{plate_id}.stats.txt"
+    File test = "test.txt"
     }
 }
 

From 7eca252e95c961727e2561c1abdeac559f5d8002 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 2 May 2024 13:44:29 -0400
Subject: [PATCH 062/186] more testing

---
 pipelines/skylab/snm3C/snm3C.wdl | 67 --------------------------------
 1 file changed, 67 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 5a2b56786d..70d75baf0a 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -57,80 +57,13 @@ workflow snm3C {
             cromwell_root_dir = cromwell_root_dir
     }
 
-    scatter(tar in Demultiplexing.tarred_demultiplexed_fastqs) {
-        call Hisat_paired_end as Hisat_paired_end {
-          input:
-                tarred_demultiplexed_fastqs = tar,
-                tarred_index_files = tarred_index_files,
-                genome_fa = genome_fa,
-                chromosome_sizes = chromosome_sizes,
-                min_read_length = min_read_length,
-                r1_adapter = r1_adapter,
-                r2_adapter = r2_adapter,
-                r1_left_cut = r1_left_cut,
-                r1_right_cut = r1_right_cut,
-                r2_left_cut = r2_left_cut,
-                r2_right_cut = r2_right_cut,
-                plate_id = plate_id,
-                docker = docker_prefix + m3c_yap_hisat_docker,
-                cromwell_root_dir = cromwell_root_dir
-        }
-
-        call Hisat_single_end as Hisat_single_end {
-            input:
-                split_fq_tar = Hisat_paired_end.split_fq_tar,
-                tarred_index_files = tarred_index_files,
-                genome_fa = genome_fa,
-                plate_id = plate_id,
-                docker = docker_prefix + m3c_yap_hisat_docker,
-                cromwell_root_dir = cromwell_root_dir
-        }
-
-        call Merge_sort_analyze as Merge_sort_analyze {
-            input:
-               paired_end_unique_tar = Hisat_paired_end.unique_bam_tar,
-               read_overlap_tar = Hisat_single_end.remove_overlaps_output_bam_tar,     
-               genome_fa = genome_fa, 
-               num_upstr_bases = num_upstr_bases,
-               num_downstr_bases = num_downstr_bases,
-               compress_level = compress_level,
-               chromosome_sizes = chromosome_sizes,
-               plate_id = plate_id,
-               docker = docker_prefix + m3c_yap_hisat_docker,
-               cromwell_root_dir = cromwell_root_dir
-        }
-    }
 
-    call Summary {
-        input:
-            trimmed_stats = Hisat_paired_end.trim_stats_tar,
-            hisat3n_stats = Hisat_paired_end.hisat3n_paired_end_stats_tar,
-            r1_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R1_tar,
-            r2_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R2_tar,
-            dedup_stats = Merge_sort_analyze.dedup_stats_tar,
-            chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats,
-            allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats,
-            unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
-            plate_id = plate_id,
-            docker = docker_prefix + m3c_yap_hisat_docker,
-            cromwell_root_dir = cromwell_root_dir
-    }
 
     meta {
         allowNestedInputs: true
     }
 
     output {
-        File MappingSummary = Summary.mapping_summary
-        Array[File] name_sorted_bams = Merge_sort_analyze.name_sorted_bam
-        Array[File] unique_reads_cgn_extraction_allc= Merge_sort_analyze.allc
-        Array[File] unique_reads_cgn_extraction_tbi = Merge_sort_analyze.tbi
-        Array[File] reference_version = Hisat_paired_end.reference_version
-        Array[File] all_reads_dedup_contacts = Merge_sort_analyze.all_reads_dedup_contacts
-        Array[File] all_reads_3C_contacts = Merge_sort_analyze.all_reads_3C_contacts
-        Array[File] chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats
-        Array[File] unique_reads_cgn_extraction_allc_extract = Merge_sort_analyze.extract_allc_output_allc_tar
-        Array[File] unique_reads_cgn_extraction_tbi_extract = Merge_sort_analyze.extract_allc_output_tbi_tar
 
     }
 }

From abc62cd0c156e65b873d680c1362d1a4d84c02d9 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 2 May 2024 14:05:54 -0400
Subject: [PATCH 063/186] put things back

---
 pipelines/skylab/snm3C/snm3C.wdl | 228 ++++++++++++++++++++-----------
 1 file changed, 146 insertions(+), 82 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 70d75baf0a..0ba3f9bc10 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -57,13 +57,80 @@ workflow snm3C {
             cromwell_root_dir = cromwell_root_dir
     }
 
+    scatter(tar in Demultiplexing.tarred_demultiplexed_fastqs) {
+        call Hisat_paired_end as Hisat_paired_end {
+          input:
+                tarred_demultiplexed_fastqs = tar,
+                tarred_index_files = tarred_index_files,
+                genome_fa = genome_fa,
+                chromosome_sizes = chromosome_sizes,
+                min_read_length = min_read_length,
+                r1_adapter = r1_adapter,
+                r2_adapter = r2_adapter,
+                r1_left_cut = r1_left_cut,
+                r1_right_cut = r1_right_cut,
+                r2_left_cut = r2_left_cut,
+                r2_right_cut = r2_right_cut,
+                plate_id = plate_id,
+                docker = docker_prefix + m3c_yap_hisat_docker,
+                cromwell_root_dir = cromwell_root_dir
+        }
+
+        call Hisat_single_end as Hisat_single_end {
+            input:
+                split_fq_tar = Hisat_paired_end.split_fq_tar,
+                tarred_index_files = tarred_index_files,
+                genome_fa = genome_fa,
+                plate_id = plate_id,
+                docker = docker_prefix + m3c_yap_hisat_docker,
+                cromwell_root_dir = cromwell_root_dir
+        }
 
+        call Merge_sort_analyze as Merge_sort_analyze {
+            input:
+               paired_end_unique_tar = Hisat_paired_end.unique_bam_tar,
+               read_overlap_tar = Hisat_single_end.remove_overlaps_output_bam_tar,
+               genome_fa = genome_fa,
+               num_upstr_bases = num_upstr_bases,
+               num_downstr_bases = num_downstr_bases,
+               compress_level = compress_level,
+               chromosome_sizes = chromosome_sizes,
+               plate_id = plate_id,
+               docker = docker_prefix + m3c_yap_hisat_docker,
+               cromwell_root_dir = cromwell_root_dir
+        }
+    }
+
+    call Summary {
+        input:
+            trimmed_stats = Hisat_paired_end.trim_stats_tar,
+            hisat3n_stats = Hisat_paired_end.hisat3n_paired_end_stats_tar,
+            r1_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R1_tar,
+            r2_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R2_tar,
+            dedup_stats = Merge_sort_analyze.dedup_stats_tar,
+            chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats,
+            allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats,
+            unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
+            plate_id = plate_id,
+            docker = docker_prefix + m3c_yap_hisat_docker,
+            cromwell_root_dir = cromwell_root_dir
+    }
 
     meta {
         allowNestedInputs: true
     }
 
     output {
+        File MappingSummary = Summary.mapping_summary
+        Array[File] name_sorted_bams = Merge_sort_analyze.name_sorted_bam
+        Array[File] unique_reads_cgn_extraction_allc= Merge_sort_analyze.allc
+        Array[File] unique_reads_cgn_extraction_tbi = Merge_sort_analyze.tbi
+        Array[File] reference_version = Hisat_paired_end.reference_version
+        Array[File] all_reads_dedup_contacts = Merge_sort_analyze.all_reads_dedup_contacts
+        Array[File] all_reads_3C_contacts = Merge_sort_analyze.all_reads_3C_contacts
+        Array[File] chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats
+        Array[File] unique_reads_cgn_extraction_allc_extract = Merge_sort_analyze.extract_allc_output_allc_tar
+        Array[File] unique_reads_cgn_extraction_tbi_extract = Merge_sort_analyze.extract_allc_output_tbi_tar
 
     }
 }
@@ -86,90 +153,88 @@ task Demultiplexing {
 
   command <<<
     echo "TEST"
-
-
     set -euo pipefail
-    touch test.txt
+
     ls -lR
     pwd
 
 
-#    # Cat files for each r1, r2
-#    cat ~{sep=' ' fastq_input_read1} > ~{cromwell_root_dir}/r1.fastq.gz
-#    cat ~{sep=' ' fastq_input_read2} > ~{cromwell_root_dir}/r2.fastq.gz
-#
-#    # Run cutadapt
-#    /opt/conda/bin/cutadapt -Z -e 0.01 --no-indels -j 8 \
-#    -g file:~{random_primer_indexes} \
-#    -o ~{plate_id}-{name}-R1.fq.gz \
-#    -p ~{plate_id}-{name}-R2.fq.gz \
-#    r1.fastq.gz \
-#    r2.fastq.gz \
-#    > ~{cromwell_root_dir}/~{plate_id}.stats.txt
-#
-#    # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
-#    rm ~{cromwell_root_dir}/*-unknown-R{1,2}.fq.gz
-#
-#    python3 <<CODE
-#    import re
-#    import os
-#
-#    # Parsing stats.txt file
-#    stats_file_path = '~{cromwell_root_dir}/~{plate_id}.stats.txt'
-#    adapter_counts = {}
-#    with open(stats_file_path, 'r') as file:
-#        content = file.read()
-#
-#    adapter_matches = re.findall(r'=== First read: Adapter (\w+) ===\n\nSequence: .+; Type: .+; Length: \d+; Trimmed: (\d+) times', content)
-#    for adapter_match in adapter_matches:
-#        adapter_name = adapter_match[0]
-#        trimmed_count = int(adapter_match[1])
-#        adapter_counts[adapter_name] = trimmed_count
-#
-#    # Removing fastq files with trimmed reads greater than 30
-#    directory_path = '~{cromwell_root_dir}'
-#    threshold = 10000000
-#
-#    for filename in os.listdir(directory_path):
-#        if filename.endswith('.fq.gz'):
-#            file_path = os.path.join(directory_path, filename)
-#            adapter_name = re.search(r'A(\d+)-R', filename)
-#            if adapter_name:
-#                adapter_name = 'A' + adapter_name.group(1)
-#                if adapter_name in adapter_counts and adapter_counts[adapter_name] > threshold:
-#                    os.remove(file_path)
-#                    print(f'Removed file: {filename}')
-#    CODE
-#
-#    # Batch the fastq files into folders of batch_number size
-#    batch_number=~{batch_number}
-#    for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
-#        mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
-#    done
-#
-#    # Counter for the folder index
-#    folder_index=1
-#
-#    # Define lists of r1 and r2 fq files
-#    R1_files=($(ls ~{cromwell_root_dir} | grep "\-R1.fq.gz"))
-#    R2_files=($(ls ~{cromwell_root_dir} | grep "\-R2.fq.gz"))
-#
-#    # Distribute the FASTQ files and create TAR files
-#    for file in "${R1_files[@]}"; do
-#        sample_id=$(basename "$file" "-R1.fq.gz")
-#        r2_file="${sample_id}-R2.fq.gz"
-#        mv ~{cromwell_root_dir}/$file batch$((folder_index))/$file
-#        mv ~{cromwell_root_dir}/$r2_file batch$((folder_index))/$r2_file
-#        # Increment the counter
-#        folder_index=$(( (folder_index % $batch_number) + 1 ))
-#    done
-#
-#    # Tar up files per batch
-#    echo "TAR files"
-#    for i in $(seq 1 "${batch_number}"); do
-#        tar -cf - ~{cromwell_root_dir}/batch${i}/*.fq.gz | pigz > ~{cromwell_root_dir}/~{plate_id}.${i}.cutadapt_output_files.tar.gz
-#    done
-#    echo "TAR files created successfully."
+    # Cat files for each r1, r2
+    cat ~{sep=' ' fastq_input_read1} > ~{cromwell_root_dir}/r1.fastq.gz
+    cat ~{sep=' ' fastq_input_read2} > ~{cromwell_root_dir}/r2.fastq.gz
+
+    # Run cutadapt
+    /opt/conda/bin/cutadapt -Z -e 0.01 --no-indels -j 8 \
+    -g file:~{random_primer_indexes} \
+    -o ~{plate_id}-{name}-R1.fq.gz \
+    -p ~{plate_id}-{name}-R2.fq.gz \
+    r1.fastq.gz \
+    r2.fastq.gz \
+    > ~{cromwell_root_dir}/~{plate_id}.stats.txt
+
+    # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
+    rm ~{cromwell_root_dir}/*-unknown-R{1,2}.fq.gz
+
+    python3 <<CODE
+    import re
+    import os
+
+    # Parsing stats.txt file
+    stats_file_path = '~{cromwell_root_dir}/~{plate_id}.stats.txt'
+    adapter_counts = {}
+    with open(stats_file_path, 'r') as file:
+        content = file.read()
+
+    adapter_matches = re.findall(r'=== First read: Adapter (\w+) ===\n\nSequence: .+; Type: .+; Length: \d+; Trimmed: (\d+) times', content)
+    for adapter_match in adapter_matches:
+        adapter_name = adapter_match[0]
+        trimmed_count = int(adapter_match[1])
+        adapter_counts[adapter_name] = trimmed_count
+
+    # Removing fastq files with trimmed reads greater than 30
+    directory_path = '~{cromwell_root_dir}'
+    threshold = 10000000
+
+    for filename in os.listdir(directory_path):
+        if filename.endswith('.fq.gz'):
+            file_path = os.path.join(directory_path, filename)
+            adapter_name = re.search(r'A(\d+)-R', filename)
+            if adapter_name:
+                adapter_name = 'A' + adapter_name.group(1)
+                if adapter_name in adapter_counts and adapter_counts[adapter_name] > threshold:
+                    os.remove(file_path)
+                    print(f'Removed file: {filename}')
+    CODE
+
+    # Batch the fastq files into folders of batch_number size
+    batch_number=~{batch_number}
+    for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
+        mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
+    done
+
+    # Counter for the folder index
+    folder_index=1
+
+    # Define lists of r1 and r2 fq files
+    R1_files=($(ls ~{cromwell_root_dir} | grep "\-R1.fq.gz"))
+    R2_files=($(ls ~{cromwell_root_dir} | grep "\-R2.fq.gz"))
+
+    # Distribute the FASTQ files and create TAR files
+    for file in "${R1_files[@]}"; do
+        sample_id=$(basename "$file" "-R1.fq.gz")
+        r2_file="${sample_id}-R2.fq.gz"
+        mv ~{cromwell_root_dir}/$file batch$((folder_index))/$file
+        mv ~{cromwell_root_dir}/$r2_file batch$((folder_index))/$r2_file
+        # Increment the counter
+        folder_index=$(( (folder_index % $batch_number) + 1 ))
+    done
+
+    # Tar up files per batch
+    echo "TAR files"
+    for i in $(seq 1 "${batch_number}"); do
+        tar -cf - ~{cromwell_root_dir}/batch${i}/*.fq.gz | pigz > ~{cromwell_root_dir}/~{plate_id}.${i}.cutadapt_output_files.tar.gz
+    done
+    echo "TAR files created successfully."
 
   >>>
 
@@ -182,9 +247,8 @@ task Demultiplexing {
   }
 
   output {
-    #Array[File] tarred_demultiplexed_fastqs = glob("*.tar.gz")
-    #File stats = "~{plate_id}.stats.txt"
-    File test = "test.txt"
+    Array[File] tarred_demultiplexed_fastqs = glob("*.tar.gz")
+    File stats = "~{plate_id}.stats.txt"
     }
 }
 

From c465b4fc2eaab5fbde3aadc6f8b50fd6c6c40edb Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 3 May 2024 11:10:10 -0400
Subject: [PATCH 064/186] add lots more logging

---
 pipelines/skylab/snm3C/snm3C.wdl | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 0ba3f9bc10..427a37deff 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -172,9 +172,13 @@ task Demultiplexing {
     r2.fastq.gz \
     > ~{cromwell_root_dir}/~{plate_id}.stats.txt
 
+    echo "RAN CUT ADAPT"
+
     # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
     rm ~{cromwell_root_dir}/*-unknown-R{1,2}.fq.gz
 
+    echo "REMOVED FILES"
+
     python3 <<CODE
     import re
     import os
@@ -206,12 +210,16 @@ task Demultiplexing {
                     print(f'Removed file: {filename}')
     CODE
 
+    echo "RAN PYTHON SNIPPET"
+
     # Batch the fastq files into folders of batch_number size
     batch_number=~{batch_number}
     for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
         mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
     done
 
+    echo "BATCHED FASTQ FILES INTO FOLDERS"
+
     # Counter for the folder index
     folder_index=1
 
@@ -219,6 +227,8 @@ task Demultiplexing {
     R1_files=($(ls ~{cromwell_root_dir} | grep "\-R1.fq.gz"))
     R2_files=($(ls ~{cromwell_root_dir} | grep "\-R2.fq.gz"))
 
+    echo "STARTING TAR JOB"
+
     # Distribute the FASTQ files and create TAR files
     for file in "${R1_files[@]}"; do
         sample_id=$(basename "$file" "-R1.fq.gz")

From 84e185b1052cd9d09c6e250b73e718d6d2d2a707 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 3 May 2024 13:33:30 -0400
Subject: [PATCH 065/186] only run cutadapt

---
 pipelines/skylab/snm3C/snm3C.wdl | 98 ++++++++++++++++----------------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 427a37deff..cf210940d9 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -179,72 +179,72 @@ task Demultiplexing {
 
     echo "REMOVED FILES"
 
-    python3 <<CODE
-    import re
-    import os
+    #python3 <<CODE
+    #import re
+    #import os
 
     # Parsing stats.txt file
-    stats_file_path = '~{cromwell_root_dir}/~{plate_id}.stats.txt'
-    adapter_counts = {}
-    with open(stats_file_path, 'r') as file:
-        content = file.read()
+    #stats_file_path = '~{cromwell_root_dir}/~{plate_id}.stats.txt'
+    #adapter_counts = {}
+    #with open(stats_file_path, 'r') as file:
+    #    content = file.read()
 
-    adapter_matches = re.findall(r'=== First read: Adapter (\w+) ===\n\nSequence: .+; Type: .+; Length: \d+; Trimmed: (\d+) times', content)
-    for adapter_match in adapter_matches:
-        adapter_name = adapter_match[0]
-        trimmed_count = int(adapter_match[1])
-        adapter_counts[adapter_name] = trimmed_count
+    #adapter_matches = re.findall(r'=== First read: Adapter (\w+) ===\n\nSequence: .+; Type: .+; Length: \d+; Trimmed: (\d+) times', content)
+    #for adapter_match in adapter_matches:
+    #    adapter_name = adapter_match[0]
+    #    trimmed_count = int(adapter_match[1])
+    #    adapter_counts[adapter_name] = trimmed_count
 
     # Removing fastq files with trimmed reads greater than 30
-    directory_path = '~{cromwell_root_dir}'
-    threshold = 10000000
-
-    for filename in os.listdir(directory_path):
-        if filename.endswith('.fq.gz'):
-            file_path = os.path.join(directory_path, filename)
-            adapter_name = re.search(r'A(\d+)-R', filename)
-            if adapter_name:
-                adapter_name = 'A' + adapter_name.group(1)
-                if adapter_name in adapter_counts and adapter_counts[adapter_name] > threshold:
-                    os.remove(file_path)
-                    print(f'Removed file: {filename}')
-    CODE
-
-    echo "RAN PYTHON SNIPPET"
+    #directory_path = '~{cromwell_root_dir}'
+    #threshold = 10000000
+
+    #for filename in os.listdir(directory_path):
+    #    if filename.endswith('.fq.gz'):
+    #        file_path = os.path.join(directory_path, filename)
+    #        adapter_name = re.search(r'A(\d+)-R', filename)
+    #        if adapter_name:
+    #            adapter_name = 'A' + adapter_name.group(1)
+    #            if adapter_name in adapter_counts and adapter_counts[adapter_name] > threshold:
+    #                os.remove(file_path)
+    #                print(f'Removed file: {filename}')
+    #CODE
+
+    #echo "RAN PYTHON SNIPPET"
 
     # Batch the fastq files into folders of batch_number size
-    batch_number=~{batch_number}
-    for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
-        mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
-    done
+    #batch_number=~{batch_number}
+    #for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
+    #    mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
+    #done
 
-    echo "BATCHED FASTQ FILES INTO FOLDERS"
+    #echo "BATCHED FASTQ FILES INTO FOLDERS"
 
     # Counter for the folder index
-    folder_index=1
+    #folder_index=1
 
     # Define lists of r1 and r2 fq files
-    R1_files=($(ls ~{cromwell_root_dir} | grep "\-R1.fq.gz"))
-    R2_files=($(ls ~{cromwell_root_dir} | grep "\-R2.fq.gz"))
+    #R1_files=($(ls ~{cromwell_root_dir} | grep "\-R1.fq.gz"))
+    #R2_files=($(ls ~{cromwell_root_dir} | grep "\-R2.fq.gz"))
 
-    echo "STARTING TAR JOB"
+    #echo "STARTING TAR JOB"
 
     # Distribute the FASTQ files and create TAR files
-    for file in "${R1_files[@]}"; do
-        sample_id=$(basename "$file" "-R1.fq.gz")
-        r2_file="${sample_id}-R2.fq.gz"
-        mv ~{cromwell_root_dir}/$file batch$((folder_index))/$file
-        mv ~{cromwell_root_dir}/$r2_file batch$((folder_index))/$r2_file
+    #for file in "${R1_files[@]}"; do
+    #    sample_id=$(basename "$file" "-R1.fq.gz")
+    #    r2_file="${sample_id}-R2.fq.gz"
+    #    mv ~{cromwell_root_dir}/$file batch$((folder_index))/$file
+    #    mv ~{cromwell_root_dir}/$r2_file batch$((folder_index))/$r2_file
         # Increment the counter
-        folder_index=$(( (folder_index % $batch_number) + 1 ))
-    done
+    #    folder_index=$(( (folder_index % $batch_number) + 1 ))
+    #done
 
     # Tar up files per batch
-    echo "TAR files"
-    for i in $(seq 1 "${batch_number}"); do
-        tar -cf - ~{cromwell_root_dir}/batch${i}/*.fq.gz | pigz > ~{cromwell_root_dir}/~{plate_id}.${i}.cutadapt_output_files.tar.gz
-    done
-    echo "TAR files created successfully."
+    #echo "TAR files"
+    #for i in $(seq 1 "${batch_number}"); do
+    #    tar -cf - ~{cromwell_root_dir}/batch${i}/*.fq.gz | pigz > ~{cromwell_root_dir}/~{plate_id}.${i}.cutadapt_output_files.tar.gz
+    #done
+    #echo "TAR files created successfully."
 
   >>>
 
@@ -257,7 +257,7 @@ task Demultiplexing {
   }
 
   output {
-    Array[File] tarred_demultiplexed_fastqs = glob("*.tar.gz")
+    #Array[File] tarred_demultiplexed_fastqs = glob("*.tar.gz")
     File stats = "~{plate_id}.stats.txt"
     }
 }

From f545666541eb2bd08bf25b3c0169923c88a8e4be Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 3 May 2024 13:37:09 -0400
Subject: [PATCH 066/186] only run cutadapt

---
 pipelines/skylab/snm3C/snm3C.wdl | 152 +++++++++++++++----------------
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index cf210940d9..708acb018f 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -57,82 +57,82 @@ workflow snm3C {
             cromwell_root_dir = cromwell_root_dir
     }
 
-    scatter(tar in Demultiplexing.tarred_demultiplexed_fastqs) {
-        call Hisat_paired_end as Hisat_paired_end {
-          input:
-                tarred_demultiplexed_fastqs = tar,
-                tarred_index_files = tarred_index_files,
-                genome_fa = genome_fa,
-                chromosome_sizes = chromosome_sizes,
-                min_read_length = min_read_length,
-                r1_adapter = r1_adapter,
-                r2_adapter = r2_adapter,
-                r1_left_cut = r1_left_cut,
-                r1_right_cut = r1_right_cut,
-                r2_left_cut = r2_left_cut,
-                r2_right_cut = r2_right_cut,
-                plate_id = plate_id,
-                docker = docker_prefix + m3c_yap_hisat_docker,
-                cromwell_root_dir = cromwell_root_dir
-        }
-
-        call Hisat_single_end as Hisat_single_end {
-            input:
-                split_fq_tar = Hisat_paired_end.split_fq_tar,
-                tarred_index_files = tarred_index_files,
-                genome_fa = genome_fa,
-                plate_id = plate_id,
-                docker = docker_prefix + m3c_yap_hisat_docker,
-                cromwell_root_dir = cromwell_root_dir
-        }
-
-        call Merge_sort_analyze as Merge_sort_analyze {
-            input:
-               paired_end_unique_tar = Hisat_paired_end.unique_bam_tar,
-               read_overlap_tar = Hisat_single_end.remove_overlaps_output_bam_tar,
-               genome_fa = genome_fa,
-               num_upstr_bases = num_upstr_bases,
-               num_downstr_bases = num_downstr_bases,
-               compress_level = compress_level,
-               chromosome_sizes = chromosome_sizes,
-               plate_id = plate_id,
-               docker = docker_prefix + m3c_yap_hisat_docker,
-               cromwell_root_dir = cromwell_root_dir
-        }
-    }
-
-    call Summary {
-        input:
-            trimmed_stats = Hisat_paired_end.trim_stats_tar,
-            hisat3n_stats = Hisat_paired_end.hisat3n_paired_end_stats_tar,
-            r1_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R1_tar,
-            r2_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R2_tar,
-            dedup_stats = Merge_sort_analyze.dedup_stats_tar,
-            chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats,
-            allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats,
-            unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
-            plate_id = plate_id,
-            docker = docker_prefix + m3c_yap_hisat_docker,
-            cromwell_root_dir = cromwell_root_dir
-    }
-
-    meta {
-        allowNestedInputs: true
-    }
-
-    output {
-        File MappingSummary = Summary.mapping_summary
-        Array[File] name_sorted_bams = Merge_sort_analyze.name_sorted_bam
-        Array[File] unique_reads_cgn_extraction_allc= Merge_sort_analyze.allc
-        Array[File] unique_reads_cgn_extraction_tbi = Merge_sort_analyze.tbi
-        Array[File] reference_version = Hisat_paired_end.reference_version
-        Array[File] all_reads_dedup_contacts = Merge_sort_analyze.all_reads_dedup_contacts
-        Array[File] all_reads_3C_contacts = Merge_sort_analyze.all_reads_3C_contacts
-        Array[File] chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats
-        Array[File] unique_reads_cgn_extraction_allc_extract = Merge_sort_analyze.extract_allc_output_allc_tar
-        Array[File] unique_reads_cgn_extraction_tbi_extract = Merge_sort_analyze.extract_allc_output_tbi_tar
-
-    }
+    #scatter(tar in Demultiplexing.tarred_demultiplexed_fastqs) {
+    #    call Hisat_paired_end as Hisat_paired_end {
+    #      input:
+    #            tarred_demultiplexed_fastqs = tar,
+    #            tarred_index_files = tarred_index_files,
+    #            genome_fa = genome_fa,
+    #            chromosome_sizes = chromosome_sizes,
+    #            min_read_length = min_read_length,
+    #            r1_adapter = r1_adapter,
+    #            r2_adapter = r2_adapter,
+     #           r1_left_cut = r1_left_cut,
+     #           r1_right_cut = r1_right_cut,
+     #           r2_left_cut = r2_left_cut,
+     #           r2_right_cut = r2_right_cut,
+     #           plate_id = plate_id,
+     #           docker = docker_prefix + m3c_yap_hisat_docker,
+     #           cromwell_root_dir = cromwell_root_dir
+     #   }
+
+      #  call Hisat_single_end as Hisat_single_end {
+      #      input:
+      #          split_fq_tar = Hisat_paired_end.split_fq_tar,
+      #          tarred_index_files = tarred_index_files,
+      #          genome_fa = genome_fa,
+      #          plate_id = plate_id,
+      #          docker = docker_prefix + m3c_yap_hisat_docker,
+      #          cromwell_root_dir = cromwell_root_dir
+      #  }
+
+       # call Merge_sort_analyze as Merge_sort_analyze {
+       #     input:
+       #        paired_end_unique_tar = Hisat_paired_end.unique_bam_tar,
+       #        read_overlap_tar = Hisat_single_end.remove_overlaps_output_bam_tar,
+       #        genome_fa = genome_fa,
+       #        num_upstr_bases = num_upstr_bases,
+       #        num_downstr_bases = num_downstr_bases,
+       #        compress_level = compress_level,
+        #       chromosome_sizes = chromosome_sizes,
+        #       plate_id = plate_id,
+        #       docker = docker_prefix + m3c_yap_hisat_docker,
+        #       cromwell_root_dir = cromwell_root_dir
+        #}
+    #}
+
+    #call Summary {
+    #    input:
+    #        trimmed_stats = Hisat_paired_end.trim_stats_tar,
+    #        hisat3n_stats = Hisat_paired_end.hisat3n_paired_end_stats_tar,
+    #        r1_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R1_tar,
+    #        r2_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R2_tar,
+    #        dedup_stats = Merge_sort_analyze.dedup_stats_tar,
+    #        chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats,
+    #        allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats,
+    #        unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
+    #        plate_id = plate_id,
+    #        docker = docker_prefix + m3c_yap_hisat_docker,
+    #        cromwell_root_dir = cromwell_root_dir
+    #}
+
+    #meta {
+    #    allowNestedInputs: true
+    #}
+
+    #output {
+    #    File MappingSummary = Summary.mapping_summary
+    ##    Array[File] name_sorted_bams = Merge_sort_analyze.name_sorted_bam
+     #   Array[File] unique_reads_cgn_extraction_allc= Merge_sort_analyze.allc
+     #   Array[File] unique_reads_cgn_extraction_tbi = Merge_sort_analyze.tbi
+     #   Array[File] reference_version = Hisat_paired_end.reference_version
+     #   Array[File] all_reads_dedup_contacts = Merge_sort_analyze.all_reads_dedup_contacts
+     #   Array[File] all_reads_3C_contacts = Merge_sort_analyze.all_reads_3C_contacts
+     #   Array[File] chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats
+     #   Array[File] unique_reads_cgn_extraction_allc_extract = Merge_sort_analyze.extract_allc_output_allc_tar
+     #   Array[File] unique_reads_cgn_extraction_tbi_extract = Merge_sort_analyze.extract_allc_output_tbi_tar
+
+    #}
 }
 
 task Demultiplexing {

From 6de5e9e780e6a52cdbe1ebdd416288a3f53144f6 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 6 May 2024 12:50:33 -0400
Subject: [PATCH 067/186] write files to current working dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 708acb018f..3bf1175ab6 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -157,11 +157,16 @@ task Demultiplexing {
 
     ls -lR
     pwd
-
+    working_directory = `pwd`
+    echo $working_directory
 
     # Cat files for each r1, r2
-    cat ~{sep=' ' fastq_input_read1} > ~{cromwell_root_dir}/r1.fastq.gz
-    cat ~{sep=' ' fastq_input_read2} > ~{cromwell_root_dir}/r2.fastq.gz
+    cat ~{sep=' ' fastq_input_read1} > $working_directory/r1.fastq.gz
+    cat ~{sep=' ' fastq_input_read2} > $working_directory/r2.fastq.gz
+
+    echo "successfully catted files"
+    pwd
+    ls
 
     # Run cutadapt
     /opt/conda/bin/cutadapt -Z -e 0.01 --no-indels -j 8 \
@@ -170,7 +175,7 @@ task Demultiplexing {
     -p ~{plate_id}-{name}-R2.fq.gz \
     r1.fastq.gz \
     r2.fastq.gz \
-    > ~{cromwell_root_dir}/~{plate_id}.stats.txt
+    > $working_directory/~{plate_id}.stats.txt
 
     echo "RAN CUT ADAPT"
 

From bee07db56845850bef96f1a47c106a340184c1bd Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 6 May 2024 13:11:47 -0400
Subject: [PATCH 068/186] write files to current working dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 3bf1175ab6..f9826ed4cc 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -157,7 +157,8 @@ task Demultiplexing {
 
     ls -lR
     pwd
-    working_directory = `pwd`
+    echo "setting directory"
+    working_directory=`pwd`
     echo $working_directory
 
     # Cat files for each r1, r2

From 19a95560b09db90e23b3eaae0afb6eeeea21a38f Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 6 May 2024 13:27:13 -0400
Subject: [PATCH 069/186] write files to current working dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index f9826ed4cc..f5aae4b2d3 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -158,12 +158,12 @@ task Demultiplexing {
     ls -lR
     pwd
     echo "setting directory"
-    working_directory=`pwd`
-    echo $working_directory
+    WORKING_DIR=`pwd`
+    echo $WORKING_DIR
 
     # Cat files for each r1, r2
-    cat ~{sep=' ' fastq_input_read1} > $working_directory/r1.fastq.gz
-    cat ~{sep=' ' fastq_input_read2} > $working_directory/r2.fastq.gz
+    cat ~{sep=' ' fastq_input_read1} > $WORKING_DIR/r1.fastq.gz
+    cat ~{sep=' ' fastq_input_read2} > $WORKING_DIR/r2.fastq.gz
 
     echo "successfully catted files"
     pwd
@@ -174,14 +174,14 @@ task Demultiplexing {
     -g file:~{random_primer_indexes} \
     -o ~{plate_id}-{name}-R1.fq.gz \
     -p ~{plate_id}-{name}-R2.fq.gz \
-    r1.fastq.gz \
-    r2.fastq.gz \
-    > $working_directory/~{plate_id}.stats.txt
+    $WORKING_DIR/r1.fastq.gz \
+    $WORKING_DIR/r2.fastq.gz \
+    > $WORKING_DIR/~{plate_id}.stats.txt
 
     echo "RAN CUT ADAPT"
 
     # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
-    rm ~{cromwell_root_dir}/*-unknown-R{1,2}.fq.gz
+    rm $WORKING_DIR/*-unknown-R{1,2}.fq.gz
 
     echo "REMOVED FILES"
 

From 0605e65303708b9941a45f153cb1e82159edde58 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 6 May 2024 13:55:54 -0400
Subject: [PATCH 070/186] add some of demultiplexing steps back in

---
 pipelines/skylab/snm3C/snm3C.wdl | 105 ++++++++++++++++---------------
 1 file changed, 55 insertions(+), 50 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index f5aae4b2d3..ee83007f16 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -185,72 +185,77 @@ task Demultiplexing {
 
     echo "REMOVED FILES"
 
-    #python3 <<CODE
-    #import re
-    #import os
+    python3 <<CODE
+    import re
+    import os
 
     # Parsing stats.txt file
-    #stats_file_path = '~{cromwell_root_dir}/~{plate_id}.stats.txt'
-    #adapter_counts = {}
-    #with open(stats_file_path, 'r') as file:
-    #    content = file.read()
-
-    #adapter_matches = re.findall(r'=== First read: Adapter (\w+) ===\n\nSequence: .+; Type: .+; Length: \d+; Trimmed: (\d+) times', content)
-    #for adapter_match in adapter_matches:
-    #    adapter_name = adapter_match[0]
-    #    trimmed_count = int(adapter_match[1])
-    #    adapter_counts[adapter_name] = trimmed_count
+    working_dir = os.getcwd()
+    stats_file_path = os.path.join(working_dir, ~{plate_id}.stats.txt')
+    adapter_counts = {}
+    with open(stats_file_path, 'r') as file:
+        content = file.read()
+    print("opened stats file)
+    adapter_matches = re.findall(r'=== First read: Adapter (\w+) ===\n\nSequence: .+; Type: .+; Length: \d+; Trimmed: (\d+) times', content)
+    for adapter_match in adapter_matches:
+        adapter_name = adapter_match[0]
+        trimmed_count = int(adapter_match[1])
+        adapter_counts[adapter_name] = trimmed_count
 
     # Removing fastq files with trimmed reads greater than 30
-    #directory_path = '~{cromwell_root_dir}'
-    #threshold = 10000000
-
-    #for filename in os.listdir(directory_path):
-    #    if filename.endswith('.fq.gz'):
-    #        file_path = os.path.join(directory_path, filename)
-    #        adapter_name = re.search(r'A(\d+)-R', filename)
-    #        if adapter_name:
-    #            adapter_name = 'A' + adapter_name.group(1)
-    #            if adapter_name in adapter_counts and adapter_counts[adapter_name] > threshold:
-    #                os.remove(file_path)
-    #                print(f'Removed file: {filename}')
-    #CODE
-
-    #echo "RAN PYTHON SNIPPET"
+    threshold = 10000000
+
+    # TODO remove these prints:
+    all_fastqs = os.listdir(working_dir)
+    print(f"all fastq files: {all_fastqs}"
+
+    for filename in os.listdir(working_dir):
+        if filename.endswith('.fq.gz'):
+            file_path = os.path.join(working_dir, filename)
+            adapter_name = re.search(r'A(\d+)-R', filename)
+            if adapter_name:
+                adapter_name = 'A' + adapter_name.group(1)
+                if adapter_name in adapter_counts and adapter_counts[adapter_name] > threshold:
+                    os.remove(file_path)
+                    print(f'Removed file: {filename}')
+    CODE
+
+    echo "RAN PYTHON SNIPPET"
 
     # Batch the fastq files into folders of batch_number size
-    #batch_number=~{batch_number}
-    #for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
-    #    mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
-    #done
+    batch_number=~{batch_number}
+    for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
+        mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
+    done
 
-    #echo "BATCHED FASTQ FILES INTO FOLDERS"
+    echo "BATCHED FASTQ FILES INTO FOLDERS"
 
     # Counter for the folder index
-    #folder_index=1
+    folder_index=1
+    WORKING_DIR=`pwd`
 
     # Define lists of r1 and r2 fq files
-    #R1_files=($(ls ~{cromwell_root_dir} | grep "\-R1.fq.gz"))
-    #R2_files=($(ls ~{cromwell_root_dir} | grep "\-R2.fq.gz"))
+    R1_files=($(ls $WORKING_DIR | grep "\-R1.fq.gz"))
+    R2_files=($(ls $WORKING_DIR | grep "\-R2.fq.gz"))
 
-    #echo "STARTING TAR JOB"
+    echo "STARTING TAR JOB"
 
     # Distribute the FASTQ files and create TAR files
-    #for file in "${R1_files[@]}"; do
-    #    sample_id=$(basename "$file" "-R1.fq.gz")
-    #    r2_file="${sample_id}-R2.fq.gz"
-    #    mv ~{cromwell_root_dir}/$file batch$((folder_index))/$file
-    #    mv ~{cromwell_root_dir}/$r2_file batch$((folder_index))/$r2_file
+    for file in "${R1_files[@]}"; do
+        sample_id=$(basename "$file" "-R1.fq.gz")
+        r2_file="${sample_id}-R2.fq.gz"
+        mv $WORKING_DIR/$file batch$((folder_index))/$file
+        mv $WORKING_DIR/$r2_file batch$((folder_index))/$r2_file
         # Increment the counter
-    #    folder_index=$(( (folder_index % $batch_number) + 1 ))
-    #done
+        folder_index=$(( (folder_index % $batch_number) + 1 ))
+    done
 
     # Tar up files per batch
-    #echo "TAR files"
-    #for i in $(seq 1 "${batch_number}"); do
-    #    tar -cf - ~{cromwell_root_dir}/batch${i}/*.fq.gz | pigz > ~{cromwell_root_dir}/~{plate_id}.${i}.cutadapt_output_files.tar.gz
-    #done
-    #echo "TAR files created successfully."
+    echo "TAR files"
+    for i in $(seq 1 "${batch_number}"); do
+        tar -cf - $WORKING_DIR/batch${i}/*.fq.gz | pigz > $WORKING_DIR/~{plate_id}.${i}.cutadapt_output_files.tar.gz
+    done
+    echo "TAR files created successfully."
 
   >>>
 
@@ -263,7 +268,7 @@ task Demultiplexing {
   }
 
   output {
-    #Array[File] tarred_demultiplexed_fastqs = glob("*.tar.gz")
+    Array[File] tarred_demultiplexed_fastqs = glob("*.tar.gz")
     File stats = "~{plate_id}.stats.txt"
     }
 }

From e2f98c85dc78478b19da78eca007f1170f54b0ff Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 6 May 2024 14:04:31 -0400
Subject: [PATCH 071/186] typo

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index ee83007f16..e74e651d05 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -191,7 +191,7 @@ task Demultiplexing {
 
     # Parsing stats.txt file
     working_dir = os.getcwd()
-    stats_file_path = os.path.join(working_dir, ~{plate_id}.stats.txt')
+    stats_file_path = os.path.join(working_dir, '~{plate_id}.stats.txt')
     adapter_counts = {}
     with open(stats_file_path, 'r') as file:
         content = file.read()

From 1d8745cabf1664e246f42fe24103c89f216f5917 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 6 May 2024 14:14:54 -0400
Subject: [PATCH 072/186] typo

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index e74e651d05..4b381407e5 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -195,7 +195,7 @@ task Demultiplexing {
     adapter_counts = {}
     with open(stats_file_path, 'r') as file:
         content = file.read()
-    print("opened stats file)
+    print("opened stats file")
     adapter_matches = re.findall(r'=== First read: Adapter (\w+) ===\n\nSequence: .+; Type: .+; Length: \d+; Trimmed: (\d+) times', content)
     for adapter_match in adapter_matches:
         adapter_name = adapter_match[0]

From e672e3de0a7a96734b223eb62c7223e173e31cb9 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 6 May 2024 14:24:44 -0400
Subject: [PATCH 073/186] typo

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 4b381407e5..8e8df191ca 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -207,7 +207,7 @@ task Demultiplexing {
 
     # TODO remove these prints:
     all_fastqs = os.listdir(working_dir)
-    print(f"all fastq files: {all_fastqs}"
+    print(f"all fastq files: {all_fastqs}")
 
     for filename in os.listdir(working_dir):
         if filename.endswith('.fq.gz'):

From 14b6958247dd0573d495ceb7aeb9b13d2966db51 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 6 May 2024 14:29:20 -0400
Subject: [PATCH 074/186] remove print

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 8e8df191ca..47bfb38483 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -207,7 +207,7 @@ task Demultiplexing {
 
     # TODO remove these prints:
     all_fastqs = os.listdir(working_dir)
-    print(f"all fastq files: {all_fastqs}")
+
 
     for filename in os.listdir(working_dir):
         if filename.endswith('.fq.gz'):

From 84a8777b5e05c2afdfddb898d9d184afff05ec8c Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 6 May 2024 15:10:43 -0400
Subject: [PATCH 075/186] uncomment rest of workflow

---
 pipelines/skylab/snm3C/snm3C.wdl | 196 +++++++++++++------------------
 1 file changed, 84 insertions(+), 112 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 47bfb38483..059e80a897 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -54,85 +54,84 @@ workflow snm3C {
             plate_id = plate_id,
             batch_number = batch_number,
             docker = docker_prefix + m3c_yap_hisat_docker,
+    }
+
+    scatter(tar in Demultiplexing.tarred_demultiplexed_fastqs) {
+        call Hisat_paired_end as Hisat_paired_end {
+          input:
+                tarred_demultiplexed_fastqs = tar,
+                tarred_index_files = tarred_index_files,
+                genome_fa = genome_fa,
+                chromosome_sizes = chromosome_sizes,
+                min_read_length = min_read_length,
+                r1_adapter = r1_adapter,
+                r2_adapter = r2_adapter,
+                r1_left_cut = r1_left_cut,
+                r1_right_cut = r1_right_cut,
+                r2_left_cut = r2_left_cut,
+                r2_right_cut = r2_right_cut,
+                plate_id = plate_id,
+                docker = docker_prefix + m3c_yap_hisat_docker,
+                cromwell_root_dir = cromwell_root_dir
+        }
+
+        call Hisat_single_end as Hisat_single_end {
+            input:
+                split_fq_tar = Hisat_paired_end.split_fq_tar,
+                tarred_index_files = tarred_index_files,
+                genome_fa = genome_fa,
+                plate_id = plate_id,
+                docker = docker_prefix + m3c_yap_hisat_docker,
+                cromwell_root_dir = cromwell_root_dir
+        }
+
+        call Merge_sort_analyze as Merge_sort_analyze {
+            input:
+               paired_end_unique_tar = Hisat_paired_end.unique_bam_tar,
+               read_overlap_tar = Hisat_single_end.remove_overlaps_output_bam_tar,
+               genome_fa = genome_fa,
+               num_upstr_bases = num_upstr_bases,
+               num_downstr_bases = num_downstr_bases,
+               compress_level = compress_level,
+               chromosome_sizes = chromosome_sizes,
+               plate_id = plate_id,
+               docker = docker_prefix + m3c_yap_hisat_docker,
+               cromwell_root_dir = cromwell_root_dir
+        }
+    }
+
+    call Summary {
+        input:
+            trimmed_stats = Hisat_paired_end.trim_stats_tar,
+            hisat3n_stats = Hisat_paired_end.hisat3n_paired_end_stats_tar,
+            r1_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R1_tar,
+            r2_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R2_tar,
+            dedup_stats = Merge_sort_analyze.dedup_stats_tar,
+            chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats,
+            allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats,
+            unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
+            plate_id = plate_id,
+            docker = docker_prefix + m3c_yap_hisat_docker,
             cromwell_root_dir = cromwell_root_dir
     }
 
-    #scatter(tar in Demultiplexing.tarred_demultiplexed_fastqs) {
-    #    call Hisat_paired_end as Hisat_paired_end {
-    #      input:
-    #            tarred_demultiplexed_fastqs = tar,
-    #            tarred_index_files = tarred_index_files,
-    #            genome_fa = genome_fa,
-    #            chromosome_sizes = chromosome_sizes,
-    #            min_read_length = min_read_length,
-    #            r1_adapter = r1_adapter,
-    #            r2_adapter = r2_adapter,
-     #           r1_left_cut = r1_left_cut,
-     #           r1_right_cut = r1_right_cut,
-     #           r2_left_cut = r2_left_cut,
-     #           r2_right_cut = r2_right_cut,
-     #           plate_id = plate_id,
-     #           docker = docker_prefix + m3c_yap_hisat_docker,
-     #           cromwell_root_dir = cromwell_root_dir
-     #   }
-
-      #  call Hisat_single_end as Hisat_single_end {
-      #      input:
-      #          split_fq_tar = Hisat_paired_end.split_fq_tar,
-      #          tarred_index_files = tarred_index_files,
-      #          genome_fa = genome_fa,
-      #          plate_id = plate_id,
-      #          docker = docker_prefix + m3c_yap_hisat_docker,
-      #          cromwell_root_dir = cromwell_root_dir
-      #  }
-
-       # call Merge_sort_analyze as Merge_sort_analyze {
-       #     input:
-       #        paired_end_unique_tar = Hisat_paired_end.unique_bam_tar,
-       #        read_overlap_tar = Hisat_single_end.remove_overlaps_output_bam_tar,
-       #        genome_fa = genome_fa,
-       #        num_upstr_bases = num_upstr_bases,
-       #        num_downstr_bases = num_downstr_bases,
-       #        compress_level = compress_level,
-        #       chromosome_sizes = chromosome_sizes,
-        #       plate_id = plate_id,
-        #       docker = docker_prefix + m3c_yap_hisat_docker,
-        #       cromwell_root_dir = cromwell_root_dir
-        #}
-    #}
-
-    #call Summary {
-    #    input:
-    #        trimmed_stats = Hisat_paired_end.trim_stats_tar,
-    #        hisat3n_stats = Hisat_paired_end.hisat3n_paired_end_stats_tar,
-    #        r1_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R1_tar,
-    #        r2_hisat3n_stats = Hisat_single_end.hisat3n_dna_split_reads_summary_R2_tar,
-    #        dedup_stats = Merge_sort_analyze.dedup_stats_tar,
-    #        chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats,
-    #        allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats,
-    #        unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
-    #        plate_id = plate_id,
-    #        docker = docker_prefix + m3c_yap_hisat_docker,
-    #        cromwell_root_dir = cromwell_root_dir
-    #}
-
-    #meta {
-    #    allowNestedInputs: true
-    #}
-
-    #output {
-    #    File MappingSummary = Summary.mapping_summary
-    ##    Array[File] name_sorted_bams = Merge_sort_analyze.name_sorted_bam
-     #   Array[File] unique_reads_cgn_extraction_allc= Merge_sort_analyze.allc
-     #   Array[File] unique_reads_cgn_extraction_tbi = Merge_sort_analyze.tbi
-     #   Array[File] reference_version = Hisat_paired_end.reference_version
-     #   Array[File] all_reads_dedup_contacts = Merge_sort_analyze.all_reads_dedup_contacts
-     #   Array[File] all_reads_3C_contacts = Merge_sort_analyze.all_reads_3C_contacts
-     #   Array[File] chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats
-     #   Array[File] unique_reads_cgn_extraction_allc_extract = Merge_sort_analyze.extract_allc_output_allc_tar
-     #   Array[File] unique_reads_cgn_extraction_tbi_extract = Merge_sort_analyze.extract_allc_output_tbi_tar
-
-    #}
+    meta {
+        allowNestedInputs: true
+    }
+
+    output {
+        File MappingSummary = Summary.mapping_summary
+        Array[File] name_sorted_bams = Merge_sort_analyze.name_sorted_bam
+        Array[File] unique_reads_cgn_extraction_allc= Merge_sort_analyze.allc
+        Array[File] unique_reads_cgn_extraction_tbi = Merge_sort_analyze.tbi
+        Array[File] reference_version = Hisat_paired_end.reference_version
+        Array[File] all_reads_dedup_contacts = Merge_sort_analyze.all_reads_dedup_contacts
+        Array[File] all_reads_3C_contacts = Merge_sort_analyze.all_reads_3C_contacts
+        Array[File] chromatin_contact_stats = Merge_sort_analyze.chromatin_contact_stats
+        Array[File] unique_reads_cgn_extraction_allc_extract = Merge_sort_analyze.extract_allc_output_allc_tar
+        Array[File] unique_reads_cgn_extraction_tbi_extract = Merge_sort_analyze.extract_allc_output_tbi_tar
+
+    }
 }
 
 task Demultiplexing {
@@ -143,7 +142,6 @@ task Demultiplexing {
     String plate_id
     Int batch_number
     String docker
-    String cromwell_root_dir
 
     Int disk_size = 1000
     Int mem_size = 10
@@ -152,23 +150,13 @@ task Demultiplexing {
   }
 
   command <<<
-    echo "TEST"
     set -euo pipefail
-
-    ls -lR
-    pwd
-    echo "setting directory"
     WORKING_DIR=`pwd`
-    echo $WORKING_DIR
 
     # Cat files for each r1, r2
     cat ~{sep=' ' fastq_input_read1} > $WORKING_DIR/r1.fastq.gz
     cat ~{sep=' ' fastq_input_read2} > $WORKING_DIR/r2.fastq.gz
 
-    echo "successfully catted files"
-    pwd
-    ls
-
     # Run cutadapt
     /opt/conda/bin/cutadapt -Z -e 0.01 --no-indels -j 8 \
     -g file:~{random_primer_indexes} \
@@ -178,13 +166,9 @@ task Demultiplexing {
     $WORKING_DIR/r2.fastq.gz \
     > $WORKING_DIR/~{plate_id}.stats.txt
 
-    echo "RAN CUT ADAPT"
-
     # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz
     rm $WORKING_DIR/*-unknown-R{1,2}.fq.gz
 
-    echo "REMOVED FILES"
-
     python3 <<CODE
     import re
     import os
@@ -195,7 +179,7 @@ task Demultiplexing {
     adapter_counts = {}
     with open(stats_file_path, 'r') as file:
         content = file.read()
-    print("opened stats file")
+
     adapter_matches = re.findall(r'=== First read: Adapter (\w+) ===\n\nSequence: .+; Type: .+; Length: \d+; Trimmed: (\d+) times', content)
     for adapter_match in adapter_matches:
         adapter_name = adapter_match[0]
@@ -205,10 +189,6 @@ task Demultiplexing {
     # Removing fastq files with trimmed reads greater than 30
     threshold = 10000000
 
-    # TODO remove these prints:
-    all_fastqs = os.listdir(working_dir)
-
-
     for filename in os.listdir(working_dir):
         if filename.endswith('.fq.gz'):
             file_path = os.path.join(working_dir, filename)
@@ -220,16 +200,12 @@ task Demultiplexing {
                     print(f'Removed file: {filename}')
     CODE
 
-    echo "RAN PYTHON SNIPPET"
-
     # Batch the fastq files into folders of batch_number size
     batch_number=~{batch_number}
     for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
         mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
     done
 
-    echo "BATCHED FASTQ FILES INTO FOLDERS"
-
     # Counter for the folder index
     folder_index=1
     WORKING_DIR=`pwd`
@@ -238,8 +214,6 @@ task Demultiplexing {
     R1_files=($(ls $WORKING_DIR | grep "\-R1.fq.gz"))
     R2_files=($(ls $WORKING_DIR | grep "\-R2.fq.gz"))
 
-    echo "STARTING TAR JOB"
-
     # Distribute the FASTQ files and create TAR files
     for file in "${R1_files[@]}"; do
         sample_id=$(basename "$file" "-R1.fq.gz")
@@ -251,12 +225,9 @@ task Demultiplexing {
     done
 
     # Tar up files per batch
-    echo "TAR files"
     for i in $(seq 1 "${batch_number}"); do
         tar -cf - $WORKING_DIR/batch${i}/*.fq.gz | pigz > $WORKING_DIR/~{plate_id}.${i}.cutadapt_output_files.tar.gz
     done
-    echo "TAR files created successfully."
-
   >>>
 
   runtime {
@@ -301,6 +272,7 @@ task Hisat_paired_end {
         set -euo pipefail
         set -x
         lscpu
+        WORKING_DIR=`pwd`
   
         # check genomic reference version and print to output txt file
         STRING=~{genome_fa}
@@ -349,7 +321,7 @@ task Hisat_paired_end {
           # sort 
           start=$(date +%s)
           echo "Run sort r1"
-          zcat ~{cromwell_root_dir}/batch*/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
+          zcat $WORKING_DIR/batch*/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run sort r1: $elapsed seconds"
@@ -357,7 +329,7 @@ task Hisat_paired_end {
           # sort 
           start=$(date +%s)
           echo "Run sort r2"
-          zcat ~{cromwell_root_dir}/batch*/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
+          zcat $WORKING_DIR/batch*/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run sort r2: $elapsed seconds"
@@ -376,10 +348,10 @@ task Hisat_paired_end {
           -Z \
           -m ${min_read_length}:${min_read_length} \
           --pair-filter 'both' \
-          -o ${sample_id}-R1_trimmed.fq.gz \
-          -p ${sample_id}-R2_trimmed.fq.gz \
-          ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq \
-          > ${sample_id}.trimmed.stats.txt
+          -o $WORKING_DIR/${sample_id}-R1_trimmed.fq.gz \
+          -p $WORKING_DIR/${sample_id}-R2_trimmed.fq.gz \
+          $WORKING_DIR/${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq \
+          > $WORKING_DIR/${sample_id}.trimmed.stats.txt
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run cutadapt: $elapsed seconds"
@@ -574,7 +546,7 @@ task Hisat_single_end {
         end=$(date +%s) 
         elapsed=$((end - start)) 
         echo "Elapsed time to untar split_fq_tar: $elapsed seconds"
-      
+
         # make directories 
         mkdir -p ~{cromwell_root_dir}/merged_sort_bams
         mkdir -p ~{cromwell_root_dir}/read_overlap

From a4386ce7bc4dbe59a1d9dd149f2f6ff702621ace Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 7 May 2024 09:22:01 -0400
Subject: [PATCH 076/186] add working dir to batch subdir

---
 pipelines/skylab/snm3C/snm3C.wdl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 059e80a897..9636e3d13e 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -391,7 +391,7 @@ task Hisat_paired_end {
           elapsed=$((end - start)) 
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
           
-          rm ~{cromwell_root_dir}/batch*/${sample_id}-R1.fq.gz ~{cromwell_root_dir}/batch*/${sample_id}-R2.fq.gz
+          rm $WORKING_DIR/batch*/${sample_id}-R1.fq.gz $WORKING_DIR/batch*/${sample_id}-R2.fq.gz
           rm ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq
           rm ${sample_id}-R1_trimmed.fq.gz ${sample_id}-R2_trimmed.fq.gz
           rm ${sample_id}.hisat3n_dna.unsort.bam ${sample_id}.hisat3n_dna.multi_aligned.bam
@@ -399,8 +399,8 @@ task Hisat_paired_end {
        }
 
       # define lists of r1 and r2 fq files
-      R1_files=($(ls batch*/ | grep "\-R1.fq.gz"))
-      R2_files=($(ls batch*/ | grep "\-R2.fq.gz"))
+      R1_files=($(ls $WORKING_DIR/batch*/ | grep "\-R1.fq.gz"))
+      R2_files=($(ls $WORKING_DIR/batch*/ | grep "\-R2.fq.gz"))
 
       # for file in "${R1_files[@]}"; do
       # (

From 2098784872003ceec15b8c827ffe0d745dedf7b0 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 7 May 2024 10:55:15 -0400
Subject: [PATCH 077/186] ls cromwell root batch

---
 pipelines/skylab/snm3C/snm3C.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 9636e3d13e..0ccea08aee 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -399,8 +399,8 @@ task Hisat_paired_end {
        }
 
       # define lists of r1 and r2 fq files
-      R1_files=($(ls $WORKING_DIR/batch*/ | grep "\-R1.fq.gz"))
-      R2_files=($(ls $WORKING_DIR/batch*/ | grep "\-R2.fq.gz"))
+      R1_files=($(ls ~{cromwell_root_dir}/batch*/ | grep "\-R1.fq.gz"))
+      R2_files=($(ls ~{cromwell_root_dir}/batch*/ | grep "\-R2.fq.gz"))
 
       # for file in "${R1_files[@]}"; do
       # (

From 7704e7d1b5a7317b260aab73d4de037ecc3a27b8 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 7 May 2024 12:08:44 -0400
Subject: [PATCH 078/186] ls directories to find batch dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 0ccea08aee..1e4b3d58ff 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -309,6 +309,11 @@ task Hisat_paired_end {
         end=$(date +%s) 
         elapsed=$((end - start)) 
         echo "Elapsed time to untar: $elapsed seconds"
+
+        echo "lsing current dir:"
+        ls -lR
+        echo "lsing cromwell root:"
+        ls -lR ~{cromwell_root_dir}
     
         task() {
           local file=$1

From 63466c5087175010b2e06376b8a2ddc8ad899046 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 7 May 2024 13:11:56 -0400
Subject: [PATCH 079/186] update changelogs

---
 pipelines/skylab/multiome/Multiome.wdl | 2 +-
 pipelines/skylab/optimus/Optimus.wdl   | 2 +-
 pipelines/skylab/slideseq/SlideSeq.wdl | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index 8bfd9c7222..e8b901fad9 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Multiome {
 
-    String pipeline_version = "3.4.3"
+    String pipeline_version = "3.4.4"
 
     input {
         String cloud_provider
diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index d1965fdd9b..b4b0196f89 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -68,7 +68,7 @@ workflow Optimus {
   # version of this pipeline
 
 
-  String pipeline_version = "6.6.2"
+  String pipeline_version = "6.6.3"
 
 
   # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index 8005922895..0502a32fcd 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow SlideSeq {
 
-    String pipeline_version = "3.1.5"
+    String pipeline_version = "3.1.6"
 
     input {
         Array[File] r1_fastq

From 5ba8021c1572d2f64b3a8c1ee7aa887f3d81efa0 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 7 May 2024 13:29:46 -0400
Subject: [PATCH 080/186] set batch dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 1e4b3d58ff..15bd22d844 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -314,7 +314,16 @@ task Hisat_paired_end {
         ls -lR
         echo "lsing cromwell root:"
         ls -lR ~{cromwell_root_dir}
-    
+
+        # define lists of r1 and r2 fq files
+        if [ ~{cromwell_root_dir} = "gcp" ]; then
+            batch_dir="batch*/"
+        else
+            batch_dir="/~{cromwell_root_dir}/*/*/call-Demultiplexing/execution/batch*/"
+        fi
+        echo "batchdirectory: $batch_dir"
+
+
         task() {
           local file=$1
           sample_id=$(basename "$file" "-R1.fq.gz")
@@ -326,7 +335,7 @@ task Hisat_paired_end {
           # sort 
           start=$(date +%s)
           echo "Run sort r1"
-          zcat $WORKING_DIR/batch*/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
+          zcat $batch_dir/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run sort r1: $elapsed seconds"
@@ -334,7 +343,7 @@ task Hisat_paired_end {
           # sort 
           start=$(date +%s)
           echo "Run sort r2"
-          zcat $WORKING_DIR/batch*/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
+          zcat $batch_dir/batch*/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run sort r2: $elapsed seconds"
@@ -396,30 +405,33 @@ task Hisat_paired_end {
           elapsed=$((end - start)) 
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
           
-          rm $WORKING_DIR/batch*/${sample_id}-R1.fq.gz $WORKING_DIR/batch*/${sample_id}-R2.fq.gz
+          rm $batch_dir/${sample_id}-R1.fq.gz $batch_dir/batch*/${sample_id}-R2.fq.gz
           rm ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq
           rm ${sample_id}-R1_trimmed.fq.gz ${sample_id}-R2_trimmed.fq.gz
           rm ${sample_id}.hisat3n_dna.unsort.bam ${sample_id}.hisat3n_dna.multi_aligned.bam
           rm ${sample_id}.hisat3n_dna.unmapped.fastq
        }
 
-      # define lists of r1 and r2 fq files
-      R1_files=($(ls ~{cromwell_root_dir}/batch*/ | grep "\-R1.fq.gz"))
-      R2_files=($(ls ~{cromwell_root_dir}/batch*/ | grep "\-R2.fq.gz"))
+
+      R1_files=($(ls $batch_dir | grep "\-R1.fq.gz"))
+      R2_files=($(ls $batch_dir | grep "\-R2.fq.gz"))
+
+      echo "r1 files: $R1_files"
+      echo "r2 files: $R2_files"
 
       # for file in "${R1_files[@]}"; do
       # (
       #   echo "starting task $file.."
       #   du -h  batch*/$file
       #   task "$file"
-      # ) 
+      # )
       # done
 
       # run 6 instances of task in parallel 
       for file in "${R1_files[@]}"; do
         (
           echo "starting task $file.."
-          du -h  batch*/$file
+          du -h  $batch_dir/$file
           task "$file"
           sleep $(( (RANDOM % 3) + 1))
         ) &

From 988a92048ba7568118b620c90ff4ac3c7cee9fe0 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 7 May 2024 13:55:27 -0400
Subject: [PATCH 081/186] remove extra leading slash

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 15bd22d844..22d8fdfcc6 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -319,7 +319,7 @@ task Hisat_paired_end {
         if [ ~{cromwell_root_dir} = "gcp" ]; then
             batch_dir="batch*/"
         else
-            batch_dir="/~{cromwell_root_dir}/*/*/call-Demultiplexing/execution/batch*/"
+            batch_dir="~{cromwell_root_dir}/*/*/call-Demultiplexing/execution/batch*/"
         fi
         echo "batchdirectory: $batch_dir"
 

From 03f5870e20e627ba4131e77b8b3abf3e357bc667 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Wed, 8 May 2024 09:54:26 -0400
Subject: [PATCH 082/186] fixing what i messed up in resolving conflicts

---
 tasks/skylab/StarAlign.wdl | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index 1ad3126d21..d6fe440302 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -344,8 +344,11 @@ task STARsoloFastq {
     then
       SoloDirectory="Solo.out/Gene/raw"
       echo "SoloDirectory is $SoloDirectory"
-      find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{}  echo mv {} /cromwell_root/
-      find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+      #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{}  echo mv {} /cromwell_root/
+      #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+      echo "list matrix files in $SoloDirectory"
+      ls "$SoloDirectory"/*.mtx
+      mv $SoloDirectory/matrix.mtx matrix.mtx
       mv "Solo.out/Gene/raw/barcodes.tsv" barcodes.tsv
       mv "Solo.out/Gene/raw/features.tsv" features.tsv
       mv "Solo.out/Gene/CellReads.stats" CellReads.stats
@@ -358,8 +361,11 @@ task STARsoloFastq {
       then
         SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw"
         echo "SoloDirectory is $SoloDirectory"
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{}  echo mv {} /cromwell_root/
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{}  echo mv {} /cromwell_root/
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+        echo "list matrix files in $SoloDirectory"
+        ls "$SoloDirectory"/*.mtx
+        mv $SoloDirectory/matrix.mtx matrix.mtx
         mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv
         mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv
         mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats
@@ -369,12 +375,18 @@ task STARsoloFastq {
       else
         SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw"
         echo "SoloDirectory is $SoloDirectory"
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
+        echo "list matrix files in $SoloDirectory"
+        ls "$SoloDirectory"/*.mtx
+        mv $SoloDirectory/matrix.mtx matrix.mtx
         SoloDirectory="Solo.out/Gene/raw"
         echo "SoloDirectory is $SoloDirectory"
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx";  echo mv {} "/cromwell_root/$new_name"'
-        find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"'
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx";  echo mv {} "/cromwell_root/$new_name"'
+        #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"'
+        echo "list matrix files in $SoloDirectory"
+        ls "$SoloDirectory"/*.mtx
+        mv $SoloDirectory/matrix.mtx matrix_sn_rna.mtx
         mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv
         mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv
         mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats

From cd9d4608dff063c8df969bf7c8d28127671693ec Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Wed, 8 May 2024 16:26:40 -0400
Subject: [PATCH 083/186] fix batch dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 22d8fdfcc6..689bba7849 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -319,7 +319,7 @@ task Hisat_paired_end {
         if [ ~{cromwell_root_dir} = "gcp" ]; then
             batch_dir="batch*/"
         else
-            batch_dir="~{cromwell_root_dir}/*/*/call-Demultiplexing/execution/batch*/"
+            batch_dir="~{cromwell_root_dir}/*/*/*/*/*/~{cromwell_root_dir}/*/*/*/*/batch*/"
         fi
         echo "batchdirectory: $batch_dir"
 

From e63ff3ddc3120690c944e3f46f1a31e761e1884c Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 9 May 2024 09:27:46 -0400
Subject: [PATCH 084/186] need to loop through array

---
 tasks/skylab/FastqProcessing.wdl | 45 ++++++++++++++++++++++++--------
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index c7ae558cf2..20a7169d29 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -294,16 +294,39 @@ task FastqProcessATAC {
 
         echo $read1_fastq_files
         # Make downsample fq for barcode orientation check of R2 barcodes
-        mkdir input_fastq
-        mv $read1_fastq_files input_fastq/
-        mv $read2_fastq_files input_fastq/
-        mv $read3_fastq_files input_fastq/
+        mkdir -p input_fastqs
 
-        #gcloud storage cp $read1_fastq_files /cromwell_root/input_fastq
-        #gcloud storage cp $read2_fastq_files /cromwell_root/input_fastq
-        #gcloud storage cp $read3_fastq_files /cromwell_root/input_fastq
+        # Function to move files into the input_fastqs directory
+        move_files_to_input_dir() {
+            local -n array=$1  # Reference to the array passed as argument
+            local destination_dir=$2
 
-        path="input_fastq/"
+            for file in "${array[@]}"; do
+                if [ -f "$file" ]; then  # Check if file exists
+                    echo "Moving $file to $destination_dir"
+                    mv "$file" "$destination_dir"
+                else
+                    echo "File $file not found"
+                fi
+            done
+        }
+
+        # Move files from FASTQ1_ARRAY to input_fastqs directory
+        move_files_to_input_dir FASTQ1_ARRAY input_fastqs
+
+        # Move files from FASTQ2_ARRAY to input_fastqs directory
+        move_files_to_input_dir FASTQ2_ARRAY input_fastqs
+
+        # Move files from FASTQ3_ARRAY to input_fastqs directory
+        move_files_to_input_dir FASTQ3_ARRAY input_fastqs
+
+        echo "All files moved to input_fastqs directory"
+
+        #gcloud storage cp $read1_fastq_files /cromwell_root/input_fastqs
+        #gcloud storage cp $read2_fastq_files /cromwell_root/input_fastqs
+        #gcloud storage cp $read3_fastq_files /cromwell_root/input_fastqs
+
+        path="input_fastqs/"
         barcode_index="~{barcode_index1}"
         file="${path}${barcode_index}"
         zcat "$file" | sed -n '2~4p' | shuf -n 1000 > downsample.fq
@@ -313,7 +336,7 @@ task FastqProcessATAC {
         for fastq in "${FASTQ2_ARRAY[@]}"
         do
             BASE=`basename $fastq`
-            BASE=`echo --R1 input_fastq/$BASE`
+            BASE=`echo --R1 input_fastqs/$BASE`
             R1_FILES_CONCAT+="$BASE "
         done
         echo $R1_FILES_CONCAT
@@ -323,7 +346,7 @@ task FastqProcessATAC {
         for fastq in "${FASTQ1_ARRAY[@]}"
         do
             BASE=`basename $fastq`
-            BASE=`echo --R2 input_fastq/$BASE`
+            BASE=`echo --R2 input_fastqs/$BASE`
             R2_FILES_CONCAT+="$BASE "
         done
         echo $R2_FILES_CONCAT
@@ -333,7 +356,7 @@ task FastqProcessATAC {
         for fastq in "${FASTQ3_ARRAY[@]}"
         do
             BASE=`basename $fastq`
-            BASE=`echo --R3 input_fastq/$BASE`
+            BASE=`echo --R3 input_fastqs/$BASE`
             R3_FILES_CONCAT+="$BASE "
         done
         echo $R3_FILES_CONCAT

From 49f6df10000f95a990ff45e257e0e8ee59697dd9 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 9 May 2024 12:02:41 -0400
Subject: [PATCH 085/186] add lots of logging to batch logic

---
 pipelines/skylab/snm3C/snm3C.wdl | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 689bba7849..f08eee5596 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -202,7 +202,9 @@ task Demultiplexing {
 
     # Batch the fastq files into folders of batch_number size
     batch_number=~{batch_number}
+    echo "batch number: $batch_number"
     for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
+        echo "making batch directory: batch${i}"
         mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
     done
 
@@ -213,19 +215,28 @@ task Demultiplexing {
     # Define lists of r1 and r2 fq files
     R1_files=($(ls $WORKING_DIR | grep "\-R1.fq.gz"))
     R2_files=($(ls $WORKING_DIR | grep "\-R2.fq.gz"))
+    echo "R1 files: $R1_files"
+    echo "R2 files: $R2_files"
 
     # Distribute the FASTQ files and create TAR files
+    echo "starting loop of files"
     for file in "${R1_files[@]}"; do
         sample_id=$(basename "$file" "-R1.fq.gz")
+        echo "sampleId: $sample_id"
         r2_file="${sample_id}-R2.fq.gz"
+        echo "r2 file: $r2_file"
         mv $WORKING_DIR/$file batch$((folder_index))/$file
+        echo "moved $WORKING_DIR/$file to: batch$((folder_index))/$file"
         mv $WORKING_DIR/$r2_file batch$((folder_index))/$r2_file
+        echo "moved $WORKING_DIR/$r2_file to: batch$((folder_index))/$r2_file"
         # Increment the counter
         folder_index=$(( (folder_index % $batch_number) + 1 ))
+        echo "folder index is now: $folder_index"
     done
 
     # Tar up files per batch
     for i in $(seq 1 "${batch_number}"); do
+        echo "tarring $WORKING_DIR/batch${i}/*.fq.gz and outputting:  $WORKING_DIR/~{plate_id}.${i}.cutadapt_output_files.tar.gz"
         tar -cf - $WORKING_DIR/batch${i}/*.fq.gz | pigz > $WORKING_DIR/~{plate_id}.${i}.cutadapt_output_files.tar.gz
     done
   >>>

From 9823190a708a88b044ece0d1beb938beb6f00388 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 9 May 2024 12:05:19 -0400
Subject: [PATCH 086/186] add lots of logging to batch logic

---
 pipelines/skylab/snm3C/snm3C.wdl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index f08eee5596..263aa2a370 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -236,6 +236,7 @@ task Demultiplexing {
 
     # Tar up files per batch
     for i in $(seq 1 "${batch_number}"); do
+        echo " working on batch: batch${i}"
         echo "tarring $WORKING_DIR/batch${i}/*.fq.gz and outputting:  $WORKING_DIR/~{plate_id}.${i}.cutadapt_output_files.tar.gz"
         tar -cf - $WORKING_DIR/batch${i}/*.fq.gz | pigz > $WORKING_DIR/~{plate_id}.${i}.cutadapt_output_files.tar.gz
     done

From 76ef8e5500ce3b6308b7a6d6bfff5efa7ed899fd Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 9 May 2024 13:18:31 -0400
Subject: [PATCH 087/186] change output dir of bams

---
 pipelines/skylab/snm3C/snm3C.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 263aa2a370..83dc7ded4e 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -404,7 +404,7 @@ task Hisat_paired_end {
           # call separate_unique_and_multi_align_reads
           start=$(date +%s)
           echo "Run separate_unique_and_multi_align_reads"
-          python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
+          python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$WORKING_DIR"'/"'"$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$WORKING_DIR"'/"'"$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$WORKING_DIR"'/"'"$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run separate_unique_and_multi_align_reads: $elapsed seconds"
@@ -412,7 +412,7 @@ task Hisat_paired_end {
           # call split_hisat3n_unmapped_reads
           start=$(date +%s)
           echo "Run split_hisat3n_unmapped_reads"
-          python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
+          python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$WORKING_DIR"'/"'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"

From 7c28296f398a1be1fa568a061baac57371cdb244 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 9 May 2024 14:13:52 -0400
Subject: [PATCH 088/186] fix path of fastq

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 83dc7ded4e..aaf178b047 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -355,7 +355,7 @@ task Hisat_paired_end {
           # sort 
           start=$(date +%s)
           echo "Run sort r2"
-          zcat $batch_dir/batch*/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
+          zcat $batch_dir/$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run sort r2: $elapsed seconds"

From cf243676a86be7bd846bbfffbef91e2c22510749 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 9 May 2024 14:51:51 -0400
Subject: [PATCH 089/186] fix path of fastq

---
 pipelines/skylab/snm3C/snm3C.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index aaf178b047..24374eae4e 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -404,7 +404,7 @@ task Hisat_paired_end {
           # call separate_unique_and_multi_align_reads
           start=$(date +%s)
           echo "Run separate_unique_and_multi_align_reads"
-          python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$WORKING_DIR"'/"'"$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$WORKING_DIR"'/"'"$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$WORKING_DIR"'/"'"$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
+          python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run separate_unique_and_multi_align_reads: $elapsed seconds"
@@ -412,7 +412,7 @@ task Hisat_paired_end {
           # call split_hisat3n_unmapped_reads
           start=$(date +%s)
           echo "Run split_hisat3n_unmapped_reads"
-          python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$WORKING_DIR"'/"'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
+          python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"

From 4d0f214fd7330a30c1b24e2ad8f1c16f84a148b0 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 9 May 2024 14:53:00 -0400
Subject: [PATCH 090/186] fix fastq remove path

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 24374eae4e..470ac66ee5 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -417,7 +417,7 @@ task Hisat_paired_end {
           elapsed=$((end - start)) 
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
           
-          rm $batch_dir/${sample_id}-R1.fq.gz $batch_dir/batch*/${sample_id}-R2.fq.gz
+          rm $batch_dir/${sample_id}-R1.fq.gz $batch_dir/${sample_id}-R2.fq.gz
           rm ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq
           rm ${sample_id}-R1_trimmed.fq.gz ${sample_id}-R2_trimmed.fq.gz
           rm ${sample_id}.hisat3n_dna.unsort.bam ${sample_id}.hisat3n_dna.multi_aligned.bam

From 63ea99a94257cbce22f90a45aa0996a460e067ee Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 9 May 2024 14:53:53 -0400
Subject: [PATCH 091/186] add more echo statements

---
 pipelines/skylab/snm3C/snm3C.wdl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 470ac66ee5..69ad65b3a6 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -416,7 +416,9 @@ task Hisat_paired_end {
           end=$(date +%s) 
           elapsed=$((end - start)) 
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
-          
+
+
+          echo "removing files now!"
           rm $batch_dir/${sample_id}-R1.fq.gz $batch_dir/${sample_id}-R2.fq.gz
           rm ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq
           rm ${sample_id}-R1_trimmed.fq.gz ${sample_id}-R2_trimmed.fq.gz

From 259f13fd5586d5c98e0860774357ddddf5d9033e Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 9 May 2024 14:54:57 -0400
Subject: [PATCH 092/186] add more echo statements again

---
 pipelines/skylab/snm3C/snm3C.wdl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 69ad65b3a6..27ceda7868 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -417,7 +417,6 @@ task Hisat_paired_end {
           elapsed=$((end - start)) 
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
 
-
           echo "removing files now!"
           rm $batch_dir/${sample_id}-R1.fq.gz $batch_dir/${sample_id}-R2.fq.gz
           rm ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq

From f19fd649bb76ca6c0dba27cca2653b7bf0e9af4f Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 9 May 2024 14:58:03 -0400
Subject: [PATCH 093/186] trigger update of wdl in workspace

---
 pipelines/skylab/snm3C/snm3C.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 27ceda7868..e2bd3508b3 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -429,8 +429,8 @@ task Hisat_paired_end {
       R1_files=($(ls $batch_dir | grep "\-R1.fq.gz"))
       R2_files=($(ls $batch_dir | grep "\-R2.fq.gz"))
 
-      echo "r1 files: $R1_files"
-      echo "r2 files: $R2_files"
+      echo "Found r1 files: $R1_files"
+      echo "Found r2 files: $R2_files"
 
       # for file in "${R1_files[@]}"; do
       # (

From 9c9155419cb8f7262b6b6e3d4c1982bb50498e63 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 9 May 2024 15:52:56 -0400
Subject: [PATCH 094/186] remove some logging

---
 pipelines/skylab/snm3C/snm3C.wdl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index e2bd3508b3..596e77ce49 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -417,7 +417,6 @@ task Hisat_paired_end {
           elapsed=$((end - start)) 
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
 
-          echo "removing files now!"
           rm $batch_dir/${sample_id}-R1.fq.gz $batch_dir/${sample_id}-R2.fq.gz
           rm ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq
           rm ${sample_id}-R1_trimmed.fq.gz ${sample_id}-R2_trimmed.fq.gz

From 834f00babd52cfaafecc6143e647d243b172d9e1 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 10 May 2024 09:22:45 -0400
Subject: [PATCH 095/186] fix logging error syntax

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 596e77ce49..51e90ad865 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -474,7 +474,7 @@ task Hisat_paired_end {
       
       # Check if the count of FASTQ files matches the length of the array ${R1_files[@]}
       if [ "$fastq_counts" -ne  "$((2 * array_length))" ]; then
-         echo "Error: Number of FASTQ files ($fastq_count) does not match the 2 * length of the array (${#R1_files[@]})."
+         echo "Error: Number of FASTQ files: $fastq_count does not match the 2 * length of the array: ${#R1_files[@]}."
          exit 1
       fi
 

From a82b92eb0b1ccf960bd2b07c889d89212ff48fbd Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 10 May 2024 11:08:27 -0400
Subject: [PATCH 096/186] add missing quote

---
 pipelines/skylab/snm3C/snm3C.wdl | 91 ++++++++++++++++----------------
 1 file changed, 45 insertions(+), 46 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 51e90ad865..65a7567ea3 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -281,11 +281,14 @@ task Hisat_paired_end {
     }
 
     command <<<
-        set -euo pipefail
+              echo "Tar up stats"
+      start=$(date +%s)
+      tar -cf - *.trimmed.stats.txt | pigz > ~{plate_id}.trimmed_stats_files.tar.gz
+      tar -cf - *.hisat3n_set -euo pipefail
         set -x
         lscpu
         WORKING_DIR=`pwd`
-  
+
         # check genomic reference version and print to output txt file
         STRING=~{genome_fa}
         BASE=$(basename $STRING .fa)
@@ -295,31 +298,31 @@ task Hisat_paired_end {
         # untar the index files for hisat task
         start=$(date +%s)
         echo "Untarring tarred_index_files"
-        pigz -dc ~{tarred_index_files} | tar -xf -  
+        pigz -dc ~{tarred_index_files} | tar -xf -
         rm ~{tarred_index_files}
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to untar tarred_index_files: $elapsed seconds"
-    
+
         # get the basename of the genome_fa file
         cp ~{genome_fa} .
         genome_fa_basename=$(basename ~{genome_fa} .fa)
-       
+
         start=$(date +%s)
         echo "samtools faidx $genome_fa_basename.fa"
         samtools faidx $genome_fa_basename.fa
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to samtools faidx: $elapsed seconds"
 
         min_read_length=~{min_read_length}
-  
+
         # untar the demultiplexed fastqs for sort and trim task
         start=$(date +%s)
         echo "Untar demultiplexed fastqs"
-        pigz -dc ~{tarred_demultiplexed_fastqs} | tar -xf -  
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
+        pigz -dc ~{tarred_demultiplexed_fastqs} | tar -xf -
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to untar: $elapsed seconds"
 
         echo "lsing current dir:"
@@ -343,23 +346,23 @@ task Hisat_paired_end {
 
           r2_file="${sample_id}-R2.fq.gz"
           r1_file="${sample_id}-R1.fq.gz"
-          
-          # sort 
+
+          # sort
           start=$(date +%s)
           echo "Run sort r1"
           zcat $batch_dir/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
-          end=$(date +%s) 
-          elapsed=$((end - start)) 
+          end=$(date +%s)
+          elapsed=$((end - start))
           echo "Elapsed time to run sort r1: $elapsed seconds"
-    
-          # sort 
+
+          # sort
           start=$(date +%s)
           echo "Run sort r2"
-          zcat $batch_dir/$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
-          end=$(date +%s) 
-          elapsed=$((end - start)) 
+          zcat $batch_dir/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
+          end=$(date +%s)
+          elapsed=$((end - start))
           echo "Elapsed time to run sort r2: $elapsed seconds"
-    
+
           # trim using cutadapt
           start=$(date +%s)
           echo "Run cutadapt"
@@ -378,10 +381,10 @@ task Hisat_paired_end {
           -p $WORKING_DIR/${sample_id}-R2_trimmed.fq.gz \
           $WORKING_DIR/${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq \
           > $WORKING_DIR/${sample_id}.trimmed.stats.txt
-          end=$(date +%s) 
-          elapsed=$((end - start)) 
+          end=$(date +%s)
+          elapsed=$((end - start))
           echo "Elapsed time to run cutadapt: $elapsed seconds"
-    
+
           # hisat run
           start=$(date +%s)
           echo "Run hisat"
@@ -396,25 +399,25 @@ task Hisat_paired_end {
           -t \
           --new-summary \
           --summary-file ${sample_id}.hisat3n_dna_summary.txt \
-          --threads 8 | samtools view -b -q 0 -o "${sample_id}.hisat3n_dna.unsort.bam"       
-          end=$(date +%s) 
-          elapsed=$((end - start)) 
+          --threads 8 | samtools view -b -q 0 -o "${sample_id}.hisat3n_dna.unsort.bam"
+          end=$(date +%s)
+          elapsed=$((end - start))
           echo "Elapsed time to run hisat: $elapsed seconds"
-       
+
           # call separate_unique_and_multi_align_reads
           start=$(date +%s)
           echo "Run separate_unique_and_multi_align_reads"
           python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
-          end=$(date +%s) 
-          elapsed=$((end - start)) 
+          end=$(date +%s)
+          elapsed=$((end - start))
           echo "Elapsed time to run separate_unique_and_multi_align_reads: $elapsed seconds"
-    
+
           # call split_hisat3n_unmapped_reads
           start=$(date +%s)
           echo "Run split_hisat3n_unmapped_reads"
           python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
-          end=$(date +%s) 
-          elapsed=$((end - start)) 
+          end=$(date +%s)
+          elapsed=$((end - start))
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
 
           rm $batch_dir/${sample_id}-R1.fq.gz $batch_dir/${sample_id}-R2.fq.gz
@@ -439,7 +442,7 @@ task Hisat_paired_end {
       # )
       # done
 
-      # run 6 instances of task in parallel 
+      # run 6 instances of task in parallel
       for file in "${R1_files[@]}"; do
         (
           echo "starting task $file.."
@@ -456,13 +459,13 @@ task Hisat_paired_end {
       wait
       echo "Tasks all done."
       du -h *
-      
-      #################################### 
+
+      ####################################
       ## make sure that the number of output bams equals the length of R1_files
       # Count the number of *.hisat3n_dna.unique_aligned.bam files
       bam_count=$(find . -maxdepth 1 -type f -name '*.hisat3n_dna.unique_aligned.bam' | wc -l)
       fastq_counts=$(find . -maxdepth 1 -type f -name '*.split_reads*.fastq' | wc -l)
-      
+
       # Get the length of the array ${R1_files[@]}
       array_length=${#R1_files[@]}
 
@@ -471,7 +474,7 @@ task Hisat_paired_end {
          echo "Error: Number of BAM files does not match the length of the array."
          exit 1
       fi
-      
+
       # Check if the count of FASTQ files matches the length of the array ${R1_files[@]}
       if [ "$fastq_counts" -ne  "$((2 * array_length))" ]; then
          echo "Error: Number of FASTQ files: $fastq_count does not match the 2 * length of the array: ${#R1_files[@]}."
@@ -479,13 +482,10 @@ task Hisat_paired_end {
       fi
 
       echo "Number of BAM and FASTQ files matches the length of the array."
-      #################################### 
+      ####################################
 
       # tar up stats
-      echo "Tar up stats"
-      start=$(date +%s)
-      tar -cf - *.trimmed.stats.txt | pigz > ~{plate_id}.trimmed_stats_files.tar.gz
-      tar -cf - *.hisat3n_dna_summary.txt | pigz > ~{plate_id}.hisat3n_paired_end_stats_files.tar.gz
+dna_summary.txt | pigz > ~{plate_id}.hisat3n_paired_end_stats_files.tar.gz
       end=$(date +%s) 
       elapsed=$((end - start))  
       echo "Elapsed time to run tar stats $elapsed seconds"
@@ -505,7 +505,6 @@ task Hisat_paired_end {
       end=$(date +%s) 
       elapsed=$((end - start))  
       echo "Elapsed time to run tar fastqs $elapsed seconds"
-
     >>>
 
     runtime {

From e0f4863f373263a497d9b69b4f1c7739a9a7b893 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 10 May 2024 12:07:31 -0400
Subject: [PATCH 097/186] change working dir to batch dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 38 ++++++++++++++++----------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 65a7567ea3..5e0c226536 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -350,7 +350,7 @@ task Hisat_paired_end {
           # sort
           start=$(date +%s)
           echo "Run sort r1"
-          zcat $batch_dir/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
+          zcat $batch_dir/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "$batch_dir/${sample_id}-R1_sorted.fq"
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run sort r1: $elapsed seconds"
@@ -358,7 +358,7 @@ task Hisat_paired_end {
           # sort
           start=$(date +%s)
           echo "Run sort r2"
-          zcat $batch_dir/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
+          zcat $batch_dir/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "$batch_dir/${sample_id}-R2_sorted.fq"
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run sort r2: $elapsed seconds"
@@ -377,10 +377,10 @@ task Hisat_paired_end {
           -Z \
           -m ${min_read_length}:${min_read_length} \
           --pair-filter 'both' \
-          -o $WORKING_DIR/${sample_id}-R1_trimmed.fq.gz \
-          -p $WORKING_DIR/${sample_id}-R2_trimmed.fq.gz \
-          $WORKING_DIR/${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq \
-          > $WORKING_DIR/${sample_id}.trimmed.stats.txt
+          -o $batch_dir/${sample_id}-R1_trimmed.fq.gz \
+          -p $batch_dir/${sample_id}-R2_trimmed.fq.gz \
+          $batch_dir/${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq \
+          > $batch_dir/${sample_id}.trimmed.stats.txt
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run cutadapt: $elapsed seconds"
@@ -390,16 +390,16 @@ task Hisat_paired_end {
           echo "Run hisat"
           hisat-3n ~{cromwell_root_dir}/$genome_fa_basename \
           -q \
-          -1 ${sample_id}-R1_trimmed.fq.gz \
-          -2 ${sample_id}-R2_trimmed.fq.gz \
+          -1 $batch_dir/${sample_id}-R1_trimmed.fq.gz \
+          -2 $batch_dir/${sample_id}-R2_trimmed.fq.gz \
           --directional-mapping-reverse --base-change C,T \
           --no-repeat-index \
           --no-spliced-alignment \
           --no-temp-splicesite \
           -t \
           --new-summary \
-          --summary-file ${sample_id}.hisat3n_dna_summary.txt \
-          --threads 8 | samtools view -b -q 0 -o "${sample_id}.hisat3n_dna.unsort.bam"
+          --summary-file $batch_dir/${sample_id}.hisat3n_dna_summary.txt \
+          --threads 8 | samtools view -b -q 0 -o $batch_dir/"${sample_id}.hisat3n_dna.unsort.bam"
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run hisat: $elapsed seconds"
@@ -407,7 +407,7 @@ task Hisat_paired_end {
           # call separate_unique_and_multi_align_reads
           start=$(date +%s)
           echo "Run separate_unique_and_multi_align_reads"
-          python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
+          python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$batch_dir/$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$batch_dir/$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$batch_dir/$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$batch_dir/$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run separate_unique_and_multi_align_reads: $elapsed seconds"
@@ -415,16 +415,16 @@ task Hisat_paired_end {
           # call split_hisat3n_unmapped_reads
           start=$(date +%s)
           echo "Run split_hisat3n_unmapped_reads"
-          python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
+          python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$batch_dir/$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
 
           rm $batch_dir/${sample_id}-R1.fq.gz $batch_dir/${sample_id}-R2.fq.gz
-          rm ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq
-          rm ${sample_id}-R1_trimmed.fq.gz ${sample_id}-R2_trimmed.fq.gz
-          rm ${sample_id}.hisat3n_dna.unsort.bam ${sample_id}.hisat3n_dna.multi_aligned.bam
-          rm ${sample_id}.hisat3n_dna.unmapped.fastq
+          rm $batch_dir${sample_id}-R1_sorted.fq $batch_dir${sample_id}-R2_sorted.fq
+          rm $batch_dir${sample_id}-R1_trimmed.fq.gz $batch_dir${sample_id}-R2_trimmed.fq.gz
+          rm $batch_dir${sample_id}.hisat3n_dna.unsort.bam $batch_dir${sample_id}.hisat3n_dna.multi_aligned.bam
+          rm $batch_dir${sample_id}.hisat3n_dna.unmapped.fastq
        }
 
 
@@ -463,8 +463,8 @@ task Hisat_paired_end {
       ####################################
       ## make sure that the number of output bams equals the length of R1_files
       # Count the number of *.hisat3n_dna.unique_aligned.bam files
-      bam_count=$(find . -maxdepth 1 -type f -name '*.hisat3n_dna.unique_aligned.bam' | wc -l)
-      fastq_counts=$(find . -maxdepth 1 -type f -name '*.split_reads*.fastq' | wc -l)
+      bam_count=$(find $batch_dir -maxdepth 1 -type f -name '*.hisat3n_dna.unique_aligned.bam' | wc -l)
+      fastq_counts=$(find $batch_dir -maxdepth 1 -type f -name '*.split_reads*.fastq' | wc -l)
 
       # Get the length of the array ${R1_files[@]}
       array_length=${#R1_files[@]}
@@ -485,7 +485,7 @@ task Hisat_paired_end {
       ####################################
 
       # tar up stats
-dna_summary.txt | pigz > ~{plate_id}.hisat3n_paired_end_stats_files.tar.gz
+      dna_summary.txt | pigz > ~{plate_id}.hisat3n_paired_end_stats_files.tar.gz
       end=$(date +%s) 
       elapsed=$((end - start))  
       echo "Elapsed time to run tar stats $elapsed seconds"

From 15269a1cad34b5e6b48fadfe6633a2c60982d784 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 10 May 2024 13:46:08 -0400
Subject: [PATCH 098/186] add ls

---
 tasks/skylab/PairedTagUtils.wdl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl
index acc1678a58..8fc4d0d7a1 100644
--- a/tasks/skylab/PairedTagUtils.wdl
+++ b/tasks/skylab/PairedTagUtils.wdl
@@ -107,6 +107,7 @@ task PairedTagDemultiplex {
         elif [[ $COUNT == 24 && ~{preindex} == "false" ]]
           then
           echo "FASTQ has correct index length, no modification necessary"
+          ls -lh
           mv "~{input_id}_R2_prefix.fq.gz" "~{r2_base}.fq.gz"
           mv "~{input_id}_R1_prefix.fq.gz" "~{r1_base}.fq.gz"
           mv "~{input_id}_R3_prefix.fq.gz" "~{r3_base}.fq.gz"

From 2d4626eb435fff1dcbe7538bf2d700caec775e04 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Fri, 10 May 2024 14:03:13 -0400
Subject: [PATCH 099/186] version change

---
 .../broad/dna_seq/germline/variant_calling/VariantCalling.wdl   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index e703fd99d6..a7fc887109 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -9,7 +9,7 @@ import "../../../../../tasks/broad/DragenTasks.wdl" as DragenTasks
 workflow VariantCalling {
 
 
-  String pipeline_version = "2.1.19"
+  String pipeline_version = "2.1.20"
 
 
   input {

From 653cb5e05bf97593b872fd5992b8ecfb3fe40bf9 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 10 May 2024 14:17:43 -0400
Subject: [PATCH 100/186] fix paths

---
 pipelines/skylab/snm3C/snm3C.wdl | 47 ++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 5e0c226536..d7809b1b3c 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -281,10 +281,10 @@ task Hisat_paired_end {
     }
 
     command <<<
-              echo "Tar up stats"
-      start=$(date +%s)
-      tar -cf - *.trimmed.stats.txt | pigz > ~{plate_id}.trimmed_stats_files.tar.gz
-      tar -cf - *.hisat3n_set -euo pipefail
+        echo "Tar up stats"
+        start=$(date +%s)
+        tar -cf - *.trimmed.stats.txt | pigz > ~{plate_id}.trimmed_stats_files.tar.gz
+        tar -cf - *.hisat3n_set -euo pipefail
         set -x
         lscpu
         WORKING_DIR=`pwd`
@@ -350,7 +350,7 @@ task Hisat_paired_end {
           # sort
           start=$(date +%s)
           echo "Run sort r1"
-          zcat $batch_dir/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "$batch_dir/${sample_id}-R1_sorted.fq"
+          zcat $batch_dir/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run sort r1: $elapsed seconds"
@@ -358,7 +358,7 @@ task Hisat_paired_end {
           # sort
           start=$(date +%s)
           echo "Run sort r2"
-          zcat $batch_dir/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "$batch_dir/${sample_id}-R2_sorted.fq"
+          zcat $batch_dir/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run sort r2: $elapsed seconds"
@@ -377,10 +377,10 @@ task Hisat_paired_end {
           -Z \
           -m ${min_read_length}:${min_read_length} \
           --pair-filter 'both' \
-          -o $batch_dir/${sample_id}-R1_trimmed.fq.gz \
-          -p $batch_dir/${sample_id}-R2_trimmed.fq.gz \
-          $batch_dir/${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq \
-          > $batch_dir/${sample_id}.trimmed.stats.txt
+          -o $batch_dir${sample_id}-R1_trimmed.fq.gz \
+          -p $batch_dir${sample_id}-R2_trimmed.fq.gz \
+          $batch_dir${sample_id}-R1_sorted.fq $batch_dir${sample_id}-R2_sorted.fq \
+          > $batch_dir${sample_id}.trimmed.stats.txt
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run cutadapt: $elapsed seconds"
@@ -390,16 +390,16 @@ task Hisat_paired_end {
           echo "Run hisat"
           hisat-3n ~{cromwell_root_dir}/$genome_fa_basename \
           -q \
-          -1 $batch_dir/${sample_id}-R1_trimmed.fq.gz \
-          -2 $batch_dir/${sample_id}-R2_trimmed.fq.gz \
+          -1 $batch_dir${sample_id}-R1_trimmed.fq.gz \
+          -2 $batch_dir${sample_id}-R2_trimmed.fq.gz \
           --directional-mapping-reverse --base-change C,T \
           --no-repeat-index \
           --no-spliced-alignment \
           --no-temp-splicesite \
           -t \
           --new-summary \
-          --summary-file $batch_dir/${sample_id}.hisat3n_dna_summary.txt \
-          --threads 8 | samtools view -b -q 0 -o $batch_dir/"${sample_id}.hisat3n_dna.unsort.bam"
+          --summary-file $batch_dir${sample_id}.hisat3n_dna_summary.txt \
+          --threads 8 | samtools view -b -q 0 -o $batch_dir"${sample_id}.hisat3n_dna.unsort.bam"
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run hisat: $elapsed seconds"
@@ -407,7 +407,7 @@ task Hisat_paired_end {
           # call separate_unique_and_multi_align_reads
           start=$(date +%s)
           echo "Run separate_unique_and_multi_align_reads"
-          python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$batch_dir/$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$batch_dir/$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$batch_dir/$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$batch_dir/$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
+          python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$batch_dir$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$batch_dir$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$batch_dir$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$batch_dir$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run separate_unique_and_multi_align_reads: $elapsed seconds"
@@ -415,16 +415,16 @@ task Hisat_paired_end {
           # call split_hisat3n_unmapped_reads
           start=$(date +%s)
           echo "Run split_hisat3n_unmapped_reads"
-          python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$batch_dir/$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
+          python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$batch_dir$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
 
           rm $batch_dir/${sample_id}-R1.fq.gz $batch_dir/${sample_id}-R2.fq.gz
-          rm $batch_dir${sample_id}-R1_sorted.fq $batch_dir${sample_id}-R2_sorted.fq
-          rm $batch_dir${sample_id}-R1_trimmed.fq.gz $batch_dir${sample_id}-R2_trimmed.fq.gz
-          rm $batch_dir${sample_id}.hisat3n_dna.unsort.bam $batch_dir${sample_id}.hisat3n_dna.multi_aligned.bam
-          rm $batch_dir${sample_id}.hisat3n_dna.unmapped.fastq
+          rm $batch_dir/${sample_id}-R1_sorted.fq $batch_dir/${sample_id}-R2_sorted.fq
+          rm $batch_dir/${sample_id}-R1_trimmed.fq.gz $batch_dir/${sample_id}-R2_trimmed.fq.gz
+          rm $batch_dir/${sample_id}.hisat3n_dna.unsort.bam $batch_dir/${sample_id}.hisat3n_dna.multi_aligned.bam
+          rm $batch_dir/${sample_id}.hisat3n_dna.unmapped.fastq
        }
 
 
@@ -463,6 +463,13 @@ task Hisat_paired_end {
       ####################################
       ## make sure that the number of output bams equals the length of R1_files
       # Count the number of *.hisat3n_dna.unique_aligned.bam files
+      echo "lsing batch dir"
+      ls $batch_dir
+      echo "ls current dir"
+      ls
+      echo "lsing working dir"
+      echo $WORKING_DIR
+
       bam_count=$(find $batch_dir -maxdepth 1 -type f -name '*.hisat3n_dna.unique_aligned.bam' | wc -l)
       fastq_counts=$(find $batch_dir -maxdepth 1 -type f -name '*.split_reads*.fastq' | wc -l)
 

From b7e791c18550633c364a95d99bf2df90758f3a0c Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 10 May 2024 15:01:46 -0400
Subject: [PATCH 101/186] fix batch dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index d7809b1b3c..45e33b6cfc 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -334,7 +334,7 @@ task Hisat_paired_end {
         if [ ~{cromwell_root_dir} = "gcp" ]; then
             batch_dir="batch*/"
         else
-            batch_dir="~{cromwell_root_dir}/*/*/*/*/*/~{cromwell_root_dir}/*/*/*/*/batch*/"
+            batch_dir="~{cromwell_root_dir}/*/*/*/*/*~{cromwell_root_dir}/*/*/*/*/batch*/"
         fi
         echo "batchdirectory: $batch_dir"
 

From a6fd83b868e51d4ed77093749342ba4fa0e4727c Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 10 May 2024 15:03:33 -0400
Subject: [PATCH 102/186] fix batch dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 45e33b6cfc..75694a4f7d 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -346,6 +346,9 @@ task Hisat_paired_end {
 
           r2_file="${sample_id}-R2.fq.gz"
           r1_file="${sample_id}-R1.fq.gz"
+          echo "r1 file: $r1_file"
+          echo "r2 file: $r2_file"
+          echo "batch dir: $batch_dir"
 
           # sort
           start=$(date +%s)

From 81cea6a07174034ff2187e81eb62575cc218f608 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 10 May 2024 16:04:18 -0400
Subject: [PATCH 103/186] fix directories

---
 pipelines/skylab/snm3C/snm3C.wdl | 48 +++++++++++++++++---------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 75694a4f7d..b1aaa6b998 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -281,13 +281,15 @@ task Hisat_paired_end {
     }
 
     command <<<
+        WORKING_DIR=`pwd`
         echo "Tar up stats"
+        ls -lR
         start=$(date +%s)
-        tar -cf - *.trimmed.stats.txt | pigz > ~{plate_id}.trimmed_stats_files.tar.gz
-        tar -cf - *.hisat3n_set -euo pipefail
+        tar -cf - $WORKING_DIR/*.trimmed.stats.txt | pigz > ~{plate_id}.trimmed_stats_files.tar.gz
+        tar -cf - $WORKING_DIR/*.hisat3n_set -euo pipefail
         set -x
         lscpu
-        WORKING_DIR=`pwd`
+
 
         # check genomic reference version and print to output txt file
         STRING=~{genome_fa}
@@ -349,11 +351,13 @@ task Hisat_paired_end {
           echo "r1 file: $r1_file"
           echo "r2 file: $r2_file"
           echo "batch dir: $batch_dir"
+          cp $batch_dir/"$r1_file" .
+          cp $batch_dir/"$r2_file" .
 
           # sort
           start=$(date +%s)
           echo "Run sort r1"
-          zcat $batch_dir/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
+          zcat "$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run sort r1: $elapsed seconds"
@@ -361,7 +365,7 @@ task Hisat_paired_end {
           # sort
           start=$(date +%s)
           echo "Run sort r2"
-          zcat $batch_dir/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
+          zcat "$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run sort r2: $elapsed seconds"
@@ -380,10 +384,10 @@ task Hisat_paired_end {
           -Z \
           -m ${min_read_length}:${min_read_length} \
           --pair-filter 'both' \
-          -o $batch_dir${sample_id}-R1_trimmed.fq.gz \
-          -p $batch_dir${sample_id}-R2_trimmed.fq.gz \
-          $batch_dir${sample_id}-R1_sorted.fq $batch_dir${sample_id}-R2_sorted.fq \
-          > $batch_dir${sample_id}.trimmed.stats.txt
+          -o ${sample_id}-R1_trimmed.fq.gz \
+          -p ${sample_id}-R2_trimmed.fq.gz \
+          ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq \
+          > ${sample_id}.trimmed.stats.txt
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run cutadapt: $elapsed seconds"
@@ -393,16 +397,16 @@ task Hisat_paired_end {
           echo "Run hisat"
           hisat-3n ~{cromwell_root_dir}/$genome_fa_basename \
           -q \
-          -1 $batch_dir${sample_id}-R1_trimmed.fq.gz \
-          -2 $batch_dir${sample_id}-R2_trimmed.fq.gz \
+          -1 ${sample_id}-R1_trimmed.fq.gz \
+          -2 ${sample_id}-R2_trimmed.fq.gz \
           --directional-mapping-reverse --base-change C,T \
           --no-repeat-index \
           --no-spliced-alignment \
           --no-temp-splicesite \
           -t \
           --new-summary \
-          --summary-file $batch_dir${sample_id}.hisat3n_dna_summary.txt \
-          --threads 8 | samtools view -b -q 0 -o $batch_dir"${sample_id}.hisat3n_dna.unsort.bam"
+          --summary-file ${sample_id}.hisat3n_dna_summary.txt \
+          --threads 8 | samtools view -b -q 0 -o "${sample_id}.hisat3n_dna.unsort.bam"
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run hisat: $elapsed seconds"
@@ -410,7 +414,7 @@ task Hisat_paired_end {
           # call separate_unique_and_multi_align_reads
           start=$(date +%s)
           echo "Run separate_unique_and_multi_align_reads"
-          python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$batch_dir$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$batch_dir$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$batch_dir$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$batch_dir$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
+          python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')'
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run separate_unique_and_multi_align_reads: $elapsed seconds"
@@ -418,16 +422,16 @@ task Hisat_paired_end {
           # call split_hisat3n_unmapped_reads
           start=$(date +%s)
           echo "Run split_hisat3n_unmapped_reads"
-          python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$batch_dir$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
+          python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')'
           end=$(date +%s)
           elapsed=$((end - start))
           echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds"
 
-          rm $batch_dir/${sample_id}-R1.fq.gz $batch_dir/${sample_id}-R2.fq.gz
-          rm $batch_dir/${sample_id}-R1_sorted.fq $batch_dir/${sample_id}-R2_sorted.fq
-          rm $batch_dir/${sample_id}-R1_trimmed.fq.gz $batch_dir/${sample_id}-R2_trimmed.fq.gz
-          rm $batch_dir/${sample_id}.hisat3n_dna.unsort.bam $batch_dir/${sample_id}.hisat3n_dna.multi_aligned.bam
-          rm $batch_dir/${sample_id}.hisat3n_dna.unmapped.fastq
+          rm ${sample_id}-R1.fq.gz ${sample_id}-R2.fq.gz
+          rm ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq
+          rm ${sample_id}-R1_trimmed.fq.gz ${sample_id}-R2_trimmed.fq.gz
+          rm ${sample_id}.hisat3n_dna.unsort.bam ${sample_id}.hisat3n_dna.multi_aligned.bam
+          rm ${sample_id}.hisat3n_dna.unmapped.fastq
        }
 
 
@@ -473,8 +477,8 @@ task Hisat_paired_end {
       echo "lsing working dir"
       echo $WORKING_DIR
 
-      bam_count=$(find $batch_dir -maxdepth 1 -type f -name '*.hisat3n_dna.unique_aligned.bam' | wc -l)
-      fastq_counts=$(find $batch_dir -maxdepth 1 -type f -name '*.split_reads*.fastq' | wc -l)
+      bam_count=$(find . -maxdepth 1 -type f -name '*.hisat3n_dna.unique_aligned.bam' | wc -l)
+      fastq_counts=$(find . -maxdepth 1 -type f -name '*.split_reads*.fastq' | wc -l)
 
       # Get the length of the array ${R1_files[@]}
       array_length=${#R1_files[@]}

From 1cbdd46615f40220fb95a8a6529c6a23fd626000 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 13 May 2024 11:10:48 -0400
Subject: [PATCH 104/186] moving around inputs

---
 pipelines/skylab/snm3C/snm3C.wdl | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index b1aaa6b998..b13ccd721d 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -281,15 +281,19 @@ task Hisat_paired_end {
     }
 
     command <<<
+
+
         WORKING_DIR=`pwd`
-        echo "Tar up stats"
-        ls -lR
-        start=$(date +%s)
-        tar -cf - $WORKING_DIR/*.trimmed.stats.txt | pigz > ~{plate_id}.trimmed_stats_files.tar.gz
-        tar -cf - $WORKING_DIR/*.hisat3n_set -euo pipefail
-        set -x
-        lscpu
+        mkdir -p $WORKING_DIR/pipeline_inputs/
+
+        mv ~{tarred_demultiplexed_fastqs} $WORKING_DIR/pipeline_inputs/
+        mv ~{tarred_index_files} $WORKING_DIR/pipeline_inputs/
+        mv ~{genome_fa} $WORKING_DIR/pipeline_inputs/
+        mv ~{chromosome_sizes} $WORKING_DIR/pipeline_inputs/
 
+        cd $WORKING_DIR/pipeline_inputs/
+
+        ls -l
 
         # check genomic reference version and print to output txt file
         STRING=~{genome_fa}
@@ -297,6 +301,9 @@ task Hisat_paired_end {
 
         echo "The reference is $BASE" > ~{plate_id}.reference_version.txt
 
+        echo "the path to tarred_index_files is:"
+        echo ~{tarred_index_files}
+
         # untar the index files for hisat task
         start=$(date +%s)
         echo "Untarring tarred_index_files"

From ea0458c2613c42690562ed85bbec6774335de811 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 13 May 2024 11:25:14 -0400
Subject: [PATCH 105/186] remove working dir from name of tar file

---
 pipelines/skylab/snm3C/snm3C.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index b13ccd721d..3aab7f355f 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -237,8 +237,8 @@ task Demultiplexing {
     # Tar up files per batch
     for i in $(seq 1 "${batch_number}"); do
         echo " working on batch: batch${i}"
-        echo "tarring $WORKING_DIR/batch${i}/*.fq.gz and outputting:  $WORKING_DIR/~{plate_id}.${i}.cutadapt_output_files.tar.gz"
-        tar -cf - $WORKING_DIR/batch${i}/*.fq.gz | pigz > $WORKING_DIR/~{plate_id}.${i}.cutadapt_output_files.tar.gz
+        echo "tarring $WORKING_DIR/batch${i}/*.fq.gz and outputting:  ~{plate_id}.${i}.cutadapt_output_files.tar.gz"
+        tar -cf - $WORKING_DIR/batch${i}/*.fq.gz | pigz > ~{plate_id}.${i}.cutadapt_output_files.tar.gz
     done
   >>>
 

From 91b4b7c595f2a3bf5d96333719536edb0d500934 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 13 May 2024 11:48:53 -0400
Subject: [PATCH 106/186] take basenames

---
 pipelines/skylab/snm3C/snm3C.wdl | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 3aab7f355f..c58c50c4bb 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -301,20 +301,20 @@ task Hisat_paired_end {
 
         echo "The reference is $BASE" > ~{plate_id}.reference_version.txt
 
-        echo "the path to tarred_index_files is:"
-        echo ~{tarred_index_files}
-
         # untar the index files for hisat task
         start=$(date +%s)
         echo "Untarring tarred_index_files"
-        pigz -dc ~{tarred_index_files} | tar -xf -
-        rm ~{tarred_index_files}
+
+        #take the basename of the demultiplexed fastq tar file
+        index_basename=$(basename ~{tarred_index_files})
+        pigz -dc $index_basename | tar -xf -
+        rm $index_basename
+
         end=$(date +%s)
         elapsed=$((end - start))
         echo "Elapsed time to untar tarred_index_files: $elapsed seconds"
 
         # get the basename of the genome_fa file
-        cp ~{genome_fa} .
         genome_fa_basename=$(basename ~{genome_fa} .fa)
 
         start=$(date +%s)
@@ -329,21 +329,22 @@ task Hisat_paired_end {
         # untar the demultiplexed fastqs for sort and trim task
         start=$(date +%s)
         echo "Untar demultiplexed fastqs"
-        pigz -dc ~{tarred_demultiplexed_fastqs} | tar -xf -
+        #take the basename of the demultiplexed fastq tar file
+        demultiplexed_basename=$(basename ~{tarred_demultiplexed_fastqs})
+
+        pigz -dc $demultiplexed_basename | tar -xf -
         end=$(date +%s)
         elapsed=$((end - start))
         echo "Elapsed time to untar: $elapsed seconds"
 
         echo "lsing current dir:"
         ls -lR
-        echo "lsing cromwell root:"
-        ls -lR ~{cromwell_root_dir}
 
         # define lists of r1 and r2 fq files
         if [ ~{cromwell_root_dir} = "gcp" ]; then
             batch_dir="batch*/"
         else
-            batch_dir="~{cromwell_root_dir}/*/*/*/*/*~{cromwell_root_dir}/*/*/*/*/batch*/"
+            batch_dir="~{cromwell_root_dir}/*/*/*/*/batch*/"
         fi
         echo "batchdirectory: $batch_dir"
 

From 60274b942d9af13aeaacb1a5c13f0151095d8897 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 13 May 2024 12:27:32 -0400
Subject: [PATCH 107/186] recursively ls the root to find the batch dirs

---
 pipelines/skylab/snm3C/snm3C.wdl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index c58c50c4bb..49b9d1b0cd 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -281,8 +281,6 @@ task Hisat_paired_end {
     }
 
     command <<<
-
-
         WORKING_DIR=`pwd`
         mkdir -p $WORKING_DIR/pipeline_inputs/
 
@@ -340,6 +338,9 @@ task Hisat_paired_end {
         echo "lsing current dir:"
         ls -lR
 
+        echo "lsing root dir:"
+        ls -lR ~{cromwell_root_dir}
+
         # define lists of r1 and r2 fq files
         if [ ~{cromwell_root_dir} = "gcp" ]; then
             batch_dir="batch*/"

From 5d2c646b146fd74775713d68e3aa95385185827e Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 13 May 2024 13:13:42 -0400
Subject: [PATCH 108/186] add echo statement

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 49b9d1b0cd..7275720e77 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -328,7 +328,9 @@ task Hisat_paired_end {
         start=$(date +%s)
         echo "Untar demultiplexed fastqs"
         #take the basename of the demultiplexed fastq tar file
+
         demultiplexed_basename=$(basename ~{tarred_demultiplexed_fastqs})
+        echo "the basename of the tarred_demultiplexed_fastqs is"
 
         pigz -dc $demultiplexed_basename | tar -xf -
         end=$(date +%s)

From f24adf7a62729fceed203a4b4f0abd80de9bad8b Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Mon, 13 May 2024 14:22:02 -0400
Subject: [PATCH 109/186] added docker parameter for
 DragenTasks.CalibrateDragstrModel supporting azure

---
 .../broad/dna_seq/germline/variant_calling/VariantCalling.wdl  | 3 ++-
 tasks/broad/DragenTasks.wdl                                    | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index a7fc887109..31cd1a02ed 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -65,7 +65,8 @@ workflow VariantCalling {
         ref_dict = ref_dict,
         alignment = input_bam,
         alignment_index = input_bam_index,
-        str_table_file = select_first([ref_str])
+        str_table_file = select_first([ref_str]),
+        docker = gatk_docker
     }
   }
 
diff --git a/tasks/broad/DragenTasks.wdl b/tasks/broad/DragenTasks.wdl
index 149eb5fd12..7e28b793bd 100644
--- a/tasks/broad/DragenTasks.wdl
+++ b/tasks/broad/DragenTasks.wdl
@@ -24,7 +24,7 @@ task CalibrateDragstrModel {
     File str_table_file
     File alignment ## can handle cram or bam.
     File alignment_index
-    String docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+    String docker
     Int preemptible_tries = 3
     Int threads = 4
     Int? memory_mb

From ab4ac4abd0a38034bc13cb64b4c89481ff6d80d7 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Mon, 13 May 2024 14:30:47 -0400
Subject: [PATCH 110/186] added docker parameter for Utils.ScatterIntervalList
 to support azure

---
 .../dna_seq/germline/variant_calling/VariantCalling.wdl    | 7 ++++++-
 tasks/broad/Utilities.wdl                                  | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index 31cd1a02ed..6cbe58a952 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -44,6 +44,10 @@ workflow VariantCalling {
   String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:latest"
   String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure
 
+  String picard_cloud_docker_gcp = "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039"
+  String picard_cloud_docker_azure = "dsppipelinedev.azurecr.io/picard-python:1.0.0-2.26.10-1663951039"
+  String picard_cloud_docker = if cloud_provider == "gcp" then picard_cloud_docker_gcp else picard_cloud_docker_azure
+
   # make sure either gcp or azr is supplied as cloud_provider input
   if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
     call Utils.ErrorWithMessage as ErrorMessageIncorrectInput {
@@ -77,7 +81,8 @@ workflow VariantCalling {
     input:
       interval_list = calling_interval_list,
       scatter_count = haplotype_scatter_count,
-      break_bands_at_multiples_of = break_bands_at_multiples_of
+      break_bands_at_multiples_of = break_bands_at_multiples_of,
+      docker = picard_cloud_docker
   }
 
   # We need disk to localize the sharded input and output due to the scatter for HaplotypeCaller.
diff --git a/tasks/broad/Utilities.wdl b/tasks/broad/Utilities.wdl
index ce6c101368..3ad524c90d 100644
--- a/tasks/broad/Utilities.wdl
+++ b/tasks/broad/Utilities.wdl
@@ -79,6 +79,7 @@ task ScatterIntervalList {
     File interval_list
     Int scatter_count
     Int break_bands_at_multiples_of
+    String docker
   }
 
   command <<<
@@ -110,7 +111,7 @@ task ScatterIntervalList {
     Int interval_count = read_int(stdout())
   }
   runtime {
-    docker: "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039"
+    docker: docker
     memory: "2000 MiB"
   }
 }

From 0dbf9eace4488bc27000b3847efa81923c63d8ac Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Mon, 13 May 2024 14:33:16 -0400
Subject: [PATCH 111/186] added docker parameter for
 Calling.HaplotypeCaller_GATK35_GVCF to support azure

---
 .../broad/dna_seq/germline/variant_calling/VariantCalling.wdl  | 3 ++-
 tasks/broad/GermlineVariantDiscovery.wdl                       | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index 6cbe58a952..eb2dfc6d8a 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -106,7 +106,8 @@ workflow VariantCalling {
           ref_fasta_index = ref_fasta_index,
           contamination = contamination,
           preemptible_tries = agg_preemptible_tries,
-          hc_scatter = hc_divisor
+          hc_scatter = hc_divisor,
+          docker = gatk_docker
       }
     }
 
diff --git a/tasks/broad/GermlineVariantDiscovery.wdl b/tasks/broad/GermlineVariantDiscovery.wdl
index 7294f2d0b5..3fc8201af0 100644
--- a/tasks/broad/GermlineVariantDiscovery.wdl
+++ b/tasks/broad/GermlineVariantDiscovery.wdl
@@ -27,6 +27,7 @@ task HaplotypeCaller_GATK35_GVCF {
     Float? contamination
     Int preemptible_tries
     Int hc_scatter
+    String docker
   }
 
   parameter_meta {
@@ -66,7 +67,7 @@ task HaplotypeCaller_GATK35_GVCF {
       --read_filter OverclippedRead
   }
   runtime {
-    docker: "us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384"
+    docker: docker
     preemptible: preemptible_tries
     memory: "10000 MiB"
     cpu: "1"

From 33023643b1abf83bfb3d42e7a3ddc4f84f3868c1 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Mon, 13 May 2024 14:34:31 -0400
Subject: [PATCH 112/186] added docker parameter for
 Calling.HaplotypeCaller_GATK4_VCF to support azure

---
 .../broad/dna_seq/germline/variant_calling/VariantCalling.wdl | 4 +++-
 tasks/broad/GermlineVariantDiscovery.wdl                      | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index eb2dfc6d8a..68be8dba04 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -130,7 +130,9 @@ workflow VariantCalling {
           use_dragen_hard_filtering = use_dragen_hard_filtering,
           use_spanning_event_genotyping = use_spanning_event_genotyping,
           dragstr_model = DragstrAutoCalibration.dragstr_model,
-          preemptible_tries = agg_preemptible_tries
+          preemptible_tries = agg_preemptible_tries,
+          gatk_docker = gatk_docker
+
        }
 
       if (use_dragen_hard_filtering) {
diff --git a/tasks/broad/GermlineVariantDiscovery.wdl b/tasks/broad/GermlineVariantDiscovery.wdl
index 3fc8201af0..1ae34a58dc 100644
--- a/tasks/broad/GermlineVariantDiscovery.wdl
+++ b/tasks/broad/GermlineVariantDiscovery.wdl
@@ -97,7 +97,7 @@ task HaplotypeCaller_GATK4_VCF {
     Boolean use_dragen_hard_filtering = false
     Boolean use_spanning_event_genotyping = true
     File? dragstr_model
-    String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+    String gatk_docker 
     Int memory_multiplier = 1
   }
   

From 3e350bd07cbfb9f0811e79633203847321996bcb Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Mon, 13 May 2024 14:39:26 -0400
Subject: [PATCH 113/186] added docker parameter to support azure

---
 .../germline/variant_calling/VariantCalling.wdl      | 12 ++++++++----
 tasks/broad/BamProcessing.wdl                        |  3 ++-
 tasks/broad/GermlineVariantDiscovery.wdl             |  5 +++--
 tasks/broad/Qc.wdl                                   |  3 ++-
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index 68be8dba04..ca0c0de091 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -142,7 +142,8 @@ workflow VariantCalling {
             input_vcf_index = HaplotypeCallerGATK4.output_vcf_index,
             make_gvcf = make_gvcf,
             vcf_basename = base_file_name,
-            preemptible_tries = agg_preemptible_tries
+            preemptible_tries = agg_preemptible_tries,
+            gatk_docker = gatk_docker
         }
       }
 
@@ -153,7 +154,8 @@ workflow VariantCalling {
             input_bam = HaplotypeCallerGATK4.bamout,
             output_bam_basename = final_vcf_base_name,
             preemptible_tries = agg_preemptible_tries,
-            compression_level = 2
+            compression_level = 2,
+            docker = picard_cloud_docker
         }
       }
     }
@@ -170,7 +172,8 @@ workflow VariantCalling {
       input_vcfs = vcfs_to_merge,
       input_vcfs_indexes = vcf_indices_to_merge,
       output_vcf_name = final_vcf_base_name + hard_filter_suffix + merge_suffix,
-      preemptible_tries = agg_preemptible_tries
+      preemptible_tries = agg_preemptible_tries,
+      docker = picard_cloud_docker
   }
 
   if (make_gvcf && !skip_reblocking) {
@@ -222,7 +225,8 @@ workflow VariantCalling {
       ref_dict = ref_dict,
       evaluation_interval_list = evaluation_interval_list,
       is_gvcf = make_gvcf,
-      preemptible_tries = agg_preemptible_tries
+      preemptible_tries = agg_preemptible_tries,
+      docker = picard_cloud_docker
   }
 
   output {
diff --git a/tasks/broad/BamProcessing.wdl b/tasks/broad/BamProcessing.wdl
index e5ae21039a..13d88c4f5f 100644
--- a/tasks/broad/BamProcessing.wdl
+++ b/tasks/broad/BamProcessing.wdl
@@ -24,6 +24,7 @@ task SortSam {
     Int compression_level
     Int additional_disk = 20
     Int memory_multiplier = 1
+    String docker
   }
   # SortSam spills to disk a lot more because we are only store 300000 records in RAM now because its faster for our data so it needs
   # more disk space.  Also it spills to disk in an uncompressed format so we need to account for that with a larger multiplier
@@ -46,7 +47,7 @@ task SortSam {
 
   }
   runtime {
-    docker: "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
+    docker: docker
     disks: "local-disk " + disk_size + " HDD"
     cpu: "1"
     memory: "${machine_mem_mb} MiB"
diff --git a/tasks/broad/GermlineVariantDiscovery.wdl b/tasks/broad/GermlineVariantDiscovery.wdl
index 1ae34a58dc..d6bcb77298 100644
--- a/tasks/broad/GermlineVariantDiscovery.wdl
+++ b/tasks/broad/GermlineVariantDiscovery.wdl
@@ -171,6 +171,7 @@ task MergeVCFs {
     Array[File] input_vcfs_indexes
     String output_vcf_name
     Int preemptible_tries = 3
+    String docker
   }
 
   Int disk_size = ceil(size(input_vcfs, "GiB") * 2.5) + 10
@@ -184,7 +185,7 @@ task MergeVCFs {
       OUTPUT=~{output_vcf_name}
   }
   runtime {
-    docker: "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
+    docker: docker
     preemptible: preemptible_tries
     memory: "3000 MiB"
     disks: "local-disk ~{disk_size} HDD"
@@ -293,7 +294,7 @@ task DragenHardFilterVcf {
     Boolean make_gvcf
     String vcf_basename
     Int preemptible_tries
-    String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
+    String gatk_docker 
   }
 
   Int disk_size = ceil(2 * size(input_vcf, "GiB")) + 20
diff --git a/tasks/broad/Qc.wdl b/tasks/broad/Qc.wdl
index 58c94f46e9..12d3208d86 100644
--- a/tasks/broad/Qc.wdl
+++ b/tasks/broad/Qc.wdl
@@ -677,6 +677,7 @@ task CollectVariantCallingMetrics {
     File evaluation_interval_list
     Boolean is_gvcf = true
     Int preemptible_tries
+    String docker
   }
 
   Int disk_size = ceil(size(input_vcf, "GiB") + size(dbsnp_vcf, "GiB")) + 20
@@ -692,7 +693,7 @@ task CollectVariantCallingMetrics {
       ~{true="GVCF_INPUT=true" false="" is_gvcf}
   }
   runtime {
-    docker: "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
+    docker: docker
     preemptible: preemptible_tries
     memory: "3000 MiB"
     disks: "local-disk " + disk_size + " HDD"

From ee114955d2c39a990e722f40307b48f93105ad6d Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 13 May 2024 15:15:15 -0400
Subject: [PATCH 114/186] add echo statement

---
 pipelines/skylab/snm3C/snm3C.wdl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 7275720e77..a940d8fa77 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -281,6 +281,8 @@ task Hisat_paired_end {
     }
 
     command <<<
+        set -euo pipefail
+
         WORKING_DIR=`pwd`
         mkdir -p $WORKING_DIR/pipeline_inputs/
 
@@ -331,6 +333,9 @@ task Hisat_paired_end {
 
         demultiplexed_basename=$(basename ~{tarred_demultiplexed_fastqs})
         echo "the basename of the tarred_demultiplexed_fastqs is"
+        echo $demultiplexed_basename
+        echo "this is the wdl variable path:"
+        echo ~{tarred_demultiplexed_fastqs}
 
         pigz -dc $demultiplexed_basename | tar -xf -
         end=$(date +%s)

From 8b4fe34730fb58db3779876a8bc5a6f792d8d7d2 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 13 May 2024 17:05:53 -0400
Subject: [PATCH 115/186] fix batch dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index a940d8fa77..049f8033aa 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -353,6 +353,7 @@ task Hisat_paired_end {
             batch_dir="batch*/"
         else
             batch_dir="~{cromwell_root_dir}/*/*/*/*/batch*/"
+
         fi
         echo "batchdirectory: $batch_dir"
 

From 0ebc00f859bb3f9eaf6d79b70525fdc500d49b3f Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 13 May 2024 17:38:07 -0400
Subject: [PATCH 116/186] fix batch dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 049f8033aa..191d21b28f 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -352,8 +352,7 @@ task Hisat_paired_end {
         if [ ~{cromwell_root_dir} = "gcp" ]; then
             batch_dir="batch*/"
         else
-            batch_dir="~{cromwell_root_dir}/*/*/*/*/batch*/"
-
+            batch_dir="~{cromwell_root_dir}/*/*/*/*/*/*/*/*/*/*/*/batch*/"
         fi
         echo "batchdirectory: $batch_dir"
 

From ab63a12e21fc705a92045bdb825f030c0d514ed1 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 13 May 2024 19:42:17 -0400
Subject: [PATCH 117/186] fix path to hisat index files

---
 pipelines/skylab/snm3C/snm3C.wdl | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 191d21b28f..82d3651529 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -411,7 +411,14 @@ task Hisat_paired_end {
           # hisat run
           start=$(date +%s)
           echo "Run hisat"
-          hisat-3n ~{cromwell_root_dir}/$genome_fa_basename \
+          if [ ~{cromwell_root_dir} = "gcp" ]; then
+            hisat_index_file_dir="~{cromwell_root_dir}/$genome_fa_basename"
+          else
+            hisat_index_file_dir="$WORKING_DIR/$genome_fa_basename"
+          fi
+          echo "hisat_index_file_dir: $hisat_index_file_dir"
+
+          hisat-3n $hisat_index_file_dir \
           -q \
           -1 ${sample_id}-R1_trimmed.fq.gz \
           -2 ${sample_id}-R2_trimmed.fq.gz \

From c01f2aa167c6bd3dffacaa809e8c4558370a93c0 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 13 May 2024 20:44:51 -0400
Subject: [PATCH 118/186] add pipeline_inputs subdir to location of index files

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 82d3651529..ebebc74a6f 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -412,7 +412,7 @@ task Hisat_paired_end {
           start=$(date +%s)
           echo "Run hisat"
           if [ ~{cromwell_root_dir} = "gcp" ]; then
-            hisat_index_file_dir="~{cromwell_root_dir}/$genome_fa_basename"
+            hisat_index_file_dir="~{cromwell_root_dir}/pipeline_inputs/$genome_fa_basename"
           else
             hisat_index_file_dir="$WORKING_DIR/$genome_fa_basename"
           fi

From 352c7afc0d512834f2249dad5f68b295834d5c3f Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 13 May 2024 21:09:48 -0400
Subject: [PATCH 119/186] add pipeline_inputs subdir to location of index files
 again

---
 pipelines/skylab/snm3C/snm3C.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index ebebc74a6f..1f40a2d392 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -412,9 +412,9 @@ task Hisat_paired_end {
           start=$(date +%s)
           echo "Run hisat"
           if [ ~{cromwell_root_dir} = "gcp" ]; then
-            hisat_index_file_dir="~{cromwell_root_dir}/pipeline_inputs/$genome_fa_basename"
+            hisat_index_file_dir="~{cromwell_root_dir}/$genome_fa_basename"
           else
-            hisat_index_file_dir="$WORKING_DIR/$genome_fa_basename"
+            hisat_index_file_dir="$WORKING_DIR/pipeline_inputs/$genome_fa_basename"
           fi
           echo "hisat_index_file_dir: $hisat_index_file_dir"
 

From 797ba497f002972d4eb2bff623209c6d074e4d87 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Mon, 13 May 2024 21:41:26 -0400
Subject: [PATCH 120/186] fix tar command

---
 pipelines/skylab/snm3C/snm3C.wdl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 1f40a2d392..981c16619d 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -522,7 +522,10 @@ task Hisat_paired_end {
       ####################################
 
       # tar up stats
-      dna_summary.txt | pigz > ~{plate_id}.hisat3n_paired_end_stats_files.tar.gz
+      echo "Tar up stats"
+      start=$(date +%s)
+      tar -cf - *.trimmed.stats.txt | pigz > ~{plate_id}.trimmed_stats_files.tar.gz
+      tar -cf - *.hisat3n_dna_summary.txt | pigz > ~{plate_id}.hisat3n_paired_end_stats_files.tar.gz
       end=$(date +%s) 
       elapsed=$((end - start))  
       echo "Elapsed time to run tar stats $elapsed seconds"

From a1a952bed9a6dfb79e503e96546ab24ce7431228 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 09:14:52 -0400
Subject: [PATCH 121/186] some clean up, remove pipeline inputs subdir, edit
 single end task

---
 pipelines/skylab/snm3C/snm3C.wdl | 93 ++++++++------------------------
 1 file changed, 21 insertions(+), 72 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 981c16619d..d4c70e2d5c 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -197,14 +197,11 @@ task Demultiplexing {
                 adapter_name = 'A' + adapter_name.group(1)
                 if adapter_name in adapter_counts and adapter_counts[adapter_name] > threshold:
                     os.remove(file_path)
-                    print(f'Removed file: {filename}')
     CODE
 
     # Batch the fastq files into folders of batch_number size
     batch_number=~{batch_number}
-    echo "batch number: $batch_number"
     for i in $(seq 1 "${batch_number}"); do  # Use seq for reliable brace expansion
-        echo "making batch directory: batch${i}"
         mkdir -p "batch${i}"  # Combine batch and i, use -p to create parent dirs
     done
 
@@ -215,29 +212,19 @@ task Demultiplexing {
     # Define lists of r1 and r2 fq files
     R1_files=($(ls $WORKING_DIR | grep "\-R1.fq.gz"))
     R2_files=($(ls $WORKING_DIR | grep "\-R2.fq.gz"))
-    echo "R1 files: $R1_files"
-    echo "R2 files: $R2_files"
 
     # Distribute the FASTQ files and create TAR files
-    echo "starting loop of files"
     for file in "${R1_files[@]}"; do
         sample_id=$(basename "$file" "-R1.fq.gz")
-        echo "sampleId: $sample_id"
         r2_file="${sample_id}-R2.fq.gz"
-        echo "r2 file: $r2_file"
         mv $WORKING_DIR/$file batch$((folder_index))/$file
-        echo "moved $WORKING_DIR/$file to: batch$((folder_index))/$file"
         mv $WORKING_DIR/$r2_file batch$((folder_index))/$r2_file
-        echo "moved $WORKING_DIR/$r2_file to: batch$((folder_index))/$r2_file"
         # Increment the counter
         folder_index=$(( (folder_index % $batch_number) + 1 ))
-        echo "folder index is now: $folder_index"
     done
 
     # Tar up files per batch
     for i in $(seq 1 "${batch_number}"); do
-        echo " working on batch: batch${i}"
-        echo "tarring $WORKING_DIR/batch${i}/*.fq.gz and outputting:  ~{plate_id}.${i}.cutadapt_output_files.tar.gz"
         tar -cf - $WORKING_DIR/batch${i}/*.fq.gz | pigz > ~{plate_id}.${i}.cutadapt_output_files.tar.gz
     done
   >>>
@@ -282,18 +269,7 @@ task Hisat_paired_end {
 
     command <<<
         set -euo pipefail
-
         WORKING_DIR=`pwd`
-        mkdir -p $WORKING_DIR/pipeline_inputs/
-
-        mv ~{tarred_demultiplexed_fastqs} $WORKING_DIR/pipeline_inputs/
-        mv ~{tarred_index_files} $WORKING_DIR/pipeline_inputs/
-        mv ~{genome_fa} $WORKING_DIR/pipeline_inputs/
-        mv ~{chromosome_sizes} $WORKING_DIR/pipeline_inputs/
-
-        cd $WORKING_DIR/pipeline_inputs/
-
-        ls -l
 
         # check genomic reference version and print to output txt file
         STRING=~{genome_fa}
@@ -304,11 +280,8 @@ task Hisat_paired_end {
         # untar the index files for hisat task
         start=$(date +%s)
         echo "Untarring tarred_index_files"
-
-        #take the basename of the demultiplexed fastq tar file
-        index_basename=$(basename ~{tarred_index_files})
-        pigz -dc $index_basename | tar -xf -
-        rm $index_basename
+        pigz -dc ~{tarred_index_files} | tar -xf -
+        rm ~{tarred_index_files}
 
         end=$(date +%s)
         elapsed=$((end - start))
@@ -329,33 +302,17 @@ task Hisat_paired_end {
         # untar the demultiplexed fastqs for sort and trim task
         start=$(date +%s)
         echo "Untar demultiplexed fastqs"
-        #take the basename of the demultiplexed fastq tar file
-
-        demultiplexed_basename=$(basename ~{tarred_demultiplexed_fastqs})
-        echo "the basename of the tarred_demultiplexed_fastqs is"
-        echo $demultiplexed_basename
-        echo "this is the wdl variable path:"
-        echo ~{tarred_demultiplexed_fastqs}
-
-        pigz -dc $demultiplexed_basename | tar -xf -
+        pigz -dc ~{tarred_demultiplexed_fastqs} | tar -xf -
         end=$(date +%s)
         elapsed=$((end - start))
         echo "Elapsed time to untar: $elapsed seconds"
 
-        echo "lsing current dir:"
-        ls -lR
-
-        echo "lsing root dir:"
-        ls -lR ~{cromwell_root_dir}
-
         # define lists of r1 and r2 fq files
         if [ ~{cromwell_root_dir} = "gcp" ]; then
             batch_dir="batch*/"
         else
             batch_dir="~{cromwell_root_dir}/*/*/*/*/*/*/*/*/*/*/*/batch*/"
         fi
-        echo "batchdirectory: $batch_dir"
-
 
         task() {
           local file=$1
@@ -364,9 +321,6 @@ task Hisat_paired_end {
 
           r2_file="${sample_id}-R2.fq.gz"
           r1_file="${sample_id}-R1.fq.gz"
-          echo "r1 file: $r1_file"
-          echo "r2 file: $r2_file"
-          echo "batch dir: $batch_dir"
           cp $batch_dir/"$r1_file" .
           cp $batch_dir/"$r2_file" .
 
@@ -414,9 +368,8 @@ task Hisat_paired_end {
           if [ ~{cromwell_root_dir} = "gcp" ]; then
             hisat_index_file_dir="~{cromwell_root_dir}/$genome_fa_basename"
           else
-            hisat_index_file_dir="$WORKING_DIR/pipeline_inputs/$genome_fa_basename"
+            hisat_index_file_dir="$WORKING_DIR/$genome_fa_basename"
           fi
-          echo "hisat_index_file_dir: $hisat_index_file_dir"
 
           hisat-3n $hisat_index_file_dir \
           -q \
@@ -461,17 +414,6 @@ task Hisat_paired_end {
       R1_files=($(ls $batch_dir | grep "\-R1.fq.gz"))
       R2_files=($(ls $batch_dir | grep "\-R2.fq.gz"))
 
-      echo "Found r1 files: $R1_files"
-      echo "Found r2 files: $R2_files"
-
-      # for file in "${R1_files[@]}"; do
-      # (
-      #   echo "starting task $file.."
-      #   du -h  batch*/$file
-      #   task "$file"
-      # )
-      # done
-
       # run 6 instances of task in parallel
       for file in "${R1_files[@]}"; do
         (
@@ -493,13 +435,6 @@ task Hisat_paired_end {
       ####################################
       ## make sure that the number of output bams equals the length of R1_files
       # Count the number of *.hisat3n_dna.unique_aligned.bam files
-      echo "lsing batch dir"
-      ls $batch_dir
-      echo "ls current dir"
-      ls
-      echo "lsing working dir"
-      echo $WORKING_DIR
-
       bam_count=$(find . -maxdepth 1 -type f -name '*.hisat3n_dna.unique_aligned.bam' | wc -l)
       fastq_counts=$(find . -maxdepth 1 -type f -name '*.split_reads*.fastq' | wc -l)
 
@@ -585,6 +520,7 @@ task Hisat_single_end {
         set -euo pipefail
         set -x
         lscpu
+        WORKING_DIR=`pwd`
         
         # untar the tarred index files
         echo "Untar tarred_index_files"
@@ -623,16 +559,27 @@ task Hisat_single_end {
         R1_files=($(ls | grep "\.hisat3n_dna.split_reads.R1.fastq"))
         R2_files=($(ls | grep "\.hisat3n_dna.split_reads.R2.fastq"))
 
+        echo "Found R1 files: $R1_files"
+        echo "Found R2 files: $R2_files"
+
+
         task() {
           BASE=$(basename "$file" ".hisat3n_dna.split_reads.R1.fastq")
           echo $BASE
           echo "Running hisat on sample_id_R1" $BASE
           
           echo "Hisat 3n R1" 
-          start=$(date +%s) 
+          start=$(date +%s)
+
+          if [ ~{cromwell_root_dir} = "gcp" ]; then
+            hisat_index_file_dir="~{cromwell_root_dir}/$genome_fa_basename"
+          else
+            hisat_index_file_dir="$WORKING_DIR/$genome_fa_basename"
+          fi
+
    
           # hisat on R1 single end
-          hisat-3n ~{cromwell_root_dir}/$genome_fa_basename \
+          hisat-3n $hisat_index_file_dir \
           -q \
           -U ${BASE}.hisat3n_dna.split_reads.R1.fastq \
           -S ${BASE}.hisat3n_dna.split_reads.R1.sam --directional-mapping-reverse --base-change C,T \
@@ -654,7 +601,7 @@ task Hisat_single_end {
          echo "Running hisat on sample_id_R2" $BASE
 
          # hisat on R2 single end
-         hisat-3n ~{cromwell_root_dir}/$genome_fa_basename \
+         hisat-3n $hisat_index_file_dir \
          -q \
          -U ${BASE}.hisat3n_dna.split_reads.R2.fastq \
          -S ${BASE}.hisat3n_dna.split_reads.R2.sam --directional-mapping --base-change C,T \
@@ -695,6 +642,8 @@ task Hisat_single_end {
          echo "Elapsed time to run samtools -q 10 $elapsed seconds"
 
          # remove_overlap_read_parts
+         echo "recusively ls cromwell root"
+         ls -lR ~{cromwell_root_dir}
          echo "call remove_overlap_read_parts" 
          start=$(date +%s) 
          python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path=os.path.join(os.path.sep,~{cromwell_root_dir},"'"$BASE"'.name_sorted.filtered.bam"),out_bam_path=os.path.join(os.path.sep,~{cromwell_root_dir},"'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam"))'

From 5162ae40125d70fcc3460e5fd7538d130d3d0a5c Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 10:16:49 -0400
Subject: [PATCH 122/186] copy fa file again

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index d4c70e2d5c..5866bdb24d 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -282,12 +282,12 @@ task Hisat_paired_end {
         echo "Untarring tarred_index_files"
         pigz -dc ~{tarred_index_files} | tar -xf -
         rm ~{tarred_index_files}
-
         end=$(date +%s)
         elapsed=$((end - start))
         echo "Elapsed time to untar tarred_index_files: $elapsed seconds"
 
         # get the basename of the genome_fa file
+        cp ~{genome_fa} .
         genome_fa_basename=$(basename ~{genome_fa} .fa)
 
         start=$(date +%s)

From 33b28b7e47634430a593cbb6161f8d210c5ff2f3 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 14 May 2024 10:41:38 -0400
Subject: [PATCH 123/186] rename some inputs for paired tag demultiplexing

---
 tasks/skylab/PairedTagUtils.wdl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl
index 8fc4d0d7a1..0fe171b60e 100644
--- a/tasks/skylab/PairedTagUtils.wdl
+++ b/tasks/skylab/PairedTagUtils.wdl
@@ -108,9 +108,9 @@ task PairedTagDemultiplex {
           then
           echo "FASTQ has correct index length, no modification necessary"
           ls -lh
-          mv "~{input_id}_R2_prefix.fq.gz" "~{r2_base}.fq.gz"
-          mv "~{input_id}_R1_prefix.fq.gz" "~{r1_base}.fq.gz"
-          mv "~{input_id}_R3_prefix.fq.gz" "~{r3_base}.fq.gz"
+          mv "~{input_id}_R2.fq.gz" "~{r2_base}.fq.gz"
+          mv "~{input_id}_R1.fq.gz" "~{r1_base}.fq.gz"
+          mv "~{input_id}_R3.fq.gz" "~{r3_base}.fq.gz"
         elif [[ $COUNT == 24 && ~{preindex} == "true" ]]
           then
           pass="false"

From c0dd6265a78cc5211f307c15953c8cb25e7e5b6a Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 14 May 2024 11:05:09 -0400
Subject: [PATCH 124/186] rename some intermediate inputs for paired tag
 demultiplexing

---
 tasks/skylab/PairedTagUtils.wdl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl
index a34b3c3187..ca5b6cf885 100644
--- a/tasks/skylab/PairedTagUtils.wdl
+++ b/tasks/skylab/PairedTagUtils.wdl
@@ -106,9 +106,9 @@ task PairedTagDemultiplex {
         elif [[ $COUNT == 24 && ~{preindex} == "false" ]]
           then
           echo "FASTQ has correct index length, no modification necessary"
-          mv "~{input_id}_R2_prefix.fq.gz" "~{r2_base}.fq.gz"
-          mv "~{input_id}_R1_prefix.fq.gz" "~{r1_base}.fq.gz"
-          mv "~{input_id}_R3_prefix.fq.gz" "~{r3_base}.fq.gz"
+          mv "~{input_id}_R2.fq.gz" "~{r2_base}.fq.gz"
+          mv "~{input_id}_R1.fq.gz" "~{r1_base}.fq.gz"
+          mv "~{input_id}_R3.fq.gz" "~{r3_base}.fq.gz"
         elif [[ $COUNT == 24 && ~{preindex} == "true" ]]
           then
           pass="false"

From a2ebd1b59914a1ef324a349db5d3b2da83176f8d Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 11:34:39 -0400
Subject: [PATCH 125/186] list cromwell root

---
 pipelines/skylab/snm3C/snm3C.wdl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 5866bdb24d..bd0a46e169 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -307,6 +307,9 @@ task Hisat_paired_end {
         elapsed=$((end - start))
         echo "Elapsed time to untar: $elapsed seconds"
 
+        echo "recursively list cromwell roo"
+        ls -lR ~{cromwell_root_dir}
+
         # define lists of r1 and r2 fq files
         if [ ~{cromwell_root_dir} = "gcp" ]; then
             batch_dir="batch*/"

From 9704a24644d7a649434f767f97ac28b261853e49 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 12:05:37 -0400
Subject: [PATCH 126/186] fix batch dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index bd0a46e169..7aca66de5f 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -314,7 +314,8 @@ task Hisat_paired_end {
         if [ ~{cromwell_root_dir} = "gcp" ]; then
             batch_dir="batch*/"
         else
-            batch_dir="~{cromwell_root_dir}/*/*/*/*/*/*/*/*/*/*/*/batch*/"
+            batch_dir="~{cromwell_root_dir}/*/*/*/*/*~{cromwell_root_dir}/*/*/*/*/batch*/"
+
         fi
 
         task() {

From aff8a9ca458c6a08cf7e1f228898f469573959f7 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 13:30:01 -0400
Subject: [PATCH 127/186] add conditional for input and output bams

---
 pipelines/skylab/snm3C/snm3C.wdl | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 7aca66de5f..e339e276b7 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -307,15 +307,11 @@ task Hisat_paired_end {
         elapsed=$((end - start))
         echo "Elapsed time to untar: $elapsed seconds"
 
-        echo "recursively list cromwell roo"
-        ls -lR ~{cromwell_root_dir}
-
         # define lists of r1 and r2 fq files
         if [ ~{cromwell_root_dir} = "gcp" ]; then
             batch_dir="batch*/"
         else
             batch_dir="~{cromwell_root_dir}/*/*/*/*/*~{cromwell_root_dir}/*/*/*/*/batch*/"
-
         fi
 
         task() {
@@ -648,9 +644,21 @@ task Hisat_single_end {
          # remove_overlap_read_parts
          echo "recusively ls cromwell root"
          ls -lR ~{cromwell_root_dir}
+
+         if [ ~{cromwell_root_dir} = "gcp" ]; then
+            filtered_bam_path="~{cromwell_root_dir}/$BASE.name_sorted.filtered.bam"
+            read_overlap_bam_path="~{cromwell_root_dir}/$BASE.hisat3n_dna.split_reads.read_overlap.bam"
+         else
+            filtered_bam_path="$WORKING_DIR/$BASE.name_sorted.filtered.bam"
+            read_overlap_bam_path="$WORKING_DIR/$BASE.hisat3n_dna.split_reads.read_overlap.bam"
+         fi
+
+         echo "filtered bam path: $filtered_bam_path"
+         echo "read overlap bam path: $read_overlap_bam_path"
+
          echo "call remove_overlap_read_parts" 
          start=$(date +%s) 
-         python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path=os.path.join(os.path.sep,~{cromwell_root_dir},"'"$BASE"'.name_sorted.filtered.bam"),out_bam_path=os.path.join(os.path.sep,~{cromwell_root_dir},"'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam"))'
+         python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path=$filtered_bam_path,out_bam_path=$read_overlap_bam_path)'
          end=$(date +%s) 
          elapsed=$((end - start))  
          echo "Elapsed time to run remove overlap $elapsed seconds"
@@ -846,7 +854,7 @@ task Merge_sort_analyze {
         echo "Elapsed time to samtools index $elapsed seconds" 
         
         start=$(date +%s)  
-        echo "Call chromatin contacts from name sorted bams" 
+        echo "Call chromatin contacts from name sorted bams"
         python3 -c 'from cemba_data.hisat3n import *;import os;import glob;call_chromatin_contacts(bam_path="'"$sample_id"'.hisat3n_dna.all_reads.name_sort.bam",contact_prefix="'"$sample_id"'.hisat3n_dna.all_reads",save_raw=False,save_hic_format=True)'
         end=$(date +%s) 
         elapsed=$((end - start)) 

From f5ef9ad9cbd37bcf28e54aa59d0574248fce8821 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 14:03:14 -0400
Subject: [PATCH 128/186] fix logic for cloud provider vs. cromwell root dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index e339e276b7..5f8195bd02 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -72,7 +72,8 @@ workflow snm3C {
                 r2_right_cut = r2_right_cut,
                 plate_id = plate_id,
                 docker = docker_prefix + m3c_yap_hisat_docker,
-                cromwell_root_dir = cromwell_root_dir
+                cromwell_root_dir = cromwell_root_dir,
+                cloud_provider = cloud_provider,
         }
 
         call Hisat_single_end as Hisat_single_end {
@@ -82,7 +83,8 @@ workflow snm3C {
                 genome_fa = genome_fa,
                 plate_id = plate_id,
                 docker = docker_prefix + m3c_yap_hisat_docker,
-                cromwell_root_dir = cromwell_root_dir
+                cromwell_root_dir = cromwell_root_dir,
+                cloud_provider = cloud_provider
         }
 
         call Merge_sort_analyze as Merge_sort_analyze {
@@ -252,6 +254,7 @@ task Hisat_paired_end {
         String plate_id
         String docker
         String cromwell_root_dir
+        String cloud_provider
 
         String r1_adapter
         String r2_adapter
@@ -308,7 +311,7 @@ task Hisat_paired_end {
         echo "Elapsed time to untar: $elapsed seconds"
 
         # define lists of r1 and r2 fq files
-        if [ ~{cromwell_root_dir} = "gcp" ]; then
+        if [ ~{cloud_provider} = "gcp" ]; then
             batch_dir="batch*/"
         else
             batch_dir="~{cromwell_root_dir}/*/*/*/*/*~{cromwell_root_dir}/*/*/*/*/batch*/"
@@ -365,7 +368,7 @@ task Hisat_paired_end {
           # hisat run
           start=$(date +%s)
           echo "Run hisat"
-          if [ ~{cromwell_root_dir} = "gcp" ]; then
+          if [ ~{cloud_provider} = "gcp" ]; then
             hisat_index_file_dir="~{cromwell_root_dir}/$genome_fa_basename"
           else
             hisat_index_file_dir="$WORKING_DIR/$genome_fa_basename"
@@ -508,6 +511,7 @@ task Hisat_single_end {
         String plate_id
         String docker
         String cromwell_root_dir
+        String cloud_provider
 
         Int disk_size = 1000 
         Int mem_size = 64  
@@ -571,7 +575,7 @@ task Hisat_single_end {
           echo "Hisat 3n R1" 
           start=$(date +%s)
 
-          if [ ~{cromwell_root_dir} = "gcp" ]; then
+          if [ ~{cloud_provider} = "gcp" ]; then
             hisat_index_file_dir="~{cromwell_root_dir}/$genome_fa_basename"
           else
             hisat_index_file_dir="$WORKING_DIR/$genome_fa_basename"
@@ -645,7 +649,7 @@ task Hisat_single_end {
          echo "recusively ls cromwell root"
          ls -lR ~{cromwell_root_dir}
 
-         if [ ~{cromwell_root_dir} = "gcp" ]; then
+         if [ ~{cloud_provider} = "gcp" ]; then
             filtered_bam_path="~{cromwell_root_dir}/$BASE.name_sorted.filtered.bam"
             read_overlap_bam_path="~{cromwell_root_dir}/$BASE.hisat3n_dna.split_reads.read_overlap.bam"
          else

From e12283aee0a1f58e7d5f3cdce61b8c506fd6aaef Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 14:12:05 -0400
Subject: [PATCH 129/186] add ls

---
 pipelines/skylab/snm3C/snm3C.wdl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 5f8195bd02..60910bf37d 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -310,9 +310,12 @@ task Hisat_paired_end {
         elapsed=$((end - start))
         echo "Elapsed time to untar: $elapsed seconds"
 
+        echo "lsing cromwell root dir"
+        ls -lR ~{cromwell_root_dir}
+
         # define lists of r1 and r2 fq files
         if [ ~{cloud_provider} = "gcp" ]; then
-            batch_dir="batch*/"
+            batch_dir="~{cromwell_root_dir}/batch*/"
         else
             batch_dir="~{cromwell_root_dir}/*/*/*/*/*~{cromwell_root_dir}/*/*/*/*/batch*/"
         fi

From c6a12161edba87f32a41b61177384f91092719e6 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 14:22:29 -0400
Subject: [PATCH 130/186] fix batch dir in gcp

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 60910bf37d..5a9e4ffb8f 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -315,7 +315,7 @@ task Hisat_paired_end {
 
         # define lists of r1 and r2 fq files
         if [ ~{cloud_provider} = "gcp" ]; then
-            batch_dir="~{cromwell_root_dir}/batch*/"
+            batch_dir="~{cromwell_root_dir}~{cromwell_root_dir}/batch*/"
         else
             batch_dir="~{cromwell_root_dir}/*/*/*/*/*~{cromwell_root_dir}/*/*/*/*/batch*/"
         fi

From 106347500d8b30645d7d5f94f8e759d6be6d63eb Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 14:33:51 -0400
Subject: [PATCH 131/186] fix syntax error

---
 pipelines/skylab/snm3C/snm3C.wdl | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 5a9e4ffb8f..ae9bf86051 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -653,11 +653,9 @@ task Hisat_single_end {
          ls -lR ~{cromwell_root_dir}
 
          if [ ~{cloud_provider} = "gcp" ]; then
-            filtered_bam_path="~{cromwell_root_dir}/$BASE.name_sorted.filtered.bam"
-            read_overlap_bam_path="~{cromwell_root_dir}/$BASE.hisat3n_dna.split_reads.read_overlap.bam"
+            bam_path_prefix="~{cromwell_root_dir}"
          else
-            filtered_bam_path="$WORKING_DIR/$BASE.name_sorted.filtered.bam"
-            read_overlap_bam_path="$WORKING_DIR/$BASE.hisat3n_dna.split_reads.read_overlap.bam"
+            bam_path_prefix=$WORKING_DIR
          fi
 
          echo "filtered bam path: $filtered_bam_path"
@@ -665,7 +663,7 @@ task Hisat_single_end {
 
          echo "call remove_overlap_read_parts" 
          start=$(date +%s) 
-         python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path=$filtered_bam_path,out_bam_path=$read_overlap_bam_path)'
+         python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path=os.path.join(os.path.sep,'"$bam_path_prefix"',"'"$BASE"'.name_sorted.filtered.bam"),out_bam_path=os.path.join(os.path.sep,"$bam_path_prefix","'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam"))'
          end=$(date +%s) 
          elapsed=$((end - start))  
          echo "Elapsed time to run remove overlap $elapsed seconds"

From fefba082c98c2e79181096a967612ddfaef5dd12 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 15:39:18 -0400
Subject: [PATCH 132/186] ugh remove echo statements referencing variable that
 doesn't exist anymore

---
 pipelines/skylab/snm3C/snm3C.wdl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index ae9bf86051..07ad57de59 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -658,8 +658,7 @@ task Hisat_single_end {
             bam_path_prefix=$WORKING_DIR
          fi
 
-         echo "filtered bam path: $filtered_bam_path"
-         echo "read overlap bam path: $read_overlap_bam_path"
+         echo "bam_path_prefix $bam_path_prefix"
 
          echo "call remove_overlap_read_parts" 
          start=$(date +%s) 

From 30a6b9368dba835903fe5c96f76bc5f1bbd90885 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 15:53:06 -0400
Subject: [PATCH 133/186] attempt to fix syntax error

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 07ad57de59..918d3fb3bc 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -662,7 +662,7 @@ task Hisat_single_end {
 
          echo "call remove_overlap_read_parts" 
          start=$(date +%s) 
-         python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path=os.path.join(os.path.sep,'"$bam_path_prefix"',"'"$BASE"'.name_sorted.filtered.bam"),out_bam_path=os.path.join(os.path.sep,"$bam_path_prefix","'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam"))'
+         python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path="'"$BASE"'.name_sorted.filtered.bam",out_bam_path="'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam")'
          end=$(date +%s) 
          elapsed=$((end - start))  
          echo "Elapsed time to run remove overlap $elapsed seconds"

From ebb3c39381932e25350a7aafcb34a325716869a1 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Tue, 14 May 2024 16:31:20 -0400
Subject: [PATCH 134/186] add missing forward slash

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 918d3fb3bc..554a5f4ad0 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -812,7 +812,7 @@ task Merge_sort_analyze {
       # make directories
       mkdir ~{cromwell_root_dir}/output_bams
       mkdir ~{cromwell_root_dir}temp
-      mkdir ~{cromwell_root_dir}allc-${mcg_context}
+      mkdir ~{cromwell_root_dir}/allc-${mcg_context}
       
       task() {
         local file=$1

From 36a2ce931028754aac4423d3535b8230bf986b60 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Wed, 15 May 2024 10:16:10 -0400
Subject: [PATCH 135/186] fix directories, add ls commands

---
 pipelines/skylab/snm3C/snm3C.wdl | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 554a5f4ad0..58005e9f64 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -811,7 +811,7 @@ task Merge_sort_analyze {
 
       # make directories
       mkdir ~{cromwell_root_dir}/output_bams
-      mkdir ~{cromwell_root_dir}temp
+      mkdir ~{cromwell_root_dir}/temp
       mkdir ~{cromwell_root_dir}/allc-${mcg_context}
       
       task() {
@@ -864,6 +864,9 @@ task Merge_sort_analyze {
         elapsed=$((end - start)) 
         echo "Elapsed time to chromatin contacts $elapsed seconds"
 
+        echo "recursively ls cromwell root"
+        ls -lR ~{cromwell_root_dir}
+
         start=$(date +%s)  
         echo "Call allcools bam-to-allc from deduped.bams" 
         /opt/conda/bin/allcools bam-to-allc \
@@ -936,8 +939,11 @@ task Merge_sort_analyze {
       echo "Number of output files matches the length of the array."
       ####################################
 
+      echo "recursively ls'sing cromwell root again"
+      ls -lR ~{cromwell_root_dir}
+
       echo "Tar files."      
-      tar -cf - output_bams/*.matrix.txt | pigz > ~{plate_id}.dedup_unique_bam_and_index_unique_bam_stats.tar.gz
+      tar -cf - ~{cromwell_root_dir}/output_bams/*.matrix.txt | pigz > ~{plate_id}.dedup_unique_bam_and_index_unique_bam_stats.tar.gz
       tar -cf - *.hisat3n_dna.all_reads.name_sort.bam | pigz > ~{plate_id}.hisat3n_dna.all_reads.name_sort.tar.gz
     
       # tar outputs of call_chromatin_contacts
@@ -949,7 +955,7 @@ task Merge_sort_analyze {
       tar -cf - *.allc.tsv.gz | pigz > ~{plate_id}.allc.tsv.tar.gz
       tar -cf - *.allc.tsv.gz.tbi | pigz > ~{plate_id}.allc.tbi.tar.gz
       tar -cf -  *.allc.tsv.gz.count.csv | pigz > ~{plate_id}.allc.count.tar.gz
-      tar -cf -  ~{cromwell_root_dir}allc-${mcg_context}/*.gz | pigz > ~{plate_id}.extract-allc.tar.gz
+      tar -cf -  ~{cromwell_root_dir}/allc-${mcg_context}/*.gz | pigz > ~{plate_id}.extract-allc.tar.gz
       tar -cf -  ~{cromwell_root_dir}/allc-${mcg_context}/*.tbi | pigz > ~{plate_id}.extract-allc_tbi.tar.gz
     >>>
 

From 18ad64cff0f16ff42b242ccede18f8849843b051 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Wed, 15 May 2024 13:12:36 -0400
Subject: [PATCH 136/186] add more ls commands

---
 pipelines/skylab/snm3C/snm3C.wdl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 58005e9f64..f5a350abd2 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -1004,6 +1004,9 @@ task Summary {
     command <<<
         set -euo pipefail
 
+        echo "recursively ls'sing cromwell root in summary task"
+        ls -lR ~{cromwell_root_dir}
+
         mkdir ~{cromwell_root_dir}/fastq
         mkdir ~{cromwell_root_dir}/bam
         mkdir ~{cromwell_root_dir}/allc

From cd4f876f255e99ed76a72156bdde3e0b963739cb Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Wed, 15 May 2024 13:13:33 -0400
Subject: [PATCH 137/186] add more ls commands

---
 pipelines/skylab/snm3C/snm3C.wdl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index f5a350abd2..f18714036c 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -1004,6 +1004,7 @@ task Summary {
     command <<<
         set -euo pipefail
 
+
         echo "recursively ls'sing cromwell root in summary task"
         ls -lR ~{cromwell_root_dir}
 

From 6b2889e152f28dfc87221807de8f35a4a45270f8 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Wed, 15 May 2024 13:13:44 -0400
Subject: [PATCH 138/186] add more ls commands again

---
 pipelines/skylab/snm3C/snm3C.wdl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index f18714036c..f5a350abd2 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -1004,7 +1004,6 @@ task Summary {
     command <<<
         set -euo pipefail
 
-
         echo "recursively ls'sing cromwell root in summary task"
         ls -lR ~{cromwell_root_dir}
 

From 2c6d56b23ed2d9e8bd19a4a16b5add327a4d3f5f Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Wed, 15 May 2024 15:39:46 -0400
Subject: [PATCH 139/186] fix fasta file path

---
 pipelines/skylab/snm3C/snm3C.wdl | 262 ++++++++++++++++---------------
 1 file changed, 137 insertions(+), 125 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index f5a350abd2..ce858a19a9 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -98,7 +98,8 @@ workflow snm3C {
                chromosome_sizes = chromosome_sizes,
                plate_id = plate_id,
                docker = docker_prefix + m3c_yap_hisat_docker,
-               cromwell_root_dir = cromwell_root_dir
+               cromwell_root_dir = cromwell_root_dir,
+               cloud_provider = cloud_provider
         }
     }
 
@@ -467,24 +468,24 @@ task Hisat_paired_end {
       start=$(date +%s)
       tar -cf - *.trimmed.stats.txt | pigz > ~{plate_id}.trimmed_stats_files.tar.gz
       tar -cf - *.hisat3n_dna_summary.txt | pigz > ~{plate_id}.hisat3n_paired_end_stats_files.tar.gz
-      end=$(date +%s) 
-      elapsed=$((end - start))  
+      end=$(date +%s)
+      elapsed=$((end - start))
       echo "Elapsed time to run tar stats $elapsed seconds"
 
       # tar up the uniqe bams
       echo "Tar up unique bams"
       start=$(date +%s)
       tar -cf - *.hisat3n_dna.unique_aligned.bam | pigz > ~{plate_id}.hisat3n_paired_end_unique_bam_files.tar.gz
-      end=$(date +%s) 
-      elapsed=$((end - start))  
+      end=$(date +%s)
+      elapsed=$((end - start))
       echo "Elapsed time to run tar unique bams $elapsed seconds"
 
       # tar up the split fastq files
       echo "Tar up fastqs"
       start=$(date +%s)
       tar -cf - *.split_reads*.fastq | pigz > ~{plate_id}.hisat3n_paired_end_split_fastq_files.tar.gz
-      end=$(date +%s) 
-      elapsed=$((end - start))  
+      end=$(date +%s)
+      elapsed=$((end - start))
       echo "Elapsed time to run tar fastqs $elapsed seconds"
     >>>
 
@@ -516,11 +517,11 @@ task Hisat_single_end {
         String cromwell_root_dir
         String cloud_provider
 
-        Int disk_size = 1000 
-        Int mem_size = 64  
+        Int disk_size = 1000
+        Int mem_size = 64
         Int cpu = 32
         Int preemptible_tries = 2
-        String cpu_platform =  "Intel Ice Lake"    
+        String cpu_platform =  "Intel Ice Lake"
     }
 
     command <<<
@@ -528,40 +529,40 @@ task Hisat_single_end {
         set -x
         lscpu
         WORKING_DIR=`pwd`
-        
+
         # untar the tarred index files
         echo "Untar tarred_index_files"
-        start=$(date +%s)  
-        pigz -dc ~{tarred_index_files} | tar -xf - 
+        start=$(date +%s)
+        pigz -dc ~{tarred_index_files} | tar -xf -
         rm ~{tarred_index_files}
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to untar tarred_index_files: $elapsed seconds"
-    
+
         cp ~{genome_fa} .
 
         #get the basename of the genome_fa file
         echo "samtools faidx"
-        start=$(date +%s)  
+        start=$(date +%s)
         genome_fa_basename=$(basename ~{genome_fa} .fa)
         samtools faidx $genome_fa_basename.fa
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to samtools faidx: $elapsed seconds"
-    
+
         # untar the unmapped fastq files
         echo "Untar split_fq_tar"
-        start=$(date +%s)  
-        pigz -dc ~{split_fq_tar} | tar -xf - 
+        start=$(date +%s)
+        pigz -dc ~{split_fq_tar} | tar -xf -
         rm ~{split_fq_tar}
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to untar split_fq_tar: $elapsed seconds"
 
-        # make directories 
+        # make directories
         mkdir -p ~{cromwell_root_dir}/merged_sort_bams
         mkdir -p ~{cromwell_root_dir}/read_overlap
-   
+
         # define lists of r1 and r2 fq files
         R1_files=($(ls | grep "\.hisat3n_dna.split_reads.R1.fastq"))
         R2_files=($(ls | grep "\.hisat3n_dna.split_reads.R2.fastq"))
@@ -574,8 +575,8 @@ task Hisat_single_end {
           BASE=$(basename "$file" ".hisat3n_dna.split_reads.R1.fastq")
           echo $BASE
           echo "Running hisat on sample_id_R1" $BASE
-          
-          echo "Hisat 3n R1" 
+
+          echo "Hisat 3n R1"
           start=$(date +%s)
 
           if [ ~{cloud_provider} = "gcp" ]; then
@@ -584,7 +585,7 @@ task Hisat_single_end {
             hisat_index_file_dir="$WORKING_DIR/$genome_fa_basename"
           fi
 
-   
+
           # hisat on R1 single end
           hisat-3n $hisat_index_file_dir \
           -q \
@@ -596,15 +597,15 @@ task Hisat_single_end {
           -t \
           --new-summary \
           --summary-file ${BASE}.hisat3n_dna_split_reads_summary.R1.txt \
-          --threads 8 
-        
-         end=$(date +%s) 
-         elapsed=$((end - start))  
+          --threads 8
+
+         end=$(date +%s)
+         elapsed=$((end - start))
          echo "Elapsed time to run $elapsed seconds"
          echo "Finish running hisat on sample_id_R1" $BASE
-         
-         echo "Hisat 3n R2" 
-         start=$(date +%s)        
+
+         echo "Hisat 3n R2"
+         start=$(date +%s)
          echo "Running hisat on sample_id_R2" $BASE
 
          # hisat on R2 single end
@@ -619,33 +620,33 @@ task Hisat_single_end {
          --summary-file ${BASE}.hisat3n_dna_split_reads_summary.R2.txt \
          --threads 8
 
-         end=$(date +%s) 
-         elapsed=$((end - start)) 
+         end=$(date +%s)
+         elapsed=$((end - start))
          echo "Elapsed time to run $elapsed seconds"
          echo "Finish running hisat on sample_id_R2" $BASE
-        
+
          # samtools merge
-         echo "samtools merge R1 and R2" 
-         start=$(date +%s)        
+         echo "samtools merge R1 and R2"
+         start=$(date +%s)
          samtools merge -o ${BASE}.name_merged.sam ${BASE}.hisat3n_dna.split_reads.R1.sam ${BASE}.hisat3n_dna.split_reads.R2.sam -@8
-         end=$(date +%s) 
-         elapsed=$((end - start))  
+         end=$(date +%s)
+         elapsed=$((end - start))
          echo "Elapsed time to run samtools merge $elapsed seconds"
-                  
-         # samtools sort 
-         echo "samtools sort R1 and R2" 
-         start=$(date +%s)        
+
+         # samtools sort
+         echo "samtools sort R1 and R2"
+         start=$(date +%s)
          samtools sort -n -@8 -m1g ${BASE}.name_merged.sam -o ${BASE}.name_sorted.bam
-         end=$(date +%s) 
-         elapsed=$((end - start)) 
+         end=$(date +%s)
+         elapsed=$((end - start))
          echo "Elapsed time to run samtools sort $elapsed seconds"
 
          # samtools filter bam
-         echo "samtools -q 10" 
-         start=$(date +%s)  
+         echo "samtools -q 10"
+         start=$(date +%s)
          samtools view -q 10 ${BASE}.name_sorted.bam -o ${BASE}.name_sorted.filtered.bam
-         end=$(date +%s) 
-         elapsed=$((end - start)) 
+         end=$(date +%s)
+         elapsed=$((end - start))
          echo "Elapsed time to run samtools -q 10 $elapsed seconds"
 
          # remove_overlap_read_parts
@@ -660,14 +661,14 @@ task Hisat_single_end {
 
          echo "bam_path_prefix $bam_path_prefix"
 
-         echo "call remove_overlap_read_parts" 
-         start=$(date +%s) 
+         echo "call remove_overlap_read_parts"
+         start=$(date +%s)
          python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path="'"$BASE"'.name_sorted.filtered.bam",out_bam_path="'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam")'
-         end=$(date +%s) 
-         elapsed=$((end - start))  
+         end=$(date +%s)
+         elapsed=$((end - start))
          echo "Elapsed time to run remove overlap $elapsed seconds"
-      
-         # remove files 
+
+         # remove files
          rm ${BASE}.hisat3n_dna.split_reads.R1.fastq ${BASE}.hisat3n_dna.split_reads.R2.fastq
          rm ${BASE}.hisat3n_dna.split_reads.R1.sam ${BASE}.hisat3n_dna.split_reads.R2.sam
          rm ${BASE}.name_merged.sam
@@ -695,10 +696,10 @@ task Hisat_single_end {
         ## make sure that the number of output bams equals the length of R1_files
         # Count the number of bam files
         bam_count=$(find . -maxdepth 1 -type f -name '*read_overlap.bam' | wc -l)
- 
+
         # Get the length of the array ${R1_files[@]}
         array_length=${#R1_files[@]}
-        
+
         # Check if the count of bams matches the length of the array ${R1_files[@]}
         if [ "$bam_count" -ne "$array_length" ]; then
            echo "Error: Number of BAM files does not match the length of the array."
@@ -712,16 +713,16 @@ task Hisat_single_end {
         # tar up the r1 and r2 stats files -p to set number of threads
         tar -cf - *.hisat3n_dna_split_reads_summary.R1.txt | pigz > ~{plate_id}.hisat3n_dna_split_reads_summary.R1.tar.gz
         tar -cf - *.hisat3n_dna_split_reads_summary.R2.txt | pigz > ~{plate_id}.hisat3n_dna_split_reads_summary.R2.tar.gz
-        end=$(date +%s) 
-        elapsed=$((end - start))  
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to run tar summary text files $elapsed seconds"
-     
+
         # tar up read overlap files
         echo "Tar up read_overlap bams"
         start=$(date +%s)
         tar -cf - *read_overlap.bam | pigz > ~{plate_id}.remove_overlap_read_parts.tar.gz
-        end=$(date +%s) 
-        elapsed=$((end - start))  
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to tar read_overlap bams $elapsed seconds"
     >>>
 
@@ -738,10 +739,10 @@ task Hisat_single_end {
          File hisat3n_dna_split_reads_summary_R1_tar = "~{plate_id}.hisat3n_dna_split_reads_summary.R1.tar.gz"
          File hisat3n_dna_split_reads_summary_R2_tar = "~{plate_id}.hisat3n_dna_split_reads_summary.R2.tar.gz"
          File remove_overlaps_output_bam_tar = "~{plate_id}.remove_overlap_read_parts.tar.gz"
-    
+
     }
 }
-  
+
 task Merge_sort_analyze {
     input {
         String plate_id
@@ -749,6 +750,7 @@ task Merge_sort_analyze {
         File read_overlap_tar
         String docker
         String cromwell_root_dir
+        String cloud_provider
 
         #input for allcools bam-to-allc
         File genome_fa
@@ -769,33 +771,35 @@ task Merge_sort_analyze {
       set -euo pipefail
       set -x
       lscpu
-      
+
+      WORKING_DIR=`pwd`
+
       # unzip tars
       echo "Untar paired_end_unique_tar"
-      start=$(date +%s)  
-      pigz -dc ~{paired_end_unique_tar} | tar -xf -  
+      start=$(date +%s)
+      pigz -dc ~{paired_end_unique_tar} | tar -xf -
       rm ~{paired_end_unique_tar}
-      end=$(date +%s) 
-      elapsed=$((end - start)) 
+      end=$(date +%s)
+      elapsed=$((end - start))
       echo "Elapsed time to untar paired_end_unique_tar: $elapsed seconds"
 
       echo "Untar read_overlap_tar"
-      start=$(date +%s)  
-      pigz -dc ~{read_overlap_tar} | tar -xf -  
+      start=$(date +%s)
+      pigz -dc ~{read_overlap_tar} | tar -xf -
       rm ~{read_overlap_tar}
-      end=$(date +%s) 
-      elapsed=$((end - start)) 
+      end=$(date +%s)
+      elapsed=$((end - start))
       echo "Elapsed time to untar read_overlap_tar: $elapsed seconds"
-      
-      # reference and index 
-      start=$(date +%s)  
+
+      # reference and index
+      start=$(date +%s)
       echo "Reference and index fasta"
       mkdir reference
       cp ~{genome_fa} reference
       ls reference
       samtools faidx reference/*.fa
-      end=$(date +%s) 
-      elapsed=$((end - start)) 
+      end=$(date +%s)
+      elapsed=$((end - start))
       echo "Elapsed time to index fasta $elapsed seconds"
 
       # define lists of r1 and r2 fq files
@@ -813,94 +817,102 @@ task Merge_sort_analyze {
       mkdir ~{cromwell_root_dir}/output_bams
       mkdir ~{cromwell_root_dir}/temp
       mkdir ~{cromwell_root_dir}/allc-${mcg_context}
-      
+
       task() {
         local file=$1
         sample_id=$(basename "$file" ".hisat3n_dna.unique_aligned.bam")
         echo $sample_id
 
-        start=$(date +%s)  
+        start=$(date +%s)
         echo "Merge all unique_aligned and read_overlap"
         samtools merge -f "${sample_id}.hisat3n_dna.all_reads.bam" "${sample_id}.hisat3n_dna.unique_aligned.bam" "${sample_id}.hisat3n_dna.split_reads.read_overlap.bam" -@4
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to run merge $elapsed seconds"
 
-        start=$(date +%s)  
+        start=$(date +%s)
         echo "Sort all reads by name"
-        samtools sort -n -@4 -m1g -o "${sample_id}.hisat3n_dna.all_reads.name_sort.bam" "${sample_id}.hisat3n_dna.all_reads.bam" 
-        end=$(date +%s) 
-        elapsed=$((end - start))  
+        samtools sort -n -@4 -m1g -o "${sample_id}.hisat3n_dna.all_reads.name_sort.bam" "${sample_id}.hisat3n_dna.all_reads.bam"
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to run sort by name $elapsed seconds"
-        
-        start=$(date +%s)  
+
+        start=$(date +%s)
         echo "Sort all reads by position"
-        samtools sort -O BAM -@4 -m1g -o "${sample_id}.hisat3n_dna.all_reads.pos_sort.bam" "${sample_id}.hisat3n_dna.all_reads.name_sort.bam" 
-        end=$(date +%s) 
-        elapsed=$((end - start))  
+        samtools sort -O BAM -@4 -m1g -o "${sample_id}.hisat3n_dna.all_reads.pos_sort.bam" "${sample_id}.hisat3n_dna.all_reads.name_sort.bam"
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to run sort by pos $elapsed seconds"
-        
-        start=$(date +%s)  
+
+        start=$(date +%s)
         echo "Call Picard remove duplicates"
         name=${sample_id}.hisat3n_dna.all_reads.deduped
         picard MarkDuplicates I=${sample_id}.hisat3n_dna.all_reads.pos_sort.bam O=~{cromwell_root_dir}/output_bams/${name}.bam \
         M=~{cromwell_root_dir}/output_bams/${name}.matrix.txt \
         REMOVE_DUPLICATES=true TMP_DIR=~{cromwell_root_dir}/temp
-        end=$(date +%s) 
-        elapsed=$((end - start))  
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to run picard $elapsed seconds"
-        
-        start=$(date +%s)  
+
+        start=$(date +%s)
         echo "Call samtools index"
         samtools index ~{cromwell_root_dir}/output_bams/${name}.bam
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
-        echo "Elapsed time to samtools index $elapsed seconds" 
-        
-        start=$(date +%s)  
+        end=$(date +%s)
+        elapsed=$((end - start))
+        echo "Elapsed time to samtools index $elapsed seconds"
+
+        start=$(date +%s)
         echo "Call chromatin contacts from name sorted bams"
         python3 -c 'from cemba_data.hisat3n import *;import os;import glob;call_chromatin_contacts(bam_path="'"$sample_id"'.hisat3n_dna.all_reads.name_sort.bam",contact_prefix="'"$sample_id"'.hisat3n_dna.all_reads",save_raw=False,save_hic_format=True)'
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to chromatin contacts $elapsed seconds"
 
         echo "recursively ls cromwell root"
         ls -lR ~{cromwell_root_dir}
 
-        start=$(date +%s)  
-        echo "Call allcools bam-to-allc from deduped.bams" 
+        if [ ~{cloud_provider} = "gcp" ]; then
+            reference_fasta="~{cromwell_root_dir}/reference/~{genome_base}"
+          else
+            reference_fasta="$WORKING_DIR/reference/~{genome_base}"
+        fi
+
+        echo "reference fast location: $reference_fasta"
+
+        start=$(date +%s)
+        echo "Call allcools bam-to-allc from deduped.bams"
         /opt/conda/bin/allcools bam-to-allc \
         --bam_path ~{cromwell_root_dir}/output_bams/${name}.bam \
-        --reference_fasta ~{cromwell_root_dir}/reference/~{genome_base} \
+        --reference_fasta $reference_fasta \
         --output_path "${sample_id}.allc.tsv.gz" \
         --num_upstr_bases ~{num_upstr_bases} \
         --num_downstr_bases ~{num_downstr_bases} \
         --compress_level ~{compress_level} \
         --save_count_df \
         --convert_bam_strandness
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to allcools bam-to-allc $elapsed seconds"
 
-        start=$(date +%s)  
-        echo "Call allcools extract-all" 
+        start=$(date +%s)
+        echo "Call allcools extract-all"
         allcools extract-allc --strandness merge \
         --allc_path ${sample_id}.allc.tsv.gz \
         --output_prefix ~{cromwell_root_dir}/allc-${mcg_context}/${sample_id} \
         --mc_contexts ${mcg_context} \
         --chrom_size_path ~{chromosome_sizes}
-        end=$(date +%s) 
-        elapsed=$((end - start)) 
+        end=$(date +%s)
+        elapsed=$((end - start))
         echo "Elapsed time to allcools extract-all $elapsed seconds"
-        
+
         echo "Remove some bams"
         rm ${sample_id}.hisat3n_dna.all_reads.bam
         rm ${sample_id}.hisat3n_dna.all_reads.pos_sort.bam
         rm ~{cromwell_root_dir}/${sample_id}.hisat3n_dna.split_reads.read_overlap.bam
         rm ~{cromwell_root_dir}/${sample_id}.hisat3n_dna.unique_aligned.bam
       }
- 
-      # run 4 instances of task in parallel 
+
+      # run 4 instances of task in parallel
       for file in "${UNIQUE_BAMS[@]}"; do
         (
           echo "starting task $file.."
@@ -922,7 +934,7 @@ task Merge_sort_analyze {
       # Count the number of *.hisat3n_dna.unique_aligned.bam files
       bam_count=$(find . -maxdepth 1 -type f -name '*.hisat3n_dna.all_reads.name_sort.bam' | wc -l)
       contact_count=$(find . -maxdepth 1 -type f -name '*.hisat3n_dna.all_reads.3C.contact.tsv.gz' | wc -l)
- 
+
       # Get the length of the array ${UNIQUE_BAMS[@]}
       array_length=${#UNIQUE_BAMS[@]}
 
@@ -942,15 +954,15 @@ task Merge_sort_analyze {
       echo "recursively ls'sing cromwell root again"
       ls -lR ~{cromwell_root_dir}
 
-      echo "Tar files."      
+      echo "Tar files."
       tar -cf - ~{cromwell_root_dir}/output_bams/*.matrix.txt | pigz > ~{plate_id}.dedup_unique_bam_and_index_unique_bam_stats.tar.gz
       tar -cf - *.hisat3n_dna.all_reads.name_sort.bam | pigz > ~{plate_id}.hisat3n_dna.all_reads.name_sort.tar.gz
-    
+
       # tar outputs of call_chromatin_contacts
       tar -cf - *.hisat3n_dna.all_reads.3C.contact.tsv.gz | pigz > ~{plate_id}.hisat3n_dna.all_reads.3C.contact.tar.gz
       tar -cf - *.hisat3n_dna.all_reads.dedup_contacts.tsv.gz | pigz > ~{plate_id}.hisat3n_dna.all_reads.dedup_contacts.tar.gz
       tar -cf - *.hisat3n_dna.all_reads.contact_stats.csv | pigz > ~{plate_id}.chromatin_contact_stats.tar.gz
-      
+
       # tar outputs of allcools
       tar -cf - *.allc.tsv.gz | pigz > ~{plate_id}.allc.tsv.tar.gz
       tar -cf - *.allc.tsv.gz.tbi | pigz > ~{plate_id}.allc.tbi.tar.gz
@@ -967,7 +979,7 @@ task Merge_sort_analyze {
         cpuPlatform: cpu_platform
         preemptible: preemptible_tries
     }
-    
+
      output {
         File allc = "~{plate_id}.allc.tsv.tar.gz"
         File tbi = "~{plate_id}.allc.tbi.tar.gz"

From edf0460b9024bee59454e50de4d07d13c874a4c3 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Wed, 15 May 2024 15:52:20 -0400
Subject: [PATCH 140/186] changing docker inputs to allow for tests to pass
 against pipelines that have not yet been azurized

---
 .../germline/variant_calling/VariantCalling.wdl     | 13 +++++++++----
 tasks/broad/BamProcessing.wdl                       |  3 ++-
 tasks/broad/DragenTasks.wdl                         |  3 ++-
 tasks/broad/GermlineVariantDiscovery.wdl            |  5 +++--
 tasks/broad/Qc.wdl                                  |  6 ++++--
 tasks/broad/Utilities.wdl                           |  3 ++-
 6 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index ca0c0de091..049ab2836d 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -44,9 +44,14 @@ workflow VariantCalling {
   String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:latest"
   String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure
 
-  String picard_cloud_docker_gcp = "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039"
-  String picard_cloud_docker_azure = "dsppipelinedev.azurecr.io/picard-python:1.0.0-2.26.10-1663951039"
+  String picard_python_docker_gcp = "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039"
+  String picard_python_docker_azure = "dsppipelinedev.azurecr.io/picard-python:1.0.0-2.26.10-1663951039"
+  String picard_python_docker = if cloud_provider == "gcp" then picard_python_docker_gcp else picard_python_docker_azure
+
+  String picard_cloud_docker_gcp = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
+  String picard_cloud_docker_azure = "dsppipelinedev.azurecr.io/picard-cloud:2.26.10"
   String picard_cloud_docker = if cloud_provider == "gcp" then picard_cloud_docker_gcp else picard_cloud_docker_azure
+  
 
   # make sure either gcp or azr is supplied as cloud_provider input
   if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
@@ -82,7 +87,7 @@ workflow VariantCalling {
       interval_list = calling_interval_list,
       scatter_count = haplotype_scatter_count,
       break_bands_at_multiples_of = break_bands_at_multiples_of,
-      docker = picard_cloud_docker
+      docker = picard_python_docker
   }
 
   # We need disk to localize the sharded input and output due to the scatter for HaplotypeCaller.
@@ -210,7 +215,7 @@ workflow VariantCalling {
       calling_interval_list = calling_interval_list,
       is_gvcf = make_gvcf,
       extra_args = if (skip_reblocking == false) then "--no-overlaps" else "",
-      docker_path = gatk_docker,
+      docker_path = picard_cloud_docker,
       preemptible_tries = agg_preemptible_tries
   }
 
diff --git a/tasks/broad/BamProcessing.wdl b/tasks/broad/BamProcessing.wdl
index 13d88c4f5f..cf4ff4d4e2 100644
--- a/tasks/broad/BamProcessing.wdl
+++ b/tasks/broad/BamProcessing.wdl
@@ -24,7 +24,8 @@ task SortSam {
     Int compression_level
     Int additional_disk = 20
     Int memory_multiplier = 1
-    String docker
+    #Setting default docker value for workflows that haven't yet been azurized. 
+    String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
   }
   # SortSam spills to disk a lot more because we are only store 300000 records in RAM now because its faster for our data so it needs
   # more disk space.  Also it spills to disk in an uncompressed format so we need to account for that with a larger multiplier
diff --git a/tasks/broad/DragenTasks.wdl b/tasks/broad/DragenTasks.wdl
index 7e28b793bd..95b27f2fcc 100644
--- a/tasks/broad/DragenTasks.wdl
+++ b/tasks/broad/DragenTasks.wdl
@@ -24,7 +24,8 @@ task CalibrateDragstrModel {
     File str_table_file
     File alignment ## can handle cram or bam.
     File alignment_index
-    String docker
+    #Setting default docker value for workflows that haven't yet been azurized. 
+    String docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
     Int preemptible_tries = 3
     Int threads = 4
     Int? memory_mb
diff --git a/tasks/broad/GermlineVariantDiscovery.wdl b/tasks/broad/GermlineVariantDiscovery.wdl
index d6bcb77298..fb224e2fe7 100644
--- a/tasks/broad/GermlineVariantDiscovery.wdl
+++ b/tasks/broad/GermlineVariantDiscovery.wdl
@@ -27,7 +27,8 @@ task HaplotypeCaller_GATK35_GVCF {
     Float? contamination
     Int preemptible_tries
     Int hc_scatter
-    String docker
+    #Setting default docker value for workflows that haven't yet been azurized. 
+    String docker = "us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384"
   }
 
   parameter_meta {
@@ -97,7 +98,7 @@ task HaplotypeCaller_GATK4_VCF {
     Boolean use_dragen_hard_filtering = false
     Boolean use_spanning_event_genotyping = true
     File? dragstr_model
-    String gatk_docker 
+    String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
     Int memory_multiplier = 1
   }
   
diff --git a/tasks/broad/Qc.wdl b/tasks/broad/Qc.wdl
index 12d3208d86..56a37e7973 100644
--- a/tasks/broad/Qc.wdl
+++ b/tasks/broad/Qc.wdl
@@ -622,7 +622,8 @@ task ValidateVCF {
     Int preemptible_tries = 3
     Boolean is_gvcf = true
     String? extra_args
-    String docker_path
+    #Setting default docker value for workflows that haven't yet been azurized. 
+    String docker_path = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
     Int machine_mem_mb = 7000
   }
 
@@ -677,7 +678,8 @@ task CollectVariantCallingMetrics {
     File evaluation_interval_list
     Boolean is_gvcf = true
     Int preemptible_tries
-    String docker
+    #Setting default docker value for workflows that haven't yet been azurized. 
+    String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
   }
 
   Int disk_size = ceil(size(input_vcf, "GiB") + size(dbsnp_vcf, "GiB")) + 20
diff --git a/tasks/broad/Utilities.wdl b/tasks/broad/Utilities.wdl
index 3ad524c90d..e6a1aeec17 100644
--- a/tasks/broad/Utilities.wdl
+++ b/tasks/broad/Utilities.wdl
@@ -79,7 +79,8 @@ task ScatterIntervalList {
     File interval_list
     Int scatter_count
     Int break_bands_at_multiples_of
-    String docker
+    #Setting default docker value for workflows that haven't yet been azurized. 
+    String docker = "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039"
   }
 
   command <<<

From 429b63576128dc470773fbc2b5e603a93625b6be Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Wed, 15 May 2024 16:58:41 -0400
Subject: [PATCH 141/186] add more logging, put in cromwell root for move
 rather than local dir

---
 pipelines/skylab/snm3C/snm3C.wdl | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index ce858a19a9..c4fc7bdf83 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -1016,8 +1016,11 @@ task Summary {
     command <<<
         set -euo pipefail
 
-        echo "recursively ls'sing cromwell root in summary task"
+        echo "recursively lsing cromwell root in summary task"
         ls -lR ~{cromwell_root_dir}
+        echo "lsing current dir"
+        ls -lrt
+
 
         mkdir ~{cromwell_root_dir}/fastq
         mkdir ~{cromwell_root_dir}/bam
@@ -1045,9 +1048,12 @@ task Summary {
         extract_and_remove ~{sep=' ' allc_uniq_reads_stats}
         extract_and_remove ~{sep=' ' unique_reads_cgn_extraction_tbi}
 
+        echo "lsing cromwell root again"
+        ls -lrt ~{cromwell_root_dir}
+
         mv *.trimmed.stats.txt ~{cromwell_root_dir}/fastq
         mv *.hisat3n_dna_summary.txt *.hisat3n_dna_split_reads_summary.R1.txt *.hisat3n_dna_split_reads_summary.R2.txt ~{cromwell_root_dir}/bam
-        mv output_bams/*.hisat3n_dna.all_reads.deduped.matrix.txt ~{cromwell_root_dir}/bam
+        mv ~{cromwell_root_dir}/output_bams/*.hisat3n_dna.all_reads.deduped.matrix.txt ~{cromwell_root_dir}/bam
         mv *.hisat3n_dna.all_reads.contact_stats.csv ~{cromwell_root_dir}/hic
         mv *.allc.tsv.gz.count.csv ~{cromwell_root_dir}/allc
         mv ~{cromwell_root_dir}/allc-CGN/*.allc.tsv.gz.tbi ~{cromwell_root_dir}/allc

From 2c3a6c5072a205261f3eee75f5600f414b275310 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 16 May 2024 09:46:29 -0400
Subject: [PATCH 142/186] fix to docker task inputs to allow default values

---
 tasks/broad/GermlineVariantDiscovery.wdl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tasks/broad/GermlineVariantDiscovery.wdl b/tasks/broad/GermlineVariantDiscovery.wdl
index fb224e2fe7..d6b6e55cc3 100644
--- a/tasks/broad/GermlineVariantDiscovery.wdl
+++ b/tasks/broad/GermlineVariantDiscovery.wdl
@@ -98,6 +98,7 @@ task HaplotypeCaller_GATK4_VCF {
     Boolean use_dragen_hard_filtering = false
     Boolean use_spanning_event_genotyping = true
     File? dragstr_model
+    #Setting default docker value for workflows that haven't yet been azurized. 
     String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
     Int memory_multiplier = 1
   }
@@ -172,7 +173,8 @@ task MergeVCFs {
     Array[File] input_vcfs_indexes
     String output_vcf_name
     Int preemptible_tries = 3
-    String docker
+    #Setting default docker value for workflows that haven't yet been azurized. 
+    String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
   }
 
   Int disk_size = ceil(size(input_vcfs, "GiB") * 2.5) + 10

From ca4867a1581338ae7c05f50fc85001265fee71c7 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 16 May 2024 11:10:14 -0400
Subject: [PATCH 143/186] add more logging, add more ls, try tarring files in
 current dir rather than previous subdir

---
 pipelines/skylab/snm3C/snm3C.wdl | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index c4fc7bdf83..898bfdf012 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -1034,7 +1034,9 @@ task Summary {
                     return
             fi
             for tar in "${@}"; do
-                tar -xf "$tar"
+                echo "unstarring this file now: $tar"
+                tar -xfv "$tar"
+                echo "removing this tar file now: $tar"
                 rm "$tar"
             done
         }
@@ -1049,14 +1051,17 @@ task Summary {
         extract_and_remove ~{sep=' ' unique_reads_cgn_extraction_tbi}
 
         echo "lsing cromwell root again"
-        ls -lrt ~{cromwell_root_dir}
+        ls -lRt ~{cromwell_root_dir}
+
+        echo "lsing current directory again"
+        ls -lRt
 
         mv *.trimmed.stats.txt ~{cromwell_root_dir}/fastq
         mv *.hisat3n_dna_summary.txt *.hisat3n_dna_split_reads_summary.R1.txt *.hisat3n_dna_split_reads_summary.R2.txt ~{cromwell_root_dir}/bam
-        mv ~{cromwell_root_dir}/output_bams/*.hisat3n_dna.all_reads.deduped.matrix.txt ~{cromwell_root_dir}/bam
+        mv *.hisat3n_dna.all_reads.deduped.matrix.txt ~{cromwell_root_dir}/bam
         mv *.hisat3n_dna.all_reads.contact_stats.csv ~{cromwell_root_dir}/hic
         mv *.allc.tsv.gz.count.csv ~{cromwell_root_dir}/allc
-        mv ~{cromwell_root_dir}/allc-CGN/*.allc.tsv.gz.tbi ~{cromwell_root_dir}/allc
+        mv *.allc.tsv.gz.tbi ~{cromwell_root_dir}/allc
 
         python3 -c 'from cemba_data.hisat3n import *;snm3c_summary()'
         mv MappingSummary.csv.gz ~{plate_id}_MappingSummary.csv.gz

From 0a4ac85a8e3840760cd48bb86b1823ba7fa6bf9a Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 16 May 2024 12:18:38 -0400
Subject: [PATCH 144/186] use correct syntax for verbose untarring

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 898bfdf012..36eddce6eb 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -1035,7 +1035,7 @@ task Summary {
             fi
             for tar in "${@}"; do
                 echo "unstarring this file now: $tar"
-                tar -xfv "$tar"
+                tar -xvf "$tar"
                 echo "removing this tar file now: $tar"
                 rm "$tar"
             done

From b6ea9a5b423b58d8122b952e553b8f4f7928d3b8 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 16 May 2024 14:59:20 -0400
Subject: [PATCH 145/186] fix to docker version

---
 .../broad/dna_seq/germline/variant_calling/VariantCalling.wdl   | 2 +-
 tasks/broad/Qc.wdl                                              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index 049ab2836d..00d40172f8 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -215,7 +215,7 @@ workflow VariantCalling {
       calling_interval_list = calling_interval_list,
       is_gvcf = make_gvcf,
       extra_args = if (skip_reblocking == false) then "--no-overlaps" else "",
-      docker_path = picard_cloud_docker,
+      docker_path = gatk_docker,
       preemptible_tries = agg_preemptible_tries
   }
 
diff --git a/tasks/broad/Qc.wdl b/tasks/broad/Qc.wdl
index 56a37e7973..847a9b4683 100644
--- a/tasks/broad/Qc.wdl
+++ b/tasks/broad/Qc.wdl
@@ -623,7 +623,7 @@ task ValidateVCF {
     Boolean is_gvcf = true
     String? extra_args
     #Setting default docker value for workflows that haven't yet been azurized. 
-    String docker_path = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
+    String docker_path = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
     Int machine_mem_mb = 7000
   }
 

From 0db052e84e2022044ef2459a7962400a160049df Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Thu, 16 May 2024 16:21:27 -0400
Subject: [PATCH 146/186] set correct base dirs that are nested

---
 pipelines/skylab/snm3C/snm3C.wdl | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 36eddce6eb..87aa1ecf43 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -115,7 +115,8 @@ workflow snm3C {
             unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar,
             plate_id = plate_id,
             docker = docker_prefix + m3c_yap_hisat_docker,
-            cromwell_root_dir = cromwell_root_dir
+            cromwell_root_dir = cromwell_root_dir,
+            cloud_provider = cloud_provider
     }
 
     meta {
@@ -1006,6 +1007,7 @@ task Summary {
         Array[File] unique_reads_cgn_extraction_tbi
         String plate_id
         String cromwell_root_dir
+        String cloud_provider
 
         String docker
         Int disk_size = 80
@@ -1056,12 +1058,24 @@ task Summary {
         echo "lsing current directory again"
         ls -lRt
 
+        WORKING_DIR=`pwd`
+
+        if [ ~{cloud_provider} = "gcp" ]; then
+            matrix_files_dir="~{cromwell_root_dir}~{cromwell_root_dir}/output_bams"
+            allc_index_dir="~{cromwell_root_dir}~{cromwell_root_dir}/allc-*"
+        else
+            matrix_files_dir="$WORKING_DIR~{cromwell_root_dir}/output_bams"
+            allc_index_dir="$WORKING_DIR~{cromwell_root_dir}/allc-*"
+        fi
+        echo "matrix files dir: $matrix_files_dir"
+        echo "allc_index_dir: $allc_index_dir"
+
         mv *.trimmed.stats.txt ~{cromwell_root_dir}/fastq
         mv *.hisat3n_dna_summary.txt *.hisat3n_dna_split_reads_summary.R1.txt *.hisat3n_dna_split_reads_summary.R2.txt ~{cromwell_root_dir}/bam
-        mv *.hisat3n_dna.all_reads.deduped.matrix.txt ~{cromwell_root_dir}/bam
+        mv $matrix_files_dir/*.hisat3n_dna.all_reads.deduped.matrix.txt ~{cromwell_root_dir}/bam
         mv *.hisat3n_dna.all_reads.contact_stats.csv ~{cromwell_root_dir}/hic
         mv *.allc.tsv.gz.count.csv ~{cromwell_root_dir}/allc
-        mv *.allc.tsv.gz.tbi ~{cromwell_root_dir}/allc
+        mv $allc_index_dir/*.allc.tsv.gz.tbi ~{cromwell_root_dir}/allc
 
         python3 -c 'from cemba_data.hisat3n import *;snm3c_summary()'
         mv MappingSummary.csv.gz ~{plate_id}_MappingSummary.csv.gz

From 8878e2f525227d97f1a6808d0f845b56bb1bbc8a Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Fri, 17 May 2024 12:00:02 -0400
Subject: [PATCH 147/186] change to how cromwell root dir is set

---
 pipelines/skylab/snm3C/snm3C.wdl | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 689bba7849..076e8f1486 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -72,7 +72,6 @@ workflow snm3C {
                 r2_right_cut = r2_right_cut,
                 plate_id = plate_id,
                 docker = docker_prefix + m3c_yap_hisat_docker,
-                cromwell_root_dir = cromwell_root_dir
         }
 
         call Hisat_single_end as Hisat_single_end {
@@ -82,7 +81,6 @@ workflow snm3C {
                 genome_fa = genome_fa,
                 plate_id = plate_id,
                 docker = docker_prefix + m3c_yap_hisat_docker,
-                cromwell_root_dir = cromwell_root_dir
         }
 
         call Merge_sort_analyze as Merge_sort_analyze {
@@ -252,7 +250,6 @@ task Hisat_paired_end {
         File chromosome_sizes
         String plate_id
         String docker
-        String cromwell_root_dir
 
         String r1_adapter
         String r2_adapter
@@ -267,7 +264,10 @@ task Hisat_paired_end {
         Int preemptible_tries = 2
         String cpu_platform =  "Intel Ice Lake"
     }
-
+    
+    cromwell_root_dir=$(pwd)
+    batch_dir=$cromwell_root_dir/batch*
+    
     command <<<
         set -euo pipefail
         set -x
@@ -315,13 +315,6 @@ task Hisat_paired_end {
         echo "lsing cromwell root:"
         ls -lR ~{cromwell_root_dir}
 
-        # define lists of r1 and r2 fq files
-        if [ ~{cromwell_root_dir} = "gcp" ]; then
-            batch_dir="batch*/"
-        else
-            batch_dir="~{cromwell_root_dir}/*/*/*/*/*/~{cromwell_root_dir}/*/*/*/*/batch*/"
-        fi
-        echo "batchdirectory: $batch_dir"
 
 
         task() {

From 5d6aaf4a083828d7f402960b8d766e4a7daf9ffe Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Fri, 17 May 2024 12:03:29 -0400
Subject: [PATCH 148/186] correction to docker image being used

---
 .../dna_seq/germline/variant_calling/VariantCalling.wdl     | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index 00d40172f8..a7a53b444a 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -43,6 +43,10 @@ workflow VariantCalling {
   String gatk_docker_gcp = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
   String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:latest"
   String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure
+  
+  String gatk_1_3_docker_gcp = us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384
+  String gatk_1_3_docker_azure = us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384
+  String gatk_1_3_docker if cloud_provider == "gcp" then gatk_1_3_docker_gcp else gatk_1_3_docker_azure
 
   String picard_python_docker_gcp = "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039"
   String picard_python_docker_azure = "dsppipelinedev.azurecr.io/picard-python:1.0.0-2.26.10-1663951039"
@@ -112,7 +116,7 @@ workflow VariantCalling {
           contamination = contamination,
           preemptible_tries = agg_preemptible_tries,
           hc_scatter = hc_divisor,
-          docker = gatk_docker
+          docker = gatk_1_3_docker
       }
     }
 

From 2fe6f9230c46ec4e87294e9ea1f43316b9a9ecff Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 17 May 2024 13:57:23 -0400
Subject: [PATCH 149/186] add debugging

---
 pipelines/skylab/snm3C/snm3C.wdl | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 87aa1ecf43..96cdb69e03 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -1077,7 +1077,33 @@ task Summary {
         mv *.allc.tsv.gz.count.csv ~{cromwell_root_dir}/allc
         mv $allc_index_dir/*.allc.tsv.gz.tbi ~{cromwell_root_dir}/allc
 
-        python3 -c 'from cemba_data.hisat3n import *;snm3c_summary()'
+        cwd=`pwd`
+        echo "current working dir is: $cwd"
+
+
+        python3 <<CODE
+        from cemba_data.hisat3n import *
+        import os
+        working_dir = os.getcwd()
+        print(f"Current working direcetory is: {working_dir}")
+
+        print("Calling summary function")
+        snm3c_summary()
+
+        print("Called summry function")
+
+        working_dir = os.getcwd()
+        print(f"Current working direcetory is: {working_dir}")
+        print("These are the files located here:")
+        os.listdir()
+
+        CODE
+
+        cwd=`pwd`
+        echo "current working dir is: $cwd"
+        echo "recursively lsing cromwell root"
+        ls -lRt ~{cromwell_root_dir}
+
         mv MappingSummary.csv.gz ~{plate_id}_MappingSummary.csv.gz
 
     >>>

From ff3e6b27bec5b7c259311d002c6a1abb2c7b63de Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Fri, 17 May 2024 14:17:17 -0400
Subject: [PATCH 150/186] formatting fix

---
 .../dna_seq/germline/variant_calling/VariantCalling.wdl     | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index a7a53b444a..98b9fb77ee 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -44,9 +44,9 @@ workflow VariantCalling {
   String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:latest"
   String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure
   
-  String gatk_1_3_docker_gcp = us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384
-  String gatk_1_3_docker_azure = us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384
-  String gatk_1_3_docker if cloud_provider == "gcp" then gatk_1_3_docker_gcp else gatk_1_3_docker_azure
+  String gatk_1_3_docker_gcp = "us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384"
+  String gatk_1_3_docker_azure = "us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384"
+  String gatk_1_3_docker = if cloud_provider == "gcp" then gatk_1_3_docker_gcp else gatk_1_3_docker_azure
 
   String picard_python_docker_gcp = "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039"
   String picard_python_docker_azure = "dsppipelinedev.azurecr.io/picard-python:1.0.0-2.26.10-1663951039"

From 45de2a335e82d3a45246d47b406c843ba19b7415 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Fri, 17 May 2024 16:07:38 -0400
Subject: [PATCH 151/186] add logic for base directory based on gcp vs azure

---
 pipelines/skylab/snm3C/snm3C.wdl | 52 +++++++++++++++++---------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 96cdb69e03..0f206de50c 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -1023,11 +1023,26 @@ task Summary {
         echo "lsing current dir"
         ls -lrt
 
+        WORKING_DIR=`pwd`
+
+        if [ ~{cloud_provider} = "gcp" ]; then
+            base_directory=~{cromwell_root_dir}
+            matrix_files_dir="~{cromwell_root_dir}~{cromwell_root_dir}/output_bams"
+            allc_index_dir="~{cromwell_root_dir}~{cromwell_root_dir}/allc-*"
+        else
+            base_directory=$WORKING_DIR
+            matrix_files_dir="$WORKING_DIR~{cromwell_root_dir}/output_bams"
+            allc_index_dir="$WORKING_DIR~{cromwell_root_dir}/allc-*"
+        fi
+        echo "matrix files dir: $matrix_files_dir"
+        echo "allc_index_dir: $allc_index_dir"
+        echo "base directory is: $base_directory"
+
 
-        mkdir ~{cromwell_root_dir}/fastq
-        mkdir ~{cromwell_root_dir}/bam
-        mkdir ~{cromwell_root_dir}/allc
-        mkdir ~{cromwell_root_dir}/hic
+        mkdir $base_directory/fastq
+        mkdir $base_directory/bam
+        mkdir $base_directory/allc
+        mkdir $base_directory/hic
 
         extract_and_remove() {
             if [ $# -eq 0 ];
@@ -1058,24 +1073,12 @@ task Summary {
         echo "lsing current directory again"
         ls -lRt
 
-        WORKING_DIR=`pwd`
-
-        if [ ~{cloud_provider} = "gcp" ]; then
-            matrix_files_dir="~{cromwell_root_dir}~{cromwell_root_dir}/output_bams"
-            allc_index_dir="~{cromwell_root_dir}~{cromwell_root_dir}/allc-*"
-        else
-            matrix_files_dir="$WORKING_DIR~{cromwell_root_dir}/output_bams"
-            allc_index_dir="$WORKING_DIR~{cromwell_root_dir}/allc-*"
-        fi
-        echo "matrix files dir: $matrix_files_dir"
-        echo "allc_index_dir: $allc_index_dir"
-
-        mv *.trimmed.stats.txt ~{cromwell_root_dir}/fastq
-        mv *.hisat3n_dna_summary.txt *.hisat3n_dna_split_reads_summary.R1.txt *.hisat3n_dna_split_reads_summary.R2.txt ~{cromwell_root_dir}/bam
-        mv $matrix_files_dir/*.hisat3n_dna.all_reads.deduped.matrix.txt ~{cromwell_root_dir}/bam
-        mv *.hisat3n_dna.all_reads.contact_stats.csv ~{cromwell_root_dir}/hic
-        mv *.allc.tsv.gz.count.csv ~{cromwell_root_dir}/allc
-        mv $allc_index_dir/*.allc.tsv.gz.tbi ~{cromwell_root_dir}/allc
+        mv *.trimmed.stats.txt $base_directory/fastq
+        mv *.hisat3n_dna_summary.txt *.hisat3n_dna_split_reads_summary.R1.txt *.hisat3n_dna_split_reads_summary.R2.txt $base_directory/bam
+        mv $matrix_files_dir/*.hisat3n_dna.all_reads.deduped.matrix.txt $base_directory/bam
+        mv *.hisat3n_dna.all_reads.contact_stats.csv $base_directory/hic
+        mv *.allc.tsv.gz.count.csv $base_directory/allc
+        mv $allc_index_dir/*.allc.tsv.gz.tbi $base_directory/allc
 
         cwd=`pwd`
         echo "current working dir is: $cwd"
@@ -1090,12 +1093,13 @@ task Summary {
         print("Calling summary function")
         snm3c_summary()
 
-        print("Called summry function")
+        print("Called summary function")
 
         working_dir = os.getcwd()
         print(f"Current working direcetory is: {working_dir}")
         print("These are the files located here:")
-        os.listdir()
+        files = os.listdir()
+        print(files)
 
         CODE
 

From 6cc717b0790d41ac5cfdded34c4f6845895b37d5 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Mon, 20 May 2024 14:08:08 -0400
Subject: [PATCH 152/186] added notes to affected changelogs

---
 .../exome/ExomeGermlineSingleSample.changelog.md            | 5 +++++
 .../ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md    | 6 ++++++
 .../wgs/WholeGenomeGermlineSingleSample.changelog.md        | 3 +++
 pipelines/skylab/optimus/Optimus.changelog.md               | 5 +++++
 pipelines/skylab/snm3C/snm3C.changelog.md                   | 4 ++++
 5 files changed, 23 insertions(+)

diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
index acfffcef76..d30b3f70fe 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
@@ -1,3 +1,8 @@
+# 3.1.21
+2024-05-20 (Date of Last Commit)
+
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
+
 # 3.1.20
 2024-04-08 (Date of Last Commit)
 
diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
index 388d75b7fb..703ad6af50 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
@@ -1,3 +1,9 @@
+# 1.0.18
+2024-05-20 (Date of Last Commit)
+
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
+
+
 # 1.0.17
 2024-04-08 (Date of Last Commit)
 
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
index 747a7030a1..eef60fd476 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
@@ -1,3 +1,6 @@
+# 3.1.22
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
+
 # 3.1.21
 2024-04-08 (Date of Last Commit)
 
diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md
index eaac72c59b..ce591371fc 100644
--- a/pipelines/skylab/optimus/Optimus.changelog.md
+++ b/pipelines/skylab/optimus/Optimus.changelog.md
@@ -1,3 +1,8 @@
+# 6.6.4
+2024-05-20 (Date of Last Commit)
+
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
+
 # 6.6.3
 2024-05-08 (Date of Last Commit)
 
diff --git a/pipelines/skylab/snm3C/snm3C.changelog.md b/pipelines/skylab/snm3C/snm3C.changelog.md
index 0dbde2c845..5035568938 100644
--- a/pipelines/skylab/snm3C/snm3C.changelog.md
+++ b/pipelines/skylab/snm3C/snm3C.changelog.md
@@ -1,3 +1,7 @@
+# 4.0.2
+2024-05-20 (Date of Last Commit)
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to support ToA. This change does not affect the snM3C pipeline.
+
 # 4.0.1
 2024-04-18 (Date of Last Commit)
 * Updated the snM3C wdl to run on Azure. This change does not affect the snM3C pipeline.

From 3d78f0b669a33d8f583968d5416a2c97d27fdd7b Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Tue, 21 May 2024 09:17:13 -0400
Subject: [PATCH 153/186] updated changelog for WGS Single sample pipeline

---
 .../wgs/WholeGenomeGermlineSingleSample.changelog.md        | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
index eef60fd476..e329070b0b 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
@@ -1,10 +1,8 @@
-# 3.1.22
-* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
-
 # 3.1.21
-2024-04-08 (Date of Last Commit)
+2024-05-21 (Date of Last Commit)
 
 * Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.  
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
 
 # 3.1.20
 2024-03-26 (Date of Last Commit)

From 4f61003631f1c67d04979c3d8edc291f09b40fe2 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Tue, 21 May 2024 09:18:24 -0400
Subject: [PATCH 154/186] updated pipeline version to match changelog

---
 .../germline/single_sample/exome/ExomeGermlineSingleSample.wdl  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
index f5efc80b60..00769b467c 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
@@ -45,7 +45,7 @@ import "../../../../../../tasks/broad/Utilities.wdl" as utils
 # WORKFLOW DEFINITION
 workflow ExomeGermlineSingleSample {
 
-  String pipeline_version = "3.1.20"
+  String pipeline_version = "3.1.21"
 
 
   input {

From db223a848a86afbd6afe4bf19547ee3ee65a92ce Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Tue, 21 May 2024 09:19:27 -0400
Subject: [PATCH 155/186] updated pipeline version to match changelog

---
 .../single_sample/exome/ExomeGermlineSingleSample.changelog.md  | 2 +-
 .../single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
index d30b3f70fe..38e0008f64 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
@@ -1,5 +1,5 @@
 # 3.1.21
-2024-05-20 (Date of Last Commit)
+2024-05-21 (Date of Last Commit)
 
 * Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
 
diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl
index a404f5d561..b9270c3b3c 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl
@@ -50,7 +50,7 @@ workflow UltimaGenomicsWholeGenomeGermline {
     filtering_model_no_gt_name: "String describing the optional filtering model; default set to rf_model_ignore_gt_incl_hpol_runs"
   }
 
-  String pipeline_version = "1.0.17"
+  String pipeline_version = "1.0.18"
 
 
   References references = alignment_references.references

From 45b40a5cde9f77544a10388ac552525f938576ac Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Tue, 21 May 2024 09:20:44 -0400
Subject: [PATCH 156/186] updated pipeline version to match changelog

---
 .../broad/dna_seq/germline/variant_calling/VariantCalling.wdl   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index 98b9fb77ee..34df120d96 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -9,7 +9,7 @@ import "../../../../../tasks/broad/DragenTasks.wdl" as DragenTasks
 workflow VariantCalling {
 
 
-  String pipeline_version = "2.1.20"
+  String pipeline_version = "2.1.19"
 
 
   input {

From 6d0603fe804531f49d32f198192d05b4cddc8cbc Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Tue, 21 May 2024 09:22:37 -0400
Subject: [PATCH 157/186] updated changelog and pipeline version

---
 pipelines/broad/arrays/imputation/Imputation.changelog.md | 6 ++++++
 pipelines/broad/arrays/imputation/Imputation.wdl          | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pipelines/broad/arrays/imputation/Imputation.changelog.md b/pipelines/broad/arrays/imputation/Imputation.changelog.md
index e96dabb6a6..02b32dc771 100644
--- a/pipelines/broad/arrays/imputation/Imputation.changelog.md
+++ b/pipelines/broad/arrays/imputation/Imputation.changelog.md
@@ -1,3 +1,9 @@
+# 1.1.13
+2024-05-21 (Date of Last Commit)
+
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
+
+
 # 1.1.12
 2023-12-18 (Date of Last Commit)
 
diff --git a/pipelines/broad/arrays/imputation/Imputation.wdl b/pipelines/broad/arrays/imputation/Imputation.wdl
index 44d5a93cd0..2780b64e62 100644
--- a/pipelines/broad/arrays/imputation/Imputation.wdl
+++ b/pipelines/broad/arrays/imputation/Imputation.wdl
@@ -6,7 +6,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Imputation {
 
-  String pipeline_version = "1.1.12"
+  String pipeline_version = "1.1.13"
 
   input {
     Int chunkLength = 25000000

From f08c5c8ace02e2d8e6f3fde9ba6e4aa92a5dca61 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Tue, 21 May 2024 09:23:39 -0400
Subject: [PATCH 158/186] updated changelog and pipeline version

---
 .../arrays/imputation/BroadInternalImputation.changelog.md   | 5 +++++
 .../internal/arrays/imputation/BroadInternalImputation.wdl   | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md
index 0ac74c9794..6d45d66333 100644
--- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md
+++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md
@@ -1,3 +1,8 @@
+# 1.1.11
+2024-05-21 (Date of Last Commit)
+
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
+
 # 1.1.10
 2023-12-18 (Date of Last Commit)
 
diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl
index 3021fe6a4c..7b5e2958ce 100644
--- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl
+++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl
@@ -9,7 +9,7 @@ workflow BroadInternalImputation {
         description: "Push outputs of Imputation.wdl to TDR dataset table ImputationOutputsTable and split out Imputation arrays into ImputationWideOutputsTable."
         allowNestedInputs: true
     }
-    String pipeline_version = "1.1.10"
+    String pipeline_version = "1.1.11"
     
     input {
         # inputs to wrapper task 

From ef487b86d800a11fd223d63022edf60772d00dc1 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Tue, 21 May 2024 09:24:59 -0400
Subject: [PATCH 159/186] updated changelog and pipeline version

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 076e8f1486..ab9eec2689 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -44,7 +44,7 @@ workflow snm3C {
     }
 
     # version of the pipeline
-    String pipeline_version = "4.0.1"
+    String pipeline_version = "4.0.2"
 
     call Demultiplexing {
         input:

From d09e0bdb3f34afe7247a71744976fb3d47c6c82b Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Tue, 21 May 2024 09:26:11 -0400
Subject: [PATCH 160/186] updated changelog and pipeline version

---
 pipelines/skylab/optimus/Optimus.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index b4b0196f89..91d843a839 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -68,7 +68,7 @@ workflow Optimus {
   # version of this pipeline
 
 
-  String pipeline_version = "6.6.3"
+  String pipeline_version = "6.6.4"
 
 
   # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays

From 10d934bd369bbd91edf5fc85aaf4a25d533af261 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 23 May 2024 14:19:40 -0400
Subject: [PATCH 161/186] reverting snm3c changes

---
 pipelines/skylab/snm3C/snm3C.changelog.md |  4 ----
 pipelines/skylab/snm3C/snm3C.wdl          | 13 ++-----------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.changelog.md b/pipelines/skylab/snm3C/snm3C.changelog.md
index 5035568938..0dbde2c845 100644
--- a/pipelines/skylab/snm3C/snm3C.changelog.md
+++ b/pipelines/skylab/snm3C/snm3C.changelog.md
@@ -1,7 +1,3 @@
-# 4.0.2
-2024-05-20 (Date of Last Commit)
-* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to support ToA. This change does not affect the snM3C pipeline.
-
 # 4.0.1
 2024-04-18 (Date of Last Commit)
 * Updated the snM3C wdl to run on Azure. This change does not affect the snM3C pipeline.
diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 1a1a6f1aa0..0f206de50c 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -44,7 +44,7 @@ workflow snm3C {
     }
 
     # version of the pipeline
-    String pipeline_version = "4.0.2"
+    String pipeline_version = "4.0.1"
 
     call Demultiplexing {
         input:
@@ -72,10 +72,8 @@ workflow snm3C {
                 r2_right_cut = r2_right_cut,
                 plate_id = plate_id,
                 docker = docker_prefix + m3c_yap_hisat_docker,
-
                 cromwell_root_dir = cromwell_root_dir,
                 cloud_provider = cloud_provider,
-
         }
 
         call Hisat_single_end as Hisat_single_end {
@@ -85,10 +83,8 @@ workflow snm3C {
                 genome_fa = genome_fa,
                 plate_id = plate_id,
                 docker = docker_prefix + m3c_yap_hisat_docker,
-
                 cromwell_root_dir = cromwell_root_dir,
                 cloud_provider = cloud_provider
-
         }
 
         call Merge_sort_analyze as Merge_sort_analyze {
@@ -259,7 +255,6 @@ task Hisat_paired_end {
         File chromosome_sizes
         String plate_id
         String docker
-
         String cromwell_root_dir
         String cloud_provider
 
@@ -276,10 +271,7 @@ task Hisat_paired_end {
         Int preemptible_tries = 2
         String cpu_platform =  "Intel Ice Lake"
     }
-    
-    cromwell_root_dir=$(pwd)
-    batch_dir=$cromwell_root_dir/batch*
-    
+
     command <<<
         set -euo pipefail
         WORKING_DIR=`pwd`
@@ -323,7 +315,6 @@ task Hisat_paired_end {
         echo "lsing cromwell root dir"
         ls -lR ~{cromwell_root_dir}
 
-
         # define lists of r1 and r2 fq files
         if [ ~{cloud_provider} = "gcp" ]; then
             batch_dir="~{cromwell_root_dir}~{cromwell_root_dir}/batch*/"

From 3484065049cf518ca67bf916614330255b90e826 Mon Sep 17 00:00:00 2001
From: John Scira <jscira@broadinstitute.org>
Date: Thu, 23 May 2024 15:43:21 -0400
Subject: [PATCH 162/186] added notes for SlideSeq and changelog for updated
 tasks

---
 pipelines/skylab/slideseq/SlideSeq.changelog.md | 5 +++++
 pipelines/skylab/slideseq/SlideSeq.wdl          | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md
index b637f92696..74ad623070 100644
--- a/pipelines/skylab/slideseq/SlideSeq.changelog.md
+++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md
@@ -1,3 +1,8 @@
+# 3.1.7
+2024-05-23 (Date of Last Commit)
+
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
+
 # 3.1.6
 2024-05-07 (Date of Last Commit)
 
diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index 0502a32fcd..0998c8eb9b 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow SlideSeq {
 
-    String pipeline_version = "3.1.6"
+    String pipeline_version = "3.1.7"
 
     input {
         Array[File] r1_fastq

From 4e8b5e51e760b544d67fea376249428870b02c39 Mon Sep 17 00:00:00 2001
From: Nareh Sahakian <sahakian@broadinstitute.org>
Date: Wed, 29 May 2024 09:27:39 -0400
Subject: [PATCH 163/186] remove logging and debugging statements

---
 pipelines/skylab/snm3C/snm3C.wdl | 67 +-------------------------------
 1 file changed, 2 insertions(+), 65 deletions(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 0f206de50c..2e07e931c9 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -312,10 +312,6 @@ task Hisat_paired_end {
         elapsed=$((end - start))
         echo "Elapsed time to untar: $elapsed seconds"
 
-        echo "lsing cromwell root dir"
-        ls -lR ~{cromwell_root_dir}
-
-        # define lists of r1 and r2 fq files
         if [ ~{cloud_provider} = "gcp" ]; then
             batch_dir="~{cromwell_root_dir}~{cromwell_root_dir}/batch*/"
         else
@@ -426,7 +422,6 @@ task Hisat_paired_end {
       for file in "${R1_files[@]}"; do
         (
           echo "starting task $file.."
-          du -h  $batch_dir/$file
           task "$file"
           sleep $(( (RANDOM % 3) + 1))
         ) &
@@ -568,10 +563,6 @@ task Hisat_single_end {
         R1_files=($(ls | grep "\.hisat3n_dna.split_reads.R1.fastq"))
         R2_files=($(ls | grep "\.hisat3n_dna.split_reads.R2.fastq"))
 
-        echo "Found R1 files: $R1_files"
-        echo "Found R2 files: $R2_files"
-
-
         task() {
           BASE=$(basename "$file" ".hisat3n_dna.split_reads.R1.fastq")
           echo $BASE
@@ -650,18 +641,13 @@ task Hisat_single_end {
          elapsed=$((end - start))
          echo "Elapsed time to run samtools -q 10 $elapsed seconds"
 
-         # remove_overlap_read_parts
-         echo "recusively ls cromwell root"
-         ls -lR ~{cromwell_root_dir}
-
          if [ ~{cloud_provider} = "gcp" ]; then
             bam_path_prefix="~{cromwell_root_dir}"
          else
             bam_path_prefix=$WORKING_DIR
          fi
 
-         echo "bam_path_prefix $bam_path_prefix"
-
+         # remove_overlap_read_parts
          echo "call remove_overlap_read_parts"
          start=$(date +%s)
          python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path="'"$BASE"'.name_sorted.filtered.bam",out_bam_path="'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam")'
@@ -869,17 +855,12 @@ task Merge_sort_analyze {
         elapsed=$((end - start))
         echo "Elapsed time to chromatin contacts $elapsed seconds"
 
-        echo "recursively ls cromwell root"
-        ls -lR ~{cromwell_root_dir}
-
         if [ ~{cloud_provider} = "gcp" ]; then
             reference_fasta="~{cromwell_root_dir}/reference/~{genome_base}"
           else
             reference_fasta="$WORKING_DIR/reference/~{genome_base}"
         fi
 
-        echo "reference fast location: $reference_fasta"
-
         start=$(date +%s)
         echo "Call allcools bam-to-allc from deduped.bams"
         /opt/conda/bin/allcools bam-to-allc \
@@ -1018,11 +999,6 @@ task Summary {
     command <<<
         set -euo pipefail
 
-        echo "recursively lsing cromwell root in summary task"
-        ls -lR ~{cromwell_root_dir}
-        echo "lsing current dir"
-        ls -lrt
-
         WORKING_DIR=`pwd`
 
         if [ ~{cloud_provider} = "gcp" ]; then
@@ -1034,10 +1010,6 @@ task Summary {
             matrix_files_dir="$WORKING_DIR~{cromwell_root_dir}/output_bams"
             allc_index_dir="$WORKING_DIR~{cromwell_root_dir}/allc-*"
         fi
-        echo "matrix files dir: $matrix_files_dir"
-        echo "allc_index_dir: $allc_index_dir"
-        echo "base directory is: $base_directory"
-
 
         mkdir $base_directory/fastq
         mkdir $base_directory/bam
@@ -1051,9 +1023,7 @@ task Summary {
                     return
             fi
             for tar in "${@}"; do
-                echo "unstarring this file now: $tar"
                 tar -xvf "$tar"
-                echo "removing this tar file now: $tar"
                 rm "$tar"
             done
         }
@@ -1067,12 +1037,6 @@ task Summary {
         extract_and_remove ~{sep=' ' allc_uniq_reads_stats}
         extract_and_remove ~{sep=' ' unique_reads_cgn_extraction_tbi}
 
-        echo "lsing cromwell root again"
-        ls -lRt ~{cromwell_root_dir}
-
-        echo "lsing current directory again"
-        ls -lRt
-
         mv *.trimmed.stats.txt $base_directory/fastq
         mv *.hisat3n_dna_summary.txt *.hisat3n_dna_split_reads_summary.R1.txt *.hisat3n_dna_split_reads_summary.R2.txt $base_directory/bam
         mv $matrix_files_dir/*.hisat3n_dna.all_reads.deduped.matrix.txt $base_directory/bam
@@ -1080,34 +1044,7 @@ task Summary {
         mv *.allc.tsv.gz.count.csv $base_directory/allc
         mv $allc_index_dir/*.allc.tsv.gz.tbi $base_directory/allc
 
-        cwd=`pwd`
-        echo "current working dir is: $cwd"
-
-
-        python3 <<CODE
-        from cemba_data.hisat3n import *
-        import os
-        working_dir = os.getcwd()
-        print(f"Current working direcetory is: {working_dir}")
-
-        print("Calling summary function")
-        snm3c_summary()
-
-        print("Called summary function")
-
-        working_dir = os.getcwd()
-        print(f"Current working direcetory is: {working_dir}")
-        print("These are the files located here:")
-        files = os.listdir()
-        print(files)
-
-        CODE
-
-        cwd=`pwd`
-        echo "current working dir is: $cwd"
-        echo "recursively lsing cromwell root"
-        ls -lRt ~{cromwell_root_dir}
-
+        python3 -c 'from cemba_data.hisat3n import *;snm3c_summary()'
         mv MappingSummary.csv.gz ~{plate_id}_MappingSummary.csv.gz
 
     >>>

From c0672fdb5fcaa5db15ca682c376ee6043a347cb2 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 7 Jun 2024 09:48:55 -0400
Subject: [PATCH 164/186] just testing

---
 pipelines/skylab/paired_tag/PairedTag.wdl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl
index 22667f27d9..64ab0c9d1b 100644
--- a/pipelines/skylab/paired_tag/PairedTag.wdl
+++ b/pipelines/skylab/paired_tag/PairedTag.wdl
@@ -51,7 +51,7 @@ workflow PairedTag {
     }
 
     # All docker images that are needed for tasks in this workflow
-    String upstools_docker = "upstools:1.2.0-2023.03.03-1704723060"
+    String upstools_docker = "upstools:2.0.0"
     String snapatac_docker = "snapatac2:1.0.4-2.3.1-1700590229"
 
     # Prefixes based on cloud env
@@ -100,7 +100,8 @@ workflow PairedTag {
               barcodes_fastq = atac_r2_fastq[idx],
               input_id = input_id,
               whitelist = atac_whitelist,
-              preindex = preindex
+              preindex = preindex,
+              docker_path = docker_prefix + upstools_docker
         }
     }
 

From 774b129d80d1ab224af4ad4b9dd53cfb74719ab2 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 7 Jun 2024 11:42:19 -0400
Subject: [PATCH 165/186] add input to test

---
 verification/test-wdls/TestPairedTag.wdl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/verification/test-wdls/TestPairedTag.wdl b/verification/test-wdls/TestPairedTag.wdl
index 9695fb98e6..5294e258a5 100644
--- a/verification/test-wdls/TestPairedTag.wdl
+++ b/verification/test-wdls/TestPairedTag.wdl
@@ -53,6 +53,7 @@ workflow TestPairedTag {
       String vault_token_path
       String google_account_vault_path
       Boolean run_cellbender = false
+      String cloud_provider
 
     }
 
@@ -86,7 +87,8 @@ workflow TestPairedTag {
         adapter_seq_read3 = adapter_seq_read3,
         chrom_sizes = chrom_sizes,
         atac_whitelist = atac_whitelist,
-        soloMultiMappers = soloMultiMappers
+        soloMultiMappers = soloMultiMappers,
+        cloud_provider = cloud_provider
     }
 
     

From 498b7438359736174136d5765cae617b7743b96b Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 7 Jun 2024 14:31:37 -0400
Subject: [PATCH 166/186] changelogs

---
 pipelines/skylab/multiome/Multiome.changelog.md    | 5 +++++
 pipelines/skylab/multiome/Multiome.wdl             | 2 +-
 pipelines/skylab/multiome/atac.changelog.md        | 5 +++++
 pipelines/skylab/multiome/atac.wdl                 | 4 ++--
 pipelines/skylab/optimus/Optimus.wdl               | 2 +-
 pipelines/skylab/paired_tag/PairedTag.changelog.md | 5 +++++
 pipelines/skylab/paired_tag/PairedTag.wdl          | 2 +-
 7 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md
index a2737a1c95..ea1e04d94d 100644
--- a/pipelines/skylab/multiome/Multiome.changelog.md
+++ b/pipelines/skylab/multiome/Multiome.changelog.md
@@ -1,3 +1,8 @@
+# 5.0.1
+2024-06-07 (Date of Last Commit)
+
+* Updated the Multiome.wdl to run on Azure
+
 # 5.0.0
 2024-05-20 (Date of Last Commit)
 
diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index b7f21a8b39..3c3b7d222b 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Multiome {
 
-    String pipeline_version = "5.0.0"
+    String pipeline_version = "5.0.1"
 
     input {
         String cloud_provider
diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md
index 7478a49e0d..596b401a07 100644
--- a/pipelines/skylab/multiome/atac.changelog.md
+++ b/pipelines/skylab/multiome/atac.changelog.md
@@ -1,3 +1,8 @@
+# 5.0.1
+2024-06-07 (Date of Last Commit)
+
+* Updated the atac.wdl to run on Azure
+
 # 2.0.0
 2024-05-20 (Date of Last Commit)
 
diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 644799da11..2ff8512111 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -43,11 +43,11 @@ workflow ATAC {
     String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
   }
 
-  String pipeline_version = "2.0.0"
+  String pipeline_version = "2.0.1"
 
   # Determine docker prefix based on cloud provider
   String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
-  String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
+  String acr_docker_prefix = "dsppipeli nedev.azurecr.io/"
   String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
 
   # Docker image names
diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index 6f56e87060..b2a05cf727 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -68,7 +68,7 @@ workflow Optimus {
   # version of this pipeline
 
 
-  String pipeline_version = "7.1.0"
+  String pipeline_version = "7.1.2"
 
 
   # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md
index 22491b540b..aa3efd879a 100644
--- a/pipelines/skylab/paired_tag/PairedTag.changelog.md
+++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md
@@ -1,3 +1,8 @@
+# 0.7.1
+2024-06-07 (Date of Last Commit)
+
+* Updated the PairedTag.wdl to run on Azure
+
 # 0.7.0
 2024-05-20
 
diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl
index 64ab0c9d1b..bc19f65160 100644
--- a/pipelines/skylab/paired_tag/PairedTag.wdl
+++ b/pipelines/skylab/paired_tag/PairedTag.wdl
@@ -7,7 +7,7 @@ import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing
 import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow PairedTag {
-    String pipeline_version = "0.7.0"
+    String pipeline_version = "0.7.1"
 
     input {
         String input_id

From 3d942b9f6181642360fa2857e4f272a5b72d0586 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Fri, 7 Jun 2024 14:32:55 -0400
Subject: [PATCH 167/186] changelogs

---
 pipelines/skylab/multiome/atac.changelog.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md
index 596b401a07..86c284ef92 100644
--- a/pipelines/skylab/multiome/atac.changelog.md
+++ b/pipelines/skylab/multiome/atac.changelog.md
@@ -1,4 +1,4 @@
-# 5.0.1
+# 2.0.1
 2024-06-07 (Date of Last Commit)
 
 * Updated the atac.wdl to run on Azure

From a27adc23935088f73b37742fab7d381949c0951c Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 11 Jun 2024 10:28:43 -0400
Subject: [PATCH 168/186] changelogs

---
 pipelines/skylab/multiome/atac.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 2ff8512111..3c2f420c20 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -47,7 +47,7 @@ workflow ATAC {
 
   # Determine docker prefix based on cloud provider
   String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
-  String acr_docker_prefix = "dsppipeli nedev.azurecr.io/"
+  String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
   String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
 
   # Docker image names

From 08d6bbce9f4d46d679851498b4ed1b249d4d0c00 Mon Sep 17 00:00:00 2001
From: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com>
Date: Tue, 18 Jun 2024 09:22:12 -0400
Subject: [PATCH 169/186] Np make vm size an input to multiome (#1289)

* add vm size as input to Multiome.wdl

* add vm size as input to Multiome.wdl

* add new input to overviews

* add new input to overviews

* add new input to overviews

* add new input to overviews
---
 pipelines/skylab/multiome/Multiome.wdl        |  7 ++-
 pipelines/skylab/multiome/atac.wdl            | 17 ++++--
 pipelines/skylab/paired_tag/PairedTag.wdl     |  6 ++-
 .../Plumbing/BC011_BC015_downsampled.json     |  3 +-
 .../Plumbing/BI015_downsampled.json           |  3 +-
 website/docs/Pipelines/ATAC/README.md         | 33 ++++++------
 .../Pipelines/Multiome_Pipeline/README.md     | 52 ++++++++++---------
 7 files changed, 70 insertions(+), 51 deletions(-)

diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index 3c3b7d222b..1a150ea9ea 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -35,6 +35,8 @@ workflow Multiome {
         Array[File] atac_r1_fastq
         Array[File] atac_r2_fastq
         Array[File] atac_r3_fastq
+        # VM size used for several ATAC tasks
+        String vm_size = "Standard_M128s"
         # BWA tar reference
         File tar_bwa_reference
         # Chromosone sizes 
@@ -109,8 +111,9 @@ workflow Multiome {
             chrom_sizes = chrom_sizes,
             whitelist = atac_whitelist,
             adapter_seq_read1 = adapter_seq_read1,
-            annotations_gtf = annotations_gtf,
-            adapter_seq_read3 = adapter_seq_read3
+            adapter_seq_read3 = adapter_seq_read3,
+            vm_size = vm_size,
+            annotations_gtf = annotations_gtf
     }
     call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes {
         input:
diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 3c2f420c20..061e9c892b 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -30,6 +30,7 @@ workflow ATAC {
     Int num_threads_bwa = 128
     Int mem_size_bwa = 512
     String cpu_platform_bwa = "Intel Ice Lake"
+    String vm_size
 
     # Text file containing chrom_sizes for genome build (i.e. hg38)
     File chrom_sizes
@@ -80,7 +81,8 @@ workflow ATAC {
     input:
        nthreads = num_threads_bwa, 
        mem_size = mem_size_bwa,
-       cpu_platform = cpu_platform_bwa
+       cpu_platform = cpu_platform_bwa,
+       vm_size = vm_size
   }
 
   call FastqProcessing.FastqProcessATAC as SplitFastq {
@@ -116,7 +118,8 @@ workflow ATAC {
         mem_size = mem_size_bwa,
         cpu_platform = cpu_platform_bwa,
         docker_path = docker_prefix + samtools_docker,
-        cloud_provider = cloud_provider
+        cloud_provider = cloud_provider,
+        vm_size = vm_size
   }
 
   if (preindex) {
@@ -166,12 +169,14 @@ task GetNumSplits {
     Int mem_size
     String cpu_platform 
     String docker_image = "ubuntu:latest"
+    String vm_size
   }
 
   parameter_meta {
     docker_image: "the ubuntu docker image (default: ubuntu:latest)"
     nthreads: "Number of threads per node (default: 128)"
     mem_size: "the size of memory used during alignment"
+    vm_size: "the virtual machine used for the task"
   }
 
   command <<<
@@ -236,7 +241,7 @@ task GetNumSplits {
     cpu: nthreads
     cpuPlatform: cpu_platform
     memory: "${mem_size} GiB"
-    vm_size: "Standard_M128s"
+    vm_size: vm_size
   }
 
   output {
@@ -327,7 +332,8 @@ task BWAPairedEndAlignment {
     Int disk_size = 2000
     Int nthreads
     Int mem_size
-    String cpu_platform 
+    String cpu_platform
+    String vm_size
   }
 
   parameter_meta {
@@ -342,6 +348,7 @@ task BWAPairedEndAlignment {
     output_base_name: "basename to be used for the output of the task"
     docker_path: "The docker image path containing the runtime environment for this task"
     cloud_provider: "The cloud provider for the pipeline."
+    vm_size: "the virtual machine used for the task"
   }
 
   String bam_aligned_output_name = output_base_name + ".bam"
@@ -471,7 +478,7 @@ task BWAPairedEndAlignment {
     cpu: nthreads
     cpuPlatform: cpu_platform
     memory: "${mem_size} GiB"
-    vm_size: "Standard_M128s"
+    vm_size: vm_size
   }
 
   output {
diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl
index bc19f65160..ce1eb08599 100644
--- a/pipelines/skylab/paired_tag/PairedTag.wdl
+++ b/pipelines/skylab/paired_tag/PairedTag.wdl
@@ -34,6 +34,9 @@ workflow PairedTag {
         Array[File] atac_r1_fastq
         Array[File] atac_r2_fastq
         Array[File] atac_r3_fastq
+
+        String vm_size = "Standard_M128s"
+
         # BWA input
         File tar_bwa_reference
         File chrom_sizes
@@ -119,7 +122,8 @@ workflow PairedTag {
             adapter_seq_read3 = adapter_seq_read3,
             annotations_gtf = annotations_gtf,
             preindex = preindex,
-            cloud_provider = cloud_provider
+            cloud_provider = cloud_provider,
+            vm_size = vm_size
     }
 
     if (preindex) {
diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json
index 9e7b18b679..470b1ce33c 100644
--- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json
+++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json
@@ -23,5 +23,6 @@
   "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
   "PairedTag.Atac_preindex.num_threads_bwa":"16",
   "PairedTag.Atac_preindex.mem_size_bwa":"64", 
-  "PairedTag.soloMultiMappers":"Uniform"
+  "PairedTag.soloMultiMappers":"Uniform",
+  "PairedTag.cloud_provider": "gcp"
 }
diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json
index 2bdd7a8fe2..67560d3aee 100644
--- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json
+++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json
@@ -23,5 +23,6 @@
   "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
   "PairedTag.Atac_preindex.num_threads_bwa":"16",
   "PairedTag.Atac_preindex.mem_size_bwa":"64", 
-  "PairedTag.soloMultiMappers":"Uniform"
+  "PairedTag.soloMultiMappers":"Uniform",
+  "PairedTag.cloud_provider": "gcp"
 }
diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md
index 76033520f8..d95d82a440 100644
--- a/website/docs/Pipelines/ATAC/README.md
+++ b/website/docs/Pipelines/ATAC/README.md
@@ -44,23 +44,24 @@ ATAC can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/stable/
 ## Input Variables
 The following describes the inputs of the ATAC workflow. For more details on how default inputs are set for the Multiome workflow, see the [Multiome overview](../Multiome_Pipeline/README).
 
-| Variable name | Description |
-| --- | --- |
-| read1_fastq_gzipped | Fastq inputs (array of compressed read 1 FASTQ files). |
-| read2_fastq_gzipped | Fastq inputs (array of compressed read 2 FASTQ files containing cellular barcodes). |
-| read3_fastq_gzipped | Fastq inputs (array of compressed read 3 FASTQ files). |
-| input_id | Output prefix/base name for all intermediate files and pipeline outputs. |
+| Variable name | Description                                                                                                     |
+| --- |-----------------------------------------------------------------------------------------------------------------|
+| read1_fastq_gzipped | Fastq inputs (array of compressed read 1 FASTQ files).                                                          |
+| read2_fastq_gzipped | Fastq inputs (array of compressed read 2 FASTQ files containing cellular barcodes).                             |
+| read3_fastq_gzipped | Fastq inputs (array of compressed read 3 FASTQ files).                                                          |
+| input_id | Output prefix/base name for all intermediate files and pipeline outputs.                                        |
 | cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
-| preindex | Boolean used for paired-tag data and not applicable to ATAC data types; default is set to false. | 
-| tar_bwa_reference | BWA reference (tar file containing reference fasta and corresponding files). |
-| num_threads_bwa | Optional integer defining the number of CPUs per node for the BWA-mem alignment task (default: 128). |
-| mem_size_bwa | Optional integer defining the memory size for the BWA-mem alignment task in GB (default: 512). |
-| cpu_platform_bwa | Optional string defining the CPU platform for the BWA-mem alignment task (default: "Intel Ice Lake"). |
-| annotations_gtf | CreateFragmentFile input variable: GTF file for SnapATAC2 to calculate TSS sites of fragment file.|
-| chrom_sizes | CreateFragmentFile input variable: Text file containing chrom_sizes for genome build (i.e., hg38) |
-| whitelist | Whitelist file for ATAC cellular barcodes. |
-| adapter_seq_read1 | TrimAdapters input: Sequence adapter for read 1 fastq. |
-| adapter_seq_read3 | TrimAdapters input: Sequence adapter for read 3 fastq. |
+| preindex | Boolean used for paired-tag data and not applicable to ATAC data types; default is set to false.                | 
+| tar_bwa_reference | BWA reference (tar file containing reference fasta and corresponding files).                                    |
+| num_threads_bwa | Optional integer defining the number of CPUs per node for the BWA-mem alignment task (default: 128).            |
+| mem_size_bwa | Optional integer defining the memory size for the BWA-mem alignment task in GB (default: 512).                  |
+| cpu_platform_bwa | Optional string defining the CPU platform for the BWA-mem alignment task (default: "Intel Ice Lake").           |
+| annotations_gtf | CreateFragmentFile input variable: GTF file for SnapATAC2 to calculate TSS sites of fragment file.              |
+| chrom_sizes | CreateFragmentFile input variable: Text file containing chrom_sizes for genome build (i.e., hg38)               |
+| whitelist | Whitelist file for ATAC cellular barcodes.                                                                      |
+| adapter_seq_read1 | TrimAdapters input: Sequence adapter for read 1 fastq.                                                          |
+| adapter_seq_read3 | TrimAdapters input: Sequence adapter for read 3 fastq.                                                          |
+| vm_size | String defining the Azure virtual machine size used by the GetNumSplits and BWAPairedEndAlignment tasks (for example, "Standard_M128s"); the ATAC workflow itself sets no default. |
 
 ## ATAC tasks and tools
 
diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md
index ecdbea40b1..4d77ad4dfe 100644
--- a/website/docs/Pipelines/Multiome_Pipeline/README.md
+++ b/website/docs/Pipelines/Multiome_Pipeline/README.md
@@ -52,32 +52,34 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta
 
 ## Inputs
 
-| Input name | Description | Type |
-| --- | --- | --- |
-| input_id | Unique identifier describing the biological sample or replicate that corresponds with the FASTQ files; can be a human-readable name or UUID. | String |
-| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
-| annotations_gtf | GTF file containing gene annotations used for GEX cell metric calculation and ATAC fragment metrics; must match the GTF used to build the STAR aligner. | File |
-| gex_r1_fastq | Array of read 1 FASTQ files representing a single GEX 10x library. | Array[File] |
-| gex_r2_fastq | Array of read 2 FASTQ files representing a single GEX 10x library.| Array[File] |
-| gex_i1_fastq | Optional array of index FASTQ files representing a single GEX 10x library; multiplexed samples are not currently supported, but the file may be passed to the pipeline. | Array[File] |
-| tar_star_reference | TAR file containing a species-specific reference genome and GTF for Optimus (GEX) pipeline. | File | 
-| mt_genes | Optional file for the Optimus (GEX) pipeline containing mitochondrial gene names used for metric calculation; default assumes 'mt' prefix in GTF (case insensitive). | File |
-| counting_mode | Optional string that determines whether the Optimus (GEX) pipeline should be run in single-cell mode (sc_rna) or single-nucleus mode (sn_rna); default is "sn_rna". | String |
-| tenx_chemistry_version | Optional integer for the Optimus (GEX) pipeline specifying the 10x version chemistry the data was generated with; validated by examination of the first read 1 FASTQ file read structure; default is "3". | Integer |
-| emptydrops_lower | Optional threshold for UMIs for the Optimus (GEX) pipeline that empty drops tool should consider for determining cell; data below threshold is not removed; default is "100". | Integer |
-| force_no_check | Optional boolean for the Optimus (GEX) pipeline indicating if the pipeline should perform checks; default is "false". | Boolean |
+| Input name | Description                                                                                                                                                                                                                                                | Type |
+| --- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| --- |
+| input_id | Unique identifier describing the biological sample or replicate that corresponds with the FASTQ files; can be a human-readable name or UUID.                                                                                                               | String |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure".                                                                                                                                            | String |
+| annotations_gtf | GTF file containing gene annotations used for GEX cell metric calculation and ATAC fragment metrics; must match the GTF used to build the STAR aligner.                                                                                                    | File |
+| gex_r1_fastq | Array of read 1 FASTQ files representing a single GEX 10x library.                                                                                                                                                                                         | Array[File] |
+| gex_r2_fastq | Array of read 2 FASTQ files representing a single GEX 10x library.                                                                                                                                                                                         | Array[File] |
+| gex_i1_fastq | Optional array of index FASTQ files representing a single GEX 10x library; multiplexed samples are not currently supported, but the file may be passed to the pipeline.                                                                                    | Array[File] |
+| tar_star_reference | TAR file containing a species-specific reference genome and GTF for Optimus (GEX) pipeline.                                                                                                                                                                | File | 
+| mt_genes | Optional file for the Optimus (GEX) pipeline containing mitochondrial gene names used for metric calculation; default assumes 'mt' prefix in GTF (case insensitive).                                                                                       | File |
+| counting_mode | Optional string that determines whether the Optimus (GEX) pipeline should be run in single-cell mode (sc_rna) or single-nucleus mode (sn_rna); default is "sn_rna".                                                                                        | String |
+| tenx_chemistry_version | Optional integer for the Optimus (GEX) pipeline specifying the 10x version chemistry the data was generated with; validated by examination of the first read 1 FASTQ file read structure; default is "3".                                                  | Integer |
+| emptydrops_lower | Optional threshold for UMIs for the Optimus (GEX) pipeline that empty drops tool should consider for determining cell; data below threshold is not removed; default is "100".                                                                              | Integer |
+| force_no_check | Optional boolean for the Optimus (GEX) pipeline indicating if the pipeline should perform checks; default is "false".                                                                                                                                      | Boolean |
 | ignore_r1_read_length | Optional boolean for the Optimus (GEX) pipeline indicating if the pipeline should ignore barcode chemistry check; if "true", the workflow will not ensure the `10x_chemistry_version` input matches the chemistry in the read 1 FASTQ; default is "false". | Boolean |
-| star_strand_mode | Optional string for the Optimus (GEX) pipeline for performing STARsolo alignment on forward stranded, reverse stranded, or unstranded data; default is "Forward". | String |
-| count_exons | Optional boolean for the Optimus (GEX) pipeline indicating if the workflow should calculate exon counts **when in single-nucleus (sn_rna) mode**; if "true" in sc_rna mode, the workflow will return an error; default is "false". | Boolean |
-| soloMultiMappers | Optional string describing whether or not the Optimus (GEX) pipeline should run STARsolo with the `--soloMultiMappers` flag. | String |
-| atac_r1_fastq | Array of read 1 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
-| atac_r2_fastq | Array of barcodes FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
-| atac_r3_fastq | Array of read 2 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
-| tar_bwa_reference | TAR file containing the reference index files for BWA-mem alignment for the ATAC pipeline. | File | 
-| chrom_sizes | File containing the genome chromosome sizes; used to calculate ATAC fragment file metrics. | File |
-| adapter_seq_read1 | Optional string describing the adapter sequence for ATAC read 1 paired-end reads to be used during adapter trimming with Cutadapt; default is "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG". | String |
-| adapter_seq_read3 | Optional string describing the adapter sequence for ATAC read 2 paired-end reads to be used during adapter trimming with Cutadapt; default is "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG". | String |
-| run_cellbender | Optional boolean used to determine if the Optimus (GEX) pipeline should run CellBender on the output gene expression h5ad file, `h5ad_output_file_gex`; default is "false". | Boolean |
+| star_strand_mode | Optional string for the Optimus (GEX) pipeline for performing STARsolo alignment on forward stranded, reverse stranded, or unstranded data; default is "Forward".                                                                                          | String |
+| count_exons | Optional boolean for the Optimus (GEX) pipeline indicating if the workflow should calculate exon counts **when in single-nucleus (sn_rna) mode**; if "true" in sc_rna mode, the workflow will return an error; default is "false".                         | Boolean |
+| soloMultiMappers | Optional string describing whether or not the Optimus (GEX) pipeline should run STARsolo with the `--soloMultiMappers` flag.                                                                                                                               | String |
+| atac_r1_fastq | Array of read 1 paired-end FASTQ files representing a single 10x multiome ATAC library.                                                                                                                                                                    | Array[File] |
+| atac_r2_fastq | Array of barcodes FASTQ files representing a single 10x multiome ATAC library.                                                                                                                                                                             | Array[File] |
+| atac_r3_fastq | Array of read 2 paired-end FASTQ files representing a single 10x multiome ATAC library.                                                                                                                                                                    | Array[File] |
+| tar_bwa_reference | TAR file containing the reference index files for BWA-mem alignment for the ATAC pipeline.                                                                                                                                                                 | File | 
+| chrom_sizes | File containing the genome chromosome sizes; used to calculate ATAC fragment file metrics.                                                                                                                                                                 | File |
+| adapter_seq_read1 | Optional string describing the adapter sequence for ATAC read 1 paired-end reads to be used during adapter trimming with Cutadapt; default is "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG".                                                                        | String |
+| adapter_seq_read3 | Optional string describing the adapter sequence for ATAC read 2 paired-end reads to be used during adapter trimming with Cutadapt; default is "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG".                                                                         | String |
+| run_cellbender | Optional boolean used to determine if the Optimus (GEX) pipeline should run CellBender on the output gene expression h5ad file, `h5ad_output_file_gex`; default is "false".                                                                                | Boolean |
+| vm_size | String defining the Azure virtual machine size passed to the ATAC workflow for its alignment tasks (default: "Standard_M128s"). | String |
+
 
 #### Sample inputs for analyses in a Terra Workspace
 

From 988c970eecd965ce283e01b0ef461df04b71c639 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 25 Jun 2024 13:03:42 -0400
Subject: [PATCH 170/186] more disk and mem

---
 tasks/broad/GermlineVariantDiscovery.wdl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tasks/broad/GermlineVariantDiscovery.wdl b/tasks/broad/GermlineVariantDiscovery.wdl
index d6b6e55cc3..bdfa826dfc 100644
--- a/tasks/broad/GermlineVariantDiscovery.wdl
+++ b/tasks/broad/GermlineVariantDiscovery.wdl
@@ -103,13 +103,13 @@ task HaplotypeCaller_GATK4_VCF {
     Int memory_multiplier = 1
   }
   
-  Int memory_size_mb = ceil(8000 * memory_multiplier)
+  Int memory_size_mb = ceil(8000 * memory_multiplier) + 2000
 
   String output_suffix = if make_gvcf then ".g.vcf.gz" else ".vcf.gz"
   String output_file_name = vcf_basename + output_suffix
 
   Float ref_size = size(ref_fasta, "GiB") + size(ref_fasta_index, "GiB") + size(ref_dict, "GiB")
-  Int disk_size = ceil(((size(input_bam, "GiB") + 30) / hc_scatter) + ref_size) + 20
+  Int disk_size = ceil(((size(input_bam, "GiB") + 30) / hc_scatter) + ref_size) + 50
 
   String bamout_arg = if make_bamout then "-bamout ~{vcf_basename}.bamout.bam" else ""
 

From 1da298a607aecbbe90e352a1e15a7b33adb7286c Mon Sep 17 00:00:00 2001
From: aawdeh <awdeh@broadinstitute.org>
Date: Wed, 3 Jul 2024 14:21:47 -0400
Subject: [PATCH 171/186] Azurize Cell Bender in Multiome (#1299)

---
 pipelines/skylab/multiome/Multiome.wdl | 45 ++++++++++++++++++--------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index 1a150ea9ea..6257e744d5 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -3,6 +3,7 @@ version 1.0
 import "../../../pipelines/skylab/multiome/atac.wdl" as atac
 import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
 import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
+import "https://raw.githubusercontent.com/aawdeh/CellBender/aa-cbwithoutcuda/wdl/cellbender_remove_background_azure.wdl" as CellBender_no_cuda
 import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender
 import "../../../tasks/broad/Utilities.wdl" as utils
 
@@ -127,25 +128,43 @@ workflow Multiome {
 
     # Call CellBender
     if (run_cellbender) {
-        call CellBender.run_cellbender_remove_background_gpu as CellBender {
-            input:
-                sample_name = input_id,
-                input_file_unfiltered = Optimus.h5ad_output_file,
-                hardware_boot_disk_size_GB = 20,
-                hardware_cpu_count = 4,
-                hardware_disk_size_GB = 50,
-                hardware_gpu_type = "nvidia-tesla-t4",
-                hardware_memory_GB = 32,
-                hardware_preemptible_tries = 2,
-                hardware_zones = "us-central1-a us-central1-c",
-                nvidia_driver_version = "470.82.01"
-        }
+        if (cloud_provider == "gcp") {
+            call CellBender.run_cellbender_remove_background_gpu as CellBender {
+                input:
+                    sample_name = input_id,
+                    input_file_unfiltered = Optimus.h5ad_output_file,
+                    hardware_boot_disk_size_GB = 20,
+                    hardware_cpu_count = 4,
+                    hardware_disk_size_GB = 50,
+                    hardware_gpu_type = "nvidia-tesla-t4",
+                    hardware_memory_GB = 32,
+                    hardware_preemptible_tries = 2,
+                    hardware_zones = "us-central1-a us-central1-c",
+                    nvidia_driver_version = "470.82.01"
+            }
+        } 
+        if (cloud_provider == "azure") {
+            call CellBender_no_cuda.run_cellbender_remove_background_gpu as CellBender_no_cuda {
+                input:
+                    sample_name = input_id,
+                    input_file_unfiltered = Optimus.h5ad_output_file,
+                    hardware_boot_disk_size_GB = 20,
+                    hardware_cpu_count = 4,
+                    hardware_disk_size_GB = 50,
+                    hardware_gpu_type = "nvidia-tesla-t4",
+                    hardware_memory_GB = 32,
+                    hardware_preemptible_tries = 2,
+                    hardware_zones = "us-central1-a us-central1-c",
+                    nvidia_driver_version = "470.82.01"
+            }
+        }           
     }
 
     meta {
         allowNestedInputs: true
     }
 
+    
     output {
         
         String multiome_pipeline_version_out = pipeline_version

From b3293c57dbe7cec9379910f8f9926d223a9d0a18 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 9 Jul 2024 12:04:27 -0400
Subject: [PATCH 172/186] update docker for Summary_PerCellOutput

---
 pipelines/skylab/snm3C/snm3C.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index b1850b8af1..5975a2a46e 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -112,7 +112,7 @@ workflow snm3C {
             unique_reads_cgn_extraction_allc_extract = Merge_sort_analyze.extract_allc_output_allc_tar,
             unique_reads_cgn_extraction_tbi_extract = Merge_sort_analyze.extract_allc_output_tbi_tar,
             plate_id = plate_id,
-            docker = docker
+            docker = docker_prefix + m3c_yap_hisat_docker
     }
 
     call Summary {

From 82cccda91e19a526682f849a3f575c31f32b74d1 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 9 Jul 2024 13:17:22 -0400
Subject: [PATCH 173/186] changelogs

---
 pipelines/broad/arrays/single_sample/Arrays.wdl                 | 2 +-
 .../single_sample/wgs/WholeGenomeGermlineSingleSample.wdl       | 2 +-
 .../broad/dna_seq/germline/variant_calling/VariantCalling.wdl   | 2 +-
 .../single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl   | 2 +-
 pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl | 2 +-
 .../broad/internal/arrays/single_sample/BroadInternalArrays.wdl | 2 +-
 .../UltimaGenomics/BroadInternalUltimaGenomics.wdl              | 2 +-
 pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl   | 2 +-
 pipelines/broad/qc/CheckFingerprint.wdl                         | 2 +-
 pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl        | 2 +-
 .../reprocessing/external/exome/ExternalExomeReprocessing.wdl   | 2 +-
 .../external/wgs/ExternalWholeGenomeReprocessing.wdl            | 2 +-
 pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl    | 2 +-
 pipelines/skylab/snm3C/snm3C.wdl                                | 2 +-
 14 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/pipelines/broad/arrays/single_sample/Arrays.wdl b/pipelines/broad/arrays/single_sample/Arrays.wdl
index 75e52e5c90..2455e4ab20 100644
--- a/pipelines/broad/arrays/single_sample/Arrays.wdl
+++ b/pipelines/broad/arrays/single_sample/Arrays.wdl
@@ -23,7 +23,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Arrays {
 
-  String pipeline_version = "2.6.24"
+  String pipeline_version = "2.6.25"
 
   input {
     String chip_well_barcode
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
index 48af86c619..72183e30db 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
@@ -40,7 +40,7 @@ import "../../../../../../structs/dna_seq/DNASeqStructs.wdl"
 workflow WholeGenomeGermlineSingleSample {
 
 
-  String pipeline_version = "3.1.21"
+  String pipeline_version = "3.1.22"
 
 
   input {
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index 34df120d96..98b9fb77ee 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -9,7 +9,7 @@ import "../../../../../tasks/broad/DragenTasks.wdl" as DragenTasks
 workflow VariantCalling {
 
 
-  String pipeline_version = "2.1.19"
+  String pipeline_version = "2.1.20"
 
 
   input {
diff --git a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl
index 17d4fecfb8..6cc165522f 100644
--- a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl
+++ b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl
@@ -43,7 +43,7 @@ workflow UltimaGenomicsWholeGenomeCramOnly {
     save_bam_file: "If true, then save intermeidate ouputs used by germline pipeline (such as the output BAM) otherwise they won't be kept as outputs."
   }
 
-  String pipeline_version = "1.0.17"
+  String pipeline_version = "1.0.18"
 
   References references = alignment_references.references
 
diff --git a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl
index 2443bc8bcb..314995c5db 100644
--- a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl
+++ b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl
@@ -21,7 +21,7 @@ import "../../../../tasks/broad/Qc.wdl" as Qc
 
 workflow IlluminaGenotypingArray {
 
-  String pipeline_version = "1.12.18"
+  String pipeline_version = "1.12.19"
 
   input {
     String sample_alias
diff --git a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl
index b7bf1c183e..6a9b1b195d 100644
--- a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl
+++ b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl
@@ -9,7 +9,7 @@ workflow BroadInternalArrays {
         description: "Push outputs of Arrays.wdl to TDR dataset table ArraysOutputsTable."
     }
 
-    String pipeline_version = "1.1.8"
+    String pipeline_version = "1.1.9"
 
     input {
         # inputs to wrapper task
diff --git a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl
index 946c9196dd..df1b6e664b 100644
--- a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl
+++ b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl
@@ -6,7 +6,7 @@ import "../../../../../../../pipelines/broad/qc/CheckFingerprint.wdl" as FP
 
 workflow BroadInternalUltimaGenomics {
 
-  String pipeline_version = "1.0.18"
+  String pipeline_version = "1.0.19"
 
   input {
   
diff --git a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl
index d4f5316e89..766f087263 100644
--- a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl
+++ b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl
@@ -7,7 +7,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils
 
 workflow BroadInternalRNAWithUMIs {
 
-  String pipeline_version = "1.0.30"
+  String pipeline_version = "1.0.31"
 
   input {
     # input needs to be either "hg19" or "hg38"
diff --git a/pipelines/broad/qc/CheckFingerprint.wdl b/pipelines/broad/qc/CheckFingerprint.wdl
index 0338466c3b..dcc7ee057f 100644
--- a/pipelines/broad/qc/CheckFingerprint.wdl
+++ b/pipelines/broad/qc/CheckFingerprint.wdl
@@ -24,7 +24,7 @@ import "../../../tasks/broad/Qc.wdl" as Qc
 
 workflow CheckFingerprint {
 
-  String pipeline_version = "1.0.17"
+  String pipeline_version = "1.0.18"
 
   input {
     File? input_vcf
diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
index 0f4fadb666..49c768ba3c 100644
--- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
@@ -7,7 +7,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl"
 workflow ExomeReprocessing {
 
 
-  String pipeline_version = "3.1.20"
+  String pipeline_version = "3.1.21"
 
   input {
     File? input_cram
diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
index 3ff6daaa8b..84ff050377 100644
--- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
@@ -5,7 +5,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
 
 workflow ExternalExomeReprocessing {
 
-  String pipeline_version = "3.1.22"
+  String pipeline_version = "3.1.23"
 
 
   input {
diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
index 9776ce06d5..35b77bc3ca 100644
--- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
@@ -6,7 +6,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
 workflow ExternalWholeGenomeReprocessing {
 
 
-  String pipeline_version = "2.1.22"
+  String pipeline_version = "2.1.23"
 
   input {
     File? input_cram
diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
index cd4afd70b5..e7f6d51614 100644
--- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
@@ -6,7 +6,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl"
 
 workflow WholeGenomeReprocessing {
 
-  String pipeline_version = "3.1.21"
+  String pipeline_version = "3.1.22"
 
   input {
     File? input_cram
diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl
index 5975a2a46e..dc1f47b00e 100644
--- a/pipelines/skylab/snm3C/snm3C.wdl
+++ b/pipelines/skylab/snm3C/snm3C.wdl
@@ -44,7 +44,7 @@ workflow snm3C {
     }
 
     # version of the pipeline
-    String pipeline_version = "4.0.1"
+    String pipeline_version = "4.0.2"
 
     call Demultiplexing {
         input:

From deb14ab9c134a14e08405e0136eddabbc6e7aad4 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 9 Jul 2024 13:18:53 -0400
Subject: [PATCH 174/186] Bump SlideSeq pipeline_version to 3.1.8

---
 pipelines/skylab/slideseq/SlideSeq.wdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index 0998c8eb9b..1f8b7f7b00 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow SlideSeq {
 
-    String pipeline_version = "3.1.7"
+    String pipeline_version = "3.1.8"
 
     input {
         Array[File] r1_fastq

From 55385c69fc399548aa5cb2e185bb01356af11ebb Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Tue, 9 Jul 2024 13:45:31 -0400
Subject: [PATCH 175/186] update dockers

---
 pipelines/skylab/optimus/Optimus.wdl   | 2 +-
 pipelines/skylab/slideseq/SlideSeq.wdl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index 7e8d089a6e..239ff28423 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -91,7 +91,7 @@ workflow Optimus {
   String star_docker = "star:1.0.1-2.7.11a-1692706072"
   String warp_tools_docker_2_0_1 = "warp-tools:2.0.1"
   String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985"
-  String star_merge_docker = "star-merge-npz:1.1"
+  String star_merge_docker = "star-merge-npz:1.2"
 
   #TODO how do we handle these?
   String alpine_docker = "alpine-bash:latest"
diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index 1f8b7f7b00..7d7da7fd50 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -50,7 +50,7 @@ workflow SlideSeq {
     String picard_cloud_docker = "picard-cloud:2.26.10"
     String warp_tools_docker_2_0_1 = "warp-tools:2.0.1"
     String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985"
-    String star_merge_docker = "star-merge-npz:1.1"
+    String star_merge_docker = "star-merge-npz:1.2"
 
     String ubuntu_docker = "ubuntu_16_0_4:latest"
     String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/"

From 1722781692cb2589ba3cfc839f1713ff07928761 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 11 Jul 2024 14:08:50 -0400
Subject: [PATCH 176/186] Bump atac (2.1.1) and SlideSeq (3.1.9) pipeline versions

---
 pipelines/skylab/multiome/atac.wdl     | 2 +-
 pipelines/skylab/slideseq/SlideSeq.wdl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index f94002abf9..4822ade914 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -46,7 +46,7 @@ workflow ATAC {
     String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
   }
 
-  String pipeline_version = "2.0.1"
+  String pipeline_version = "2.1.1"
 
   # Determine docker prefix based on cloud provider
   String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index 7d7da7fd50..a5aba56884 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow SlideSeq {
 
-    String pipeline_version = "3.1.8"
+    String pipeline_version = "3.1.9"
 
     input {
         Array[File] r1_fastq

From 6228acaaa2fd9714f1342f0a867a818d0142482d Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 11 Jul 2024 14:10:44 -0400
Subject: [PATCH 177/186] Whitespace-only edit to atac changelog 2.1.1 entry

---
 pipelines/skylab/multiome/atac.changelog.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md
index f0dd220a3c..ae2a9705bc 100644
--- a/pipelines/skylab/multiome/atac.changelog.md
+++ b/pipelines/skylab/multiome/atac.changelog.md
@@ -1,7 +1,7 @@
 # 2.1.1
 2024-07-11 (Date of Last Commit)
 
-* Updated the atac.wdl to run on Azure
+* Updated the atac.wdl to run on Azure 
 
 # 2.1.0
 2024-07-09 (Date of Last Commit)

From 7f38addfe8c4acdc6f15c5410561c12237adfa6b Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Thu, 11 Jul 2024 14:13:10 -0400
Subject: [PATCH 178/186] Remove duplicate pipeline_version declaration from atac.wdl

---
 pipelines/skylab/multiome/atac.wdl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 4822ade914..baa608552e 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -68,7 +68,6 @@ workflow ATAC {
     }
   }
 
-  String pipeline_version = "2.1.1"
 
   parameter_meta {
     read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads"

From b3854d2be7bf57930018a137d86c8c4c1aeb2729 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 15 Jul 2024 10:18:40 -0400
Subject: [PATCH 179/186] Use minor (not patch) version bumps for new required cloud_provider input

---
 .../joint_genotyping/reblocking/ReblockGVCF.changelog.md     | 4 ++--
 .../germline/joint_genotyping/reblocking/ReblockGVCF.wdl     | 2 +-
 .../wgs/WholeGenomeGermlineSingleSample.changelog.md         | 4 ++--
 .../single_sample/wgs/WholeGenomeGermlineSingleSample.wdl    | 2 +-
 .../germline/variant_calling/VariantCalling.changelog.md     | 4 ++--
 .../dna_seq/germline/variant_calling/VariantCalling.wdl      | 2 +-
 .../broad/reprocessing/exome/ExomeReprocessing.changelog.md  | 4 ++--
 pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl     | 2 +-
 .../external/exome/ExternalExomeReprocessing.changelog.md    | 2 +-
 .../external/exome/ExternalExomeReprocessing.wdl             | 2 +-
 .../wgs/ExternalWholeGenomeReprocessing.changelog.md         | 4 ++--
 .../external/wgs/ExternalWholeGenomeReprocessing.wdl         | 2 +-
 .../reprocessing/wgs/WholeGenomeReprocessing.changelog.md    | 4 ++--
 pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl | 2 +-
 pipelines/skylab/multiome/Multiome.changelog.md              | 5 +++--
 pipelines/skylab/multiome/Multiome.wdl                       | 2 +-
 pipelines/skylab/multiome/atac.changelog.md                  | 4 ++--
 pipelines/skylab/multiome/atac.wdl                           | 2 +-
 pipelines/skylab/optimus/Optimus.changelog.md                | 4 ++--
 pipelines/skylab/optimus/Optimus.wdl                         | 2 +-
 pipelines/skylab/paired_tag/PairedTag.changelog.md           | 4 ++--
 pipelines/skylab/paired_tag/PairedTag.wdl                    | 2 +-
 pipelines/skylab/slideseq/SlideSeq.changelog.md              | 4 ++--
 pipelines/skylab/slideseq/SlideSeq.wdl                       | 2 +-
 .../MultiSampleSmartSeq2SingleNucleus.changelog.md           | 4 ++--
 .../MultiSampleSmartSeq2SingleNucleus.wdl                    | 2 +-
 26 files changed, 39 insertions(+), 38 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md
index 2ca2b89d97..a7e79abe57 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md
@@ -1,7 +1,7 @@
-# 2.1.14
+# 2.2.0
 2024-07-09 (Date of Last Commit)
 
-* Updated ReblockGVCF.wdl to run in Azure.
+* Updated ReblockGVCF.wdl to run in Azure. cloud_provider is a new, required input. 
 
 # 2.1.13
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl
index 68408f30b9..f9a14011dc 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl
@@ -6,7 +6,7 @@ import "../../../../../../tasks/broad/Utilities.wdl" as utils
 
 workflow ReblockGVCF {
 
-  String pipeline_version = "2.1.14"
+  String pipeline_version = "2.2.0"
 
 
   input {
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
index b0af35698e..7d4242d6fb 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
@@ -1,7 +1,7 @@
-# 3.1.22
+# 3.2.0
 2024-07-09 (Date of Last Commit)
 
-* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input.
 * Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
 
 # 3.1.21
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
index 72183e30db..bc87cfd0cb 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
@@ -40,7 +40,7 @@ import "../../../../../../structs/dna_seq/DNASeqStructs.wdl"
 workflow WholeGenomeGermlineSingleSample {
 
 
-  String pipeline_version = "3.1.22"
+  String pipeline_version = "3.2.0"
 
 
   input {
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md
index 89e0aec3e3..b4eb529e7d 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md
@@ -1,7 +1,7 @@
-# 2.1.20
+# 2.2.0
 2024-07-09 (Date of Last Commit)
 
-* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input.
 
 # 2.1.19
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index 98b9fb77ee..d352b628bc 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -9,7 +9,7 @@ import "../../../../../tasks/broad/DragenTasks.wdl" as DragenTasks
 workflow VariantCalling {
 
 
-  String pipeline_version = "2.1.20"
+  String pipeline_version = "2.2.0"
 
 
   input {
diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md
index c368809b4e..f42b61ac28 100644
--- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md
@@ -1,7 +1,7 @@
-# 3.1.21
+# 3.2.0
 2024-07-09 (Date of Last Commit)
 
-* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers.
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input.
 
 # 3.1.20
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
index 49c768ba3c..b63e0501f5 100644
--- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
@@ -7,7 +7,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl"
 workflow ExomeReprocessing {
 
 
-  String pipeline_version = "3.1.21"
+  String pipeline_version = "3.2.0"
 
   input {
     File? input_cram
diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
index 2cf6161455..37e685e086 100644
--- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
@@ -1,7 +1,7 @@
 # 3.1.23
 2024-07-09 (Date of Last Commit)
 
-* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers.
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input.
 
 # 3.1.22
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
index 84ff050377..49db5591dc 100644
--- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
@@ -5,7 +5,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
 
 workflow ExternalExomeReprocessing {
 
-  String pipeline_version = "3.1.23"
+  String pipeline_version = "3.2.0"
 
 
   input {
diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md
index a276942d1d..6ad12f66c0 100644
--- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md
@@ -1,7 +1,7 @@
-# 2.1.23
+# 2.2.0
 2024-07-09 (Date of Last Commit)
 
-* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers.
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input.
 
 # 2.1.22
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
index 35b77bc3ca..341be24f78 100644
--- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
@@ -6,7 +6,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
 workflow ExternalWholeGenomeReprocessing {
 
 
-  String pipeline_version = "2.1.23"
+  String pipeline_version = "2.2.0"
 
   input {
     File? input_cram
diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md
index 42da79d053..856a1a2f1c 100644
--- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md
@@ -1,7 +1,7 @@
-# 3.1.22
+# 3.2.0
 2024-07-09 (Date of Last Commit)
 
-* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input.
 
 # 3.1.21
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
index e7f6d51614..a65e723ad3 100644
--- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
@@ -6,7 +6,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl"
 
 workflow WholeGenomeReprocessing {
 
-  String pipeline_version = "3.1.22"
+  String pipeline_version = "3.2.0"
 
   input {
     File? input_cram
diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md
index 8b26ecfc7f..2cac5dc595 100644
--- a/pipelines/skylab/multiome/Multiome.changelog.md
+++ b/pipelines/skylab/multiome/Multiome.changelog.md
@@ -1,5 +1,6 @@
-# 5.2.1
-* Updated the Multiome.wdl to run on Azure
+# 5.3.0
+
+* Updated the Multiome.wdl to run on Azure. cloud_provider is a new, required input.
 
 # 5.2.0
 2024-07-09 (Date of Last Commit)
diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index 274141a76a..9da6addf65 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -9,7 +9,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Multiome {
 
-    String pipeline_version = "5.2.1"
+    String pipeline_version = "5.3.0"
 
 
     input {
diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md
index ae2a9705bc..f44b5b2328 100644
--- a/pipelines/skylab/multiome/atac.changelog.md
+++ b/pipelines/skylab/multiome/atac.changelog.md
@@ -1,7 +1,7 @@
-# 2.1.1
+# 2.2.0
 2024-07-11 (Date of Last Commit)
 
-* Updated the atac.wdl to run on Azure 
+* Updated the atac.wdl to run on Azure. cloud_provider is a new, required input.
 
 # 2.1.0
 2024-07-09 (Date of Last Commit)
diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index baa608552e..b54f91043b 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -46,7 +46,7 @@ workflow ATAC {
     String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
   }
 
-  String pipeline_version = "2.1.1"
+  String pipeline_version = "2.2.0"
 
   # Determine docker prefix based on cloud provider
   String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md
index 3d353e09c7..9dcb41f6bc 100644
--- a/pipelines/skylab/optimus/Optimus.changelog.md
+++ b/pipelines/skylab/optimus/Optimus.changelog.md
@@ -1,7 +1,7 @@
-# 7.3.1
+# 7.4.0
 2024-07-11 (Date of Last Commit)
 
-* Updated the Optimus.wdl to run on Azure
+* Updated the Optimus.wdl to run on Azure. cloud_provider is a new, required input.
 * Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers.
 
 # 7.3.0
diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index 30a57be5db..43986ffb79 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -71,7 +71,7 @@ workflow Optimus {
   # version of this pipeline
 
 
-  String pipeline_version = "7.3.1"
+  String pipeline_version = "7.4.0"
 
 
   # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md
index b75d12836b..d6eefd1ae3 100644
--- a/pipelines/skylab/paired_tag/PairedTag.changelog.md
+++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md
@@ -1,7 +1,7 @@
-# 1.2.1
+# 1.3.0
 2024-07-11 (Date of Last Commit)
 
-* Updated the PairedTag.wdl to run on Azure
+* Updated the PairedTag.wdl to run on Azure. cloud_provider is a new, required input.
 
 # 1.2.0
 2024-07-09 (Date of Last Commit)
diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl
index cbf801e0ee..0ac9aeb8db 100644
--- a/pipelines/skylab/paired_tag/PairedTag.wdl
+++ b/pipelines/skylab/paired_tag/PairedTag.wdl
@@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow PairedTag {
 
-    String pipeline_version = "1.2.1"
+    String pipeline_version = "1.3.0"
 
 
     input {
diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md
index 1ea407cea6..aed4dcd7a7 100644
--- a/pipelines/skylab/slideseq/SlideSeq.changelog.md
+++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md
@@ -1,7 +1,7 @@
-# 3.1.9
+# 3.2.0
 2024-07-11 (Date of Last Commit)
 
-* Updated the Optimus.wdl to run on Azure. This change does not affect the SlideSeq pipeline.
+* Updated the Optimus.wdl to run on Azure. cloud_provider is a new, required input.
 
 # 3.1.8
 2024-07-09 (Date of Last Commit)
diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index a5aba56884..1de6ce6fdf 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow SlideSeq {
 
-    String pipeline_version = "3.1.9"
+    String pipeline_version = "3.2.0"
 
     input {
         Array[File] r1_fastq
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
index 3850f9db8b..4d00d91015 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
@@ -1,7 +1,7 @@
-# 1.3.6
+# 1.4.0
 2024-07-11 (Date of Last Commit)
 
-* Updated the PairedTag.wdl to run on Azure. This change does not affect the MultiSampleSmartSeq2SingleNucleus pipeline.
+* Updated the PairedTag.wdl to run on Azure. cloud_provider is a new, required input.
 * Added new optional input parameter of gex_nhash_id to the STARAlign task; this does not impact the MultiSampleSmartSeq2SingleNucleus workflow 
 
 # 1.3.5
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
index 9007815574..61673ffcb5 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
@@ -57,7 +57,7 @@ workflow MultiSampleSmartSeq2SingleNucleus {
   }
 
   # Version of this pipeline
-  String pipeline_version = "1.3.6"
+  String pipeline_version = "1.4.0"
 
   if (false) {
      String? none = "None"

From 2543a79702d0aabe05ba9ff77d7cffb9e4146dab Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 15 Jul 2024 10:45:40 -0400
Subject: [PATCH 180/186] Pin docker images to a version tag or sha256 digest instead of latest

---
 .../germline/joint_genotyping/reblocking/ReblockGVCF.wdl      | 2 +-
 .../single_sample/exome/ExomeGermlineSingleSample.wdl         | 2 +-
 .../broad/dna_seq/germline/variant_calling/VariantCalling.wdl | 2 +-
 pipelines/skylab/optimus/Optimus.wdl                          | 4 ++--
 pipelines/skylab/slideseq/SlideSeq.wdl                        | 2 +-
 .../MultiSampleSmartSeq2SingleNucleus.wdl                     | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl
index f9a14011dc..e64854bf30 100644
--- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl
+++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl
@@ -27,7 +27,7 @@ workflow ReblockGVCF {
   String gvcf_basename = basename(gvcf, gvcf_file_extension)
   # docker images
   String gatk_docker_gcp = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
-  String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:latest"
+  String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:1.0.0"
   String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure
 
   # make sure either gcp or azr is supplied as cloud_provider input
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
index 00769b467c..1197dcbdd2 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
@@ -69,7 +69,7 @@ workflow ExomeGermlineSingleSample {
 
   # docker images
   String gatk_docker_gcp = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
-  String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:latest"
+  String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:1.0.0"
   String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure
 
   # make sure either gcp or azr is supplied as cloud_provider input
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index d352b628bc..ba265a80ef 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -41,7 +41,7 @@ workflow VariantCalling {
 
   # docker images
   String gatk_docker_gcp = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
-  String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:latest"
+  String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:1.0.0"
   String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure
   
   String gatk_1_3_docker_gcp = "us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384"
diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index 43986ffb79..0312b41b02 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -96,12 +96,12 @@ workflow Optimus {
   String star_merge_docker = "star-merge-npz:1.2"
 
   #TODO how do we handle these?
-  String alpine_docker = "alpine-bash:latest"
+  String alpine_docker = "alpine-bash@sha256:965a718a07c700a5204c77e391961edee37477634ce2f9cf652a8e4c2db858ff"
   String gcp_alpine_docker_prefix = "bashell/"
   String acr_alpine_docker_prefix = "dsppipelinedev.azurecr.io/"
   String alpine_docker_prefix = if cloud_provider == "gcp" then gcp_alpine_docker_prefix else acr_alpine_docker_prefix
 
-  String ubuntu_docker = "ubuntu_16_0_4:latest"
+  String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf"
   String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/"
   String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/"
   String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix
diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index 1de6ce6fdf..409e3123b6 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -52,7 +52,7 @@ workflow SlideSeq {
     String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985"
     String star_merge_docker = "star-merge-npz:1.2"
 
-    String ubuntu_docker = "ubuntu_16_0_4:latest"
+    String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf"
     String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/"
     String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/"
     String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
index 61673ffcb5..068b35003d 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
@@ -43,7 +43,7 @@ workflow MultiSampleSmartSeq2SingleNucleus {
       String cloud_provider
   }
 
-  String ubuntu_docker = "ubuntu_16_0_4:latest"
+  String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf"
   String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/"
   String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/"
   String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix

From a002052f10c61b1f94d73edd0744da2c9980b788 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 15 Jul 2024 10:49:05 -0400
Subject: [PATCH 181/186] Remove extra blank line from WGS masked_reference plumbing inputs JSON

---
 ...omeGermlineSingleSample.inputs.plumbing.masked_reference.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json
index 309e93f9bd..871c5589de 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json
@@ -51,7 +51,6 @@
   "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz.tbi",
   "WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list",
   "WholeGenomeGermlineSingleSample.cloud_provider": "gcp",
-
   "WholeGenomeGermlineSingleSample.papi_settings": {
     "preemptible_tries": 3,
     "agg_preemptible_tries": 3

From e3724ebd3010be4286a58cb26630452afd57fe27 Mon Sep 17 00:00:00 2001
From: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com>
Date: Mon, 15 Jul 2024 10:50:40 -0400
Subject: [PATCH 182/186] Apply suggestions from code review

Co-authored-by: ekiernan <55763654+ekiernan@users.noreply.github.com>
---
 .../single_sample/exome/ExomeGermlineSingleSample.changelog.md  | 2 +-
 .../ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md        | 2 +-
 .../wgs/WholeGenomeGermlineSingleSample.changelog.md            | 2 +-
 .../ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md        | 2 +-
 .../genotyping/illumina/IlluminaGenotypingArray.changelog.md    | 2 +-
 .../arrays/imputation/BroadInternalImputation.changelog.md      | 2 +-
 .../internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md      | 2 +-
 pipelines/broad/qc/CheckFingerprint.changelog.md                | 2 +-
 pipelines/skylab/optimus/Optimus.changelog.md                   | 2 +-
 pipelines/skylab/snm3C/snm3C.changelog.md                       | 2 +-
 website/docs/Pipelines/Multiome_Pipeline/README.md              | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
index daaa5acd40..45a45b81eb 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
@@ -1,7 +1,7 @@
 # 3.1.21
 2024-07-09 (Date of Last Commit)
 
-* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers.
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers
 
 # 3.1.20
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
index d355ea7e04..88c0a43cbf 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
@@ -1,7 +1,7 @@
 # 1.0.18
 2024-07-09 (Date of Last Commit)
 
-* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers; this change does not affect this pipeline
 
 # 1.0.17
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
index 7d4242d6fb..f8be38c09f 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
@@ -2,7 +2,7 @@
 2024-07-09 (Date of Last Commit)
 
 * Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input.
-* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers; this change does not affect this pipeline
 
 # 3.1.21
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md
index af72457ba8..21120b9c7e 100644
--- a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md
+++ b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md
@@ -1,7 +1,7 @@
 # 1.0.18
 2024-07-09 (Date of Last Commit)
 
-* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline
 
 # 1.0.17
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md
index 0a006dc85a..7e775a6553 100644
--- a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md
+++ b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md
@@ -1,7 +1,7 @@
 # 1.12.19
 2024-07-09 (Date of Last Commit)
 
-* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers.
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers
 
 # 1.12.18
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md
index 6d45d66333..4ff223caf2 100644
--- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md
+++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md
@@ -1,7 +1,7 @@
 # 1.1.11
 2024-05-21 (Date of Last Commit)
 
-* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers; this change does not affect this pipeline
 
 # 1.1.10
 2023-12-18 (Date of Last Commit)
diff --git a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md
index 652e81bb01..407ff0c43d 100644
--- a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md
+++ b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md
@@ -1,6 +1,6 @@
 # 1.0.31
 2024-07-09
-* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline
 
 # 1.0.30
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/broad/qc/CheckFingerprint.changelog.md b/pipelines/broad/qc/CheckFingerprint.changelog.md
index f139f145ad..9f11431c50 100644
--- a/pipelines/broad/qc/CheckFingerprint.changelog.md
+++ b/pipelines/broad/qc/CheckFingerprint.changelog.md
@@ -1,7 +1,7 @@
 # 1.0.18
 2024-07-00 (Date of Last Commit)
 
-* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline.
+* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline
 
 # 1.0.17
 2024-07-01 (Date of Last Commit)
diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md
index 9dcb41f6bc..cdbfca2c42 100644
--- a/pipelines/skylab/optimus/Optimus.changelog.md
+++ b/pipelines/skylab/optimus/Optimus.changelog.md
@@ -2,7 +2,7 @@
 2024-07-11 (Date of Last Commit)
 
 * Updated the Optimus.wdl to run on Azure. cloud_provider is a new, required input.
-* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers.
+* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers
 
 # 7.3.0
 2024-07-09 (Date of Last Commit)
diff --git a/pipelines/skylab/snm3C/snm3C.changelog.md b/pipelines/skylab/snm3C/snm3C.changelog.md
index 327b404dfb..8cf6455276 100644
--- a/pipelines/skylab/snm3C/snm3C.changelog.md
+++ b/pipelines/skylab/snm3C/snm3C.changelog.md
@@ -1,7 +1,7 @@
 # 4.0.2
 2024-07-09 (Date of Last Commit)
 
-* Updated the snM3C wdl to run on Azure. This change does not affect the snM3C pipeline.
+* Updated the snM3C wdl to run on Azure; this change does not affect the snM3C pipeline
 
 # 4.0.1
 2024-06-26 (Date of Last Commit)
diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md
index 51a820034d..bfe793457a 100644
--- a/website/docs/Pipelines/Multiome_Pipeline/README.md
+++ b/website/docs/Pipelines/Multiome_Pipeline/README.md
@@ -121,7 +121,7 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt
 | multimappers_Rescue_matrix | `UniqueAndMult-Rescue.mtx` | Optional output produced when `soloMultiMappers` is "Rescue"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. |
 | multimappers_PropUnique_matrix | `UniqueAndMult-PropUnique.mtx` | Optional output produced when `soloMultiMappers` is "PropUnique"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.|
 | gex_aligner_metrics | `<input_id>.star_metrics.tar` | Text file containing per barcode metrics (`CellReads.stats`) produced by the GEX pipeline STARsolo aligner. |
-| library_metrics | `<input_id>_library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. |
+| library_metrics | `<input_id>_<nhash_id>_library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. |
 | mtx_files | `<input_id>.mtx_files.tar` | TAR file with STARsolo matrix market files (barcodes.tsv, features.tsv, and matrix.mtx) | TAR |
 | cell_barcodes_csv | `<cell_csv>` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.|
 | checkpoint_file | `<ckpt_file>` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. |

From 7807103ab81d8be91dfdd95b306c0b28393c7ef8 Mon Sep 17 00:00:00 2001
From: npetrill <npetrill@broadinstitute.org>
Date: Mon, 15 Jul 2024 10:54:21 -0400
Subject: [PATCH 183/186] changelog

---
 .../external/exome/ExternalExomeReprocessing.changelog.md       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
index 37e685e086..ea7abd045b 100644
--- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
@@ -1,4 +1,4 @@
-# 3.1.23
+# 3.2.0
 2024-07-09 (Date of Last Commit)
 
 * Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input.

From ad7d2a88fc21c6a3318c6ec90a804887317a80e6 Mon Sep 17 00:00:00 2001
From: ekiernan <ekiernan@broadinstitute.org>
Date: Mon, 15 Jul 2024 12:53:19 -0400
Subject: [PATCH 184/186] doc reformatting

---
 website/docs/Pipelines/ATAC/README.md               | 4 ++--
 website/docs/Pipelines/PairedTag_Pipeline/README.md | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md
index 286ea6898a..06989e960f 100644
--- a/website/docs/Pipelines/ATAC/README.md
+++ b/website/docs/Pipelines/ATAC/README.md
@@ -44,8 +44,8 @@ ATAC can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/stable/
 ## Input Variables
 The following describes the inputs of the ATAC workflow. For more details on how default inputs are set for the Multiome workflow, see the [Multiome overview](../Multiome_Pipeline/README).
 
-| Variable name | Description                                                                                                     |
-| --- |-----------------------------------------------------------------------------------------------------------------|
+| Variable name | Description |
+| --- | --- |
 | read1_fastq_gzipped | Fastq inputs (array of compressed read 1 FASTQ files).                                                          |
 | read2_fastq_gzipped | Fastq inputs (array of compressed read 2 FASTQ files containing cellular barcodes).                             |
 | read3_fastq_gzipped | Fastq inputs (array of compressed read 3 FASTQ files).                                                          |
diff --git a/website/docs/Pipelines/PairedTag_Pipeline/README.md b/website/docs/Pipelines/PairedTag_Pipeline/README.md
index 679bf97318..a203d53447 100644
--- a/website/docs/Pipelines/PairedTag_Pipeline/README.md
+++ b/website/docs/Pipelines/PairedTag_Pipeline/README.md
@@ -6,7 +6,7 @@ slug: /Pipelines/PairedTag_Pipeline/README
 # Paired-Tag Overview
 
 |                          Pipeline Version                           | Date Updated | Documentation Author | Questions or Feedback |
-|:-------------------------------------------------------------------:| :---: | :----: | :--------------: |
+| :---: | :---: | :---: | :---: |
 | [PairedTag_v1.0.1](https://github.com/broadinstitute/warp/releases) | June, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
 
 
@@ -91,7 +91,7 @@ The Paired-Tag workflow inputs are specified in JSON configuration files. Exampl
 The Paired-Tag workflow calls two WARP subworkflows and an additional task which are described briefly in the table below. For more details on each subworkflow and task, see the documentation and WDL scripts linked in the table.
 
 | Subworkflow/Task | Software | Description | 
-| ----------- | -------- | ----------- |
+| --- | --- | --- |
 | Optimus ([WDL](https://github.com/broadinstitute/warp/blob/develop/pipelines/skylab/optimus/Optimus.wdl) and [documentation](../Optimus_Pipeline/README)) | fastqprocess, STARsolo, Emptydrops | Workflow used to analyze 10x single-cell GEX data. |
 | PairedTagDemultiplex as demultiplex ([WDL](https://github.com/broadinstitute/warp/blob/develop/tasks/skylab/PairedTagUtils.wdl)) | UPStools | Task used to check the length of the read2 FASTQ (should be either 27 or 24 bp). If `preindex` is set to true, the task will perform demultiplexing of the 3-bp sample barcode from the read2 ATAC fastq files and stores it in the readname. It will then perform barcode orientation checking. The ATAC workflow will then add a combined 3 bp sample barcode and cellular barcode to the BB tag of the BAM. If `preindex` is false and then length is 27 bp, the task will perform trimming and subsequent barcode orientation checking. |
 | ATAC ([WDL](https://github.com/broadinstitute/warp/blob/develop/pipelines/skylab/multiome/atac.wdl) and [documentation](../ATAC/README)) | fastqprocess, bwa-mem, SnapATAC2 | Workflow used to analyze single-nucleus paired-tag DNA (histone modifications) data. |

From 3c4d9115bb937220f614d22ebdd8f3b37b466c6d Mon Sep 17 00:00:00 2001
From: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com>
Date: Mon, 15 Jul 2024 13:00:39 -0400
Subject: [PATCH 185/186] Update
 pipelines/broad/arrays/imputation/Imputation.changelog.md

---
 pipelines/broad/arrays/imputation/Imputation.changelog.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/broad/arrays/imputation/Imputation.changelog.md b/pipelines/broad/arrays/imputation/Imputation.changelog.md
index 02b32dc771..e798bc7671 100644
--- a/pipelines/broad/arrays/imputation/Imputation.changelog.md
+++ b/pipelines/broad/arrays/imputation/Imputation.changelog.md
@@ -1,5 +1,5 @@
 # 1.1.13
-2023-05-21 (Date of Last Commit)
+2024-05-21 (Date of Last Commit)
 
 * Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
 

From 0549763e85dea41aba941bdd44f9b2b63bc82b66 Mon Sep 17 00:00:00 2001
From: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com>
Date: Mon, 15 Jul 2024 13:07:05 -0400
Subject: [PATCH 186/186] Update
 pipelines/skylab/slideseq/SlideSeq.changelog.md

Co-authored-by: ekiernan <55763654+ekiernan@users.noreply.github.com>
---
 pipelines/skylab/slideseq/SlideSeq.changelog.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md
index aed4dcd7a7..bdef191cc9 100644
--- a/pipelines/skylab/slideseq/SlideSeq.changelog.md
+++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md
@@ -1,7 +1,7 @@
 # 3.2.0
 2024-07-11 (Date of Last Commit)
 
-* Updated the Optimus.wdl to run on Azure. cloud_provider is a new, required input.
+* Updated the Optimus.wdl to run on Azure; cloud_provider is a new, required input
 
 # 3.1.8
 2024-07-09 (Date of Last Commit)