Merge branch 'develop' into azurized_wdls

broadinstitute · Jul 11, 2024 · a12c5a7 · a12c5a7
2 parents 55385c6 + b8a753e
commit a12c5a7
Show file tree

Hide file tree

Showing 34 changed files with 174 additions and 60 deletions.
diff --git a/.github/workflows/warp_release.yml b/.github/workflows/warp_release.yml
@@ -1,4 +1,3 @@
-
 # This action releases from develop/master for all changed pipelines
 name: WARP Release
 
@@ -40,16 +39,20 @@ jobs:
       run: |
         source scripts/common.sh
         set -e
-        if [[ "${GIT_BRANCH}" == "develop" ]]; then
+        BRANCH_NAME=$(echo "${GITHUB_REF#refs/heads/}")
+        if [[ "${BRANCH_NAME}" == "develop" ]]; then
             ENV=dev
-        elif [[ "${GIT_BRANCH}" == "master" ]]; then
+        elif [[ "${BRANCH_NAME}" == "master" ]]; then
             ENV=prod
+        else
+            echo "Error: Branch ${BRANCH_NAME} is not a valid release branch."
+            exit 1
         fi
         echo $ENV
         echo "Getting all changed pipelines since last commit before releasing from develop"
         previous_commit_hash=$(git rev-parse HEAD^1)
         changed_pipelines=$(get_modified_pipelines ${previous_commit_hash})
-        echo branch: ${GIT_BRANCH} previous_commit_hash: ${previous_commit_hash} env: ${ENV}
+        echo branch: ${BRANCH_NAME} previous_commit_hash: ${previous_commit_hash} env: ${ENV}
         if [[ -n ${ENV} ]]; then
             if [[ -n ${changed_pipelines[@]} ]]; then
                 for pipeline in ${changed_pipelines[@]}; do
@@ -62,6 +65,4 @@ jobs:
             echo "Releases are only made on merge to develop and master" 
         fi
       env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md
@@ -1,8 +1,11 @@
-# 5.1.1
+# 5.2.1
+2024-07-11 (Date of Last Commit)
+
+* Updated the Multiome.wdl to run on Azure
+
+# 5.2.0
 2024-07-09 (Date of Last Commit)
 
+* Added new optional input parameter nhash_id, an identifier for a library aliquot that is echoed in the ATAC fragment h5ad, the gene expression h5ad (in the data.uns), and the gene expression library metrics CSV output; default is set to null
 * Added test statements again for GH action (to release from develop). Will probably revert
-* Updated the Multiome.wdl to run on Azure
 
 # 5.1.0
 2024-06-28 (Date of Last Commit)

diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
@@ -9,12 +9,14 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Multiome {
 
-    String pipeline_version = "5.1.1"
+    String pipeline_version = "5.2.1"
 
 
     input {
         String cloud_provider
         String input_id
+        # Additional library aliquot ID
+        String? nhash_id
 
         # Optimus Inputs
         String counting_mode = "sn_rna"
@@ -87,6 +89,7 @@ workflow Multiome {
             i1_fastq = gex_i1_fastq,
             input_id = input_id + "_gex",
             output_bam_basename = input_id + "_gex",
+            gex_nhash_id = nhash_id,
             tar_star_reference = tar_star_reference,
             annotations_gtf = annotations_gtf,
             mt_genes = mt_genes,
@@ -113,9 +116,10 @@ workflow Multiome {
             chrom_sizes = chrom_sizes,
             whitelist = atac_whitelist,
             adapter_seq_read1 = adapter_seq_read1,
-            adapter_seq_read3 = adapter_seq_read3,
             vm_size = vm_size,
-            annotations_gtf = annotations_gtf
+            annotations_gtf = annotations_gtf,
+            atac_nhash_id = nhash_id,
+            adapter_seq_read3 = adapter_seq_read3
     }
     call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes {
         input:

diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md
@@ -1,7 +1,12 @@
-# 2.0.1
-2024-07-09 (Date of Last Commit)
+# 2.1.1
+2024-07-11 (Date of Last Commit)
 
 * Updated the atac.wdl to run on Azure
+
+# 2.1.0
+2024-07-09 (Date of Last Commit)
+
+* Added new optional input parameter of atac_nhash_id, an identifier for a library aliquot that is echoed in the atac fragment metrics h5ad (in the data.uns); default is set to null 
 * Added test statements again for GH action (to release from develop). Will probably revert
 
 # 2.0.0

diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
@@ -20,6 +20,8 @@ workflow ATAC {
     # Output prefix/base name for all intermediate files and pipeline outputs
     String input_id
     String cloud_provider
+    # Additional library aliquot ID
+    String? atac_nhash_id
 
     # Option for running files with preindex
     Boolean preindex = false
@@ -66,6 +68,7 @@ workflow ATAC {
     }
   }
 
+  String pipeline_version = "2.1.1"
 
   parameter_meta {
     read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads"
@@ -136,7 +139,8 @@ workflow ATAC {
         chrom_sizes = chrom_sizes,
         annotations_gtf = annotations_gtf,
         preindex = preindex,
-        docker_path = docker_prefix + snap_atac_docker
+        docker_path = docker_prefix + snap_atac_docker,
+        atac_nhash_id = atac_nhash_id
     }
   }
   if (!preindex) {
@@ -146,7 +150,8 @@ workflow ATAC {
         chrom_sizes = chrom_sizes,
         annotations_gtf = annotations_gtf,
         preindex = preindex,
-        docker_path = docker_prefix + snap_atac_docker
+        docker_path = docker_prefix + snap_atac_docker,
+        atac_nhash_id = atac_nhash_id
 
     }
   }
@@ -505,6 +510,7 @@ task CreateFragmentFile {
     Int nthreads = 4
     String cpuPlatform = "Intel Cascade Lake"
     String docker_path
+    String atac_nhash_id = ""
   }
 
   String bam_base_name = basename(bam, ".bam")
@@ -529,6 +535,7 @@ task CreateFragmentFile {
     chrom_sizes = "~{chrom_sizes}"
     atac_gtf = "~{annotations_gtf}"
     preindex = "~{preindex}"
+    atac_nhash_id = "~{atac_nhash_id}"
 
     # calculate chrom size dictionary based on text file
     chrom_size_dict={}
@@ -553,6 +560,8 @@ task CreateFragmentFile {
     # those settings allow us to retain all barcodes
     pp.import_data("~{bam_base_name}.fragments.tsv", file="temp_metrics.h5ad", chrom_sizes=chrom_size_dict, min_num_fragments=0)
     atac_data = ad.read_h5ad("temp_metrics.h5ad")
+    # Add nhash_id to h5ad file as unstructured metadata
+    atac_data.uns['NHashID'] = atac_nhash_id
     # calculate tsse metrics
     snap.metrics.tsse(atac_data, atac_gtf)
     # Write new atac file

diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
@@ -24,5 +24,6 @@
   "Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake",
   "Multiome.Atac.num_threads_bwa":"16",
   "Multiome.Atac.mem_size_bwa":"64", 
-  "Multiome.soloMultiMappers":"Uniform"
+  "Multiome.soloMultiMappers":"Uniform",
+  "Multiome.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json
@@ -31,5 +31,6 @@
   "Multiome.chrom_sizes":"gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes",
   "Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake",
   "Multiome.Atac.num_threads_bwa":"24",
-  "Multiome.Atac.mem_size_bwa":"175"
+  "Multiome.Atac.mem_size_bwa":"175",
+  "Multiome.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md
@@ -1,9 +1,13 @@
-# 7.2.1
-2024-07-09 (Date of Last Commit)
+# 7.3.1
+2024-07-11 (Date of Last Commit)
 
 * Updated the Optimus.wdl to run on Azure
 * Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers.
 
+# 7.3.0
+2024-07-09 (Date of Last Commit)
+
+* Added new optional input parameter of gex_nhash_id, a string identifier for a library aliquot that is echoed in the h5ad cell by gene matrix (in the data.uns) and the library metrics CSV output; default is set to an empty string
 
 # 7.2.0
 2024-06-28 (Date of Last Commit)

diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
@@ -25,6 +25,8 @@ workflow Optimus {
     Array[File] r2_fastq
     Array[File]? i1_fastq
     String input_id
+    # String for additional library aliquot ID
+    String? gex_nhash_id = ""
     String output_bam_basename = input_id
     String? input_name
     String? input_id_metadata_field
@@ -69,7 +71,7 @@ workflow Optimus {
   # version of this pipeline
 
 
-  String pipeline_version = "7.2.1"
+  String pipeline_version = "7.3.1"
 
 
   # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
@@ -221,7 +223,8 @@ workflow Optimus {
       input_id = input_id,
       counting_mode = counting_mode,
       star_merge_docker_path = docker_prefix + star_merge_docker,
-      expected_cells = expected_cells
+      expected_cells = expected_cells,
+      gex_nhash_id = gex_nhash_id
   }
   if (counting_mode == "sc_rna"){
     call RunEmptyDrops.RunEmptyDrops {
@@ -238,6 +241,7 @@ workflow Optimus {
     call H5adUtils.OptimusH5adGeneration{
       input:
         input_id = input_id,
+        gex_nhash_id = gex_nhash_id,
         input_name = input_name,
         input_id_metadata_field = input_id_metadata_field,
         input_name_metadata_field = input_name_metadata_field,
@@ -266,12 +270,13 @@ workflow Optimus {
         align_features = STARsoloFastq.align_features_sn_rna,
         umipercell = STARsoloFastq.umipercell_sn_rna,
         input_id = input_id,
-        star_merge_docker_path = docker_prefix + star_merge_docker
-
+        star_merge_docker_path = docker_prefix + star_merge_docker,
+        gex_nhash_id = gex_nhash_id     
     }
     call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{
       input:
         input_id = input_id,
+        gex_nhash_id = gex_nhash_id,
         input_name = input_name,
         input_id_metadata_field = input_id_metadata_field,
         input_name_metadata_field = input_name_metadata_field,

diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json
@@ -16,5 +16,6 @@
   "Optimus.tenx_chemistry_version": "3",
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf",
   "Optimus.star_strand_mode": "Forward",
-  "Optimus.cloud_provider": "gcp"
+  "Optimus.cloud_provider": "gcp",
+  "Optimus.gex_nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json
@@ -27,6 +27,7 @@
   "Optimus.input_id": "neurons2k_mouse",
   "Optimus.tenx_chemistry_version": "2",
   "Optimus.star_strand_mode": "Unstranded",
-  "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf",
-  "Optimus.cloud_provider": "gcp"
+  "Optimus.cloud_provider": "gcp",
+  "Optimus.gex_nhash_id":"example_1234",
+  "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf"
 }
diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json
@@ -26,5 +26,6 @@
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf",
   "Optimus.counting_mode": "sn_rna",
   "Optimus.count_exons": true,
-  "Optimus.cloud_provider": "gcp"
+  "Optimus.cloud_provider": "gcp",
+  "Optimus.gex_nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc.json b/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc.json
@@ -15,6 +15,7 @@
   "Optimus.input_id": "8k_pbmc",
   "Optimus.tenx_chemistry_version": "2",
   "Optimus.star_strand_mode": "Unstranded",
+  "Optimus.gex_nhash_id":"example_1234",
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf"
 }
 

diff --git a/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc_stranded.json b/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc_stranded.json
@@ -15,6 +15,7 @@
   "Optimus.input_id": "8k_pbmc",
   "Optimus.tenx_chemistry_version": "2",
   "Optimus.star_strand_mode": "Forward",
+  "Optimus.gex_nhash_id":"example_1234",
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/gencode.v27.primary_assembly.annotation.gtf"
 }
 

diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md
@@ -1,7 +1,12 @@
-# 1.1.1
-2024-07-09 (Date of Last Commit)
+# 1.2.1
+2024-07-11 (Date of Last Commit)
 
 * Updated the PairedTag.wdl to run on Azure
+
+# 1.2.0
+2024-07-09 (Date of Last Commit)
+
+* Added new optional input parameter nhash_id, an identifier for a library aliquot that is echoed in the ATAC workflow fragment h5ad, the Optimus workflow gene expression h5ad (in the data.uns), and the Optimus gene expression library metrics CSV output; default is set to null
 * Added test statements again for GH action (to release from develop). Will probably revert
 
 # 1.1.0

diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl
@@ -7,10 +7,14 @@ import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing
 import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow PairedTag {
-    String pipeline_version = "1.1.1"
+
+    String pipeline_version = "1.2.1"
+
 
     input {
         String input_id
+        # Additional library aliquot id
+        String? nhash_id
 
         # Optimus Inputs
         String counting_mode = "sn_rna"
@@ -92,7 +96,8 @@ workflow PairedTag {
             star_strand_mode = star_strand_mode,
             count_exons = count_exons,
             cloud_provider = cloud_provider,
-            soloMultiMappers = soloMultiMappers
+            soloMultiMappers = soloMultiMappers,
+            gex_nhash_id = nhash_id
     }
 
     # Call the ATAC workflow
@@ -125,7 +130,8 @@ workflow PairedTag {
             annotations_gtf = annotations_gtf,
             preindex = preindex,
             cloud_provider = cloud_provider,
-            vm_size = vm_size
+            vm_size = vm_size,
+            atac_nhash_id = nhash_id
     }
 
     if (preindex) {

diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json
@@ -24,5 +24,6 @@
   "PairedTag.Atac_preindex.num_threads_bwa":"16",
   "PairedTag.Atac_preindex.mem_size_bwa":"64",
   "PairedTag.soloMultiMappers":"Uniform",
-  "PairedTag.cloud_provider": "gcp"
+  "PairedTag.cloud_provider": "gcp",
+  "PairedTag.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json
@@ -24,5 +24,6 @@
   "PairedTag.Atac_preindex.num_threads_bwa":"16",
   "PairedTag.Atac_preindex.mem_size_bwa":"64", 
   "PairedTag.soloMultiMappers":"Uniform",
-  "PairedTag.cloud_provider": "gcp"
+  "PairedTag.cloud_provider": "gcp",
+  "PairedTag.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json
@@ -24,5 +24,6 @@
   "PairedTag.Atac_preindex.num_threads_bwa":"16",
   "PairedTag.Atac_preindex.mem_size_bwa":"64", 
   "PairedTag.soloMultiMappers":"Uniform",
-  "PairedTag.cloud_provider": "gcp"
+  "PairedTag.cloud_provider": "gcp",
+  "PairedTag.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/paired_tag/test_inputs/Scientific/10k_pbmc.json b/pipelines/skylab/paired_tag/test_inputs/Scientific/10k_pbmc.json
@@ -32,5 +32,6 @@
   "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
   "PairedTag.Atac_preindex.num_threads_bwa":"24",
   "PairedTag.Atac_preindex.mem_size_bwa":"175", 
-  "PairedTag.soloMultiMappers":"Uniform"
+  "PairedTag.soloMultiMappers":"Uniform",
+  "PairedTag.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/paired_tag/test_inputs/Scientific/BC011_10kPBMC.json b/pipelines/skylab/paired_tag/test_inputs/Scientific/BC011_10kPBMC.json
@@ -29,5 +29,6 @@
   "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
   "PairedTag.Atac_preindex.num_threads_bwa":"16",
   "PairedTag.Atac_preindex.mem_size_bwa":"64", 
-  "PairedTag.soloMultiMappers":"Uniform"
+  "PairedTag.soloMultiMappers":"Uniform",
+  "PairedTag.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md
@@ -1,8 +1,12 @@
+# 3.1.9
+2024-07-11 (Date of Last Commit)
+
+* Updated the Optimus.wdl to run on Azure. This change does not affect the SlideSeq pipeline.
+
 # 3.1.8
 2024-07-09 (Date of Last Commit)
 
-* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline.
-* Updated the Optimus.wdl to run on Azure. This change does not affect the SlideSeq pipeline.
+* Added new optional input parameter of gex_nhash_id to the STARAlign task; this does not impact the SlideSeq workflow 
 
 # 3.1.7
 2024-06-28 (Date of Last Commit)