From e52f2c1aabdb88b565c132e5587d05b91c4e6c0f Mon Sep 17 00:00:00 2001
From: ekiernan <55763654+ekiernan@users.noreply.github.com>
Date: Thu, 11 Jul 2024 09:30:14 -0400
Subject: [PATCH 1/2] added nhash_id to multiome, optimus, paired-tag, and atac
 (#1316)

* added nhash_id to multiome, optimus, paired-tag, and atac

* changed to null input

* Fix testing wdls for nhash_id

* fixing nhash id errors

* fixed nhash_id examples and verification wdls

* made gex_nhash_id optional in the StarAlign MergeStarOutput task

* fixing h5ad utils

* fixing nhash_id python variable in atac wdl

* fixed h5ad variable name in atac fragment file creation

* fixed echo in STARAlign for nhash id

* passing nhash id through pipeline

* updated changelogs

* updated documentation for nhash_id
---
 .../skylab/multiome/Multiome.changelog.md     |  4 +-
 pipelines/skylab/multiome/Multiome.wdl        |  7 +++-
 pipelines/skylab/multiome/atac.changelog.md   |  3 +-
 pipelines/skylab/multiome/atac.wdl            | 14 +++++--
 .../Plumbing/10k_pbmc_downsampled.json        |  3 +-
 .../test_inputs/Scientific/10k_pbmc.json      |  3 +-
 pipelines/skylab/optimus/Optimus.changelog.md |  5 +++
 pipelines/skylab/optimus/Optimus.wdl          | 12 ++++--
 .../Plumbing/human_v3_example.json            |  3 +-
 .../Plumbing/mouse_v2_example.json            |  1 +
 .../Plumbing/mouse_v2_snRNA_example.json      |  1 +
 .../Scientific/inputs_8k_pbmc.json            |  1 +
 .../Scientific/inputs_8k_pbmc_stranded.json   |  1 +
 .../skylab/paired_tag/PairedTag.changelog.md  |  4 +-
 pipelines/skylab/paired_tag/PairedTag.wdl     | 10 +++--
 .../Plumbing/10k_pbmc_downsampled.json        |  3 +-
 .../Plumbing/BC011_BC015_downsampled.json     |  3 +-
 .../Plumbing/BI015_downsampled.json           |  3 +-
 .../test_inputs/Scientific/10k_pbmc.json      |  3 +-
 .../test_inputs/Scientific/BC011_10kPBMC.json |  3 +-
 .../skylab/slideseq/SlideSeq.changelog.md     |  5 +++
 pipelines/skylab/slideseq/SlideSeq.wdl        |  2 +-
 ...iSampleSmartSeq2SingleNucleus.changelog.md |  4 ++
 .../MultiSampleSmartSeq2SingleNucleus.wdl     |  2 +-
 tasks/skylab/H5adUtils.wdl                    | 37 +++++++++++++++++++
 tasks/skylab/StarAlign.wdl                    |  8 +++-
 verification/test-wdls/TestMultiome.wdl       |  5 ++-
 verification/test-wdls/TestOptimus.wdl        |  4 +-
 verification/test-wdls/TestPairedTag.wdl      |  4 +-
 website/docs/Pipelines/ATAC/README.md         |  1 +
 .../Pipelines/ATAC/count-matrix-overview.md   |  1 +
 .../Pipelines/Multiome_Pipeline/README.md     |  3 +-
 .../Optimus_Pipeline/Library-metrics.md       |  1 +
 .../Pipelines/Optimus_Pipeline/Loom_schema.md |  1 +
 .../docs/Pipelines/Optimus_Pipeline/README.md |  3 +-
 35 files changed, 138 insertions(+), 30 deletions(-)

diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md
index 3063a9389f..94a1c07022 100644
--- a/pipelines/skylab/multiome/Multiome.changelog.md
+++ b/pipelines/skylab/multiome/Multiome.changelog.md
@@ -1,8 +1,10 @@
-# 5.1.2
+# 5.2.0
 2024-07-09 (Date of Last Commit)
 
+* Added a new optional input parameter, nhash_id, an identifier for a library aliquot that is echoed in the ATAC fragment h5ad, the gene expression h5ad (in the data.uns), and the gene expression library metrics CSV output; default is set to null
 * Added test statements again for GH action (to release from develop). Will probably revert
 
+
 # 5.1.0
 2024-06-28 (Date of Last Commit)
 
diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index f57d25ec0a..607d78d8f9 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -6,10 +6,13 @@ import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
 import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender
 
 workflow Multiome {
-    String pipeline_version = "5.1.2"
+
+    String pipeline_version = "5.2.0"
 
     input {
         String input_id
+        # Additional library aliquot ID
+        String? nhash_id
 
         # Optimus Inputs
         String counting_mode = "sn_rna"
@@ -57,6 +60,7 @@ workflow Multiome {
             i1_fastq = gex_i1_fastq,
             input_id = input_id + "_gex",
             output_bam_basename = input_id + "_gex",
+            gex_nhash_id = nhash_id,
             tar_star_reference = tar_star_reference,
             annotations_gtf = annotations_gtf,
             mt_genes = mt_genes,
@@ -82,6 +86,7 @@ workflow Multiome {
             whitelist = atac_whitelist,
             adapter_seq_read1 = adapter_seq_read1,
             annotations_gtf = annotations_gtf,
+            atac_nhash_id = nhash_id,
             adapter_seq_read3 = adapter_seq_read3
     }
     call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes {
diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md
index 89ac275882..34587f7f6f 100644
--- a/pipelines/skylab/multiome/atac.changelog.md
+++ b/pipelines/skylab/multiome/atac.changelog.md
@@ -1,6 +1,7 @@
-# 2.0.2
+# 2.1.0
 2024-07-09 (Date of Last Commit)
 
+* Added new optional input parameter of atac_nhash_id, an identifier for a library aliquot that is echoed in the atac fragment metrics h5ad (in the data.uns); default is set to null 
 * Added test statements again for GH action (to release from develop). Will probably revert
 
 # 2.0.0
diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl
index 08ad6649de..2a43694bca 100644
--- a/pipelines/skylab/multiome/atac.wdl
+++ b/pipelines/skylab/multiome/atac.wdl
@@ -18,6 +18,8 @@ workflow ATAC {
 
     # Output prefix/base name for all intermediate files and pipeline outputs
     String input_id
+    # Additional library aliquot ID
+    String? atac_nhash_id
 
     # Option for running files with preindex
     Boolean preindex = false
@@ -41,7 +43,7 @@ workflow ATAC {
     String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
   }
 
-  String pipeline_version = "2.0.2"
+  String pipeline_version = "2.1.0"
 
   parameter_meta {
     read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads"
@@ -105,7 +107,8 @@ workflow ATAC {
         bam = BBTag.bb_bam,
         chrom_sizes = chrom_sizes,
         annotations_gtf = annotations_gtf,
-        preindex = preindex
+        preindex = preindex,
+        atac_nhash_id = atac_nhash_id
     }
   }
   if (!preindex) {
@@ -114,7 +117,8 @@ workflow ATAC {
         bam = BWAPairedEndAlignment.bam_aligned_output,
         chrom_sizes = chrom_sizes,
         annotations_gtf = annotations_gtf,
-        preindex = preindex
+        preindex = preindex,
+        atac_nhash_id = atac_nhash_id
 
     }
   }
@@ -447,6 +451,7 @@ task CreateFragmentFile {
     Int mem_size = 16
     Int nthreads = 4
     String cpuPlatform = "Intel Cascade Lake"
+    String atac_nhash_id = ""
   }
 
   String bam_base_name = basename(bam, ".bam")
@@ -470,6 +475,7 @@ task CreateFragmentFile {
     chrom_sizes = "~{chrom_sizes}"
     atac_gtf = "~{annotations_gtf}"
     preindex = "~{preindex}"
+    atac_nhash_id = "~{atac_nhash_id}"
 
     # calculate chrom size dictionary based on text file
     chrom_size_dict={}
@@ -494,6 +500,8 @@ task CreateFragmentFile {
     # those settings allow us to retain all barcodes
     pp.import_data("~{bam_base_name}.fragments.tsv", file="temp_metrics.h5ad", chrom_sizes=chrom_size_dict, min_num_fragments=0)
     atac_data = ad.read_h5ad("temp_metrics.h5ad")
+    # Add nhash_id to h5ad file as unstructured metadata
+    atac_data.uns['NHashID'] = atac_nhash_id
     # calculate tsse metrics
     snap.metrics.tsse(atac_data, atac_gtf)
     # Write new atac file
diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
index 7d15111f38..297bdfa2dc 100644
--- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
+++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json
@@ -23,5 +23,6 @@
   "Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake",
   "Multiome.Atac.num_threads_bwa":"16",
   "Multiome.Atac.mem_size_bwa":"64", 
-  "Multiome.soloMultiMappers":"Uniform"
+  "Multiome.soloMultiMappers":"Uniform",
+  "Multiome.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json
index a5ddf2c947..c4965dd9a4 100644
--- a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json
+++ b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json
@@ -30,5 +30,6 @@
   "Multiome.chrom_sizes":"gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes",
   "Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake",
   "Multiome.Atac.num_threads_bwa":"24",
-  "Multiome.Atac.mem_size_bwa":"175"
+  "Multiome.Atac.mem_size_bwa":"175",
+  "Multiome.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md
index 67813f5920..4860fcb13a 100644
--- a/pipelines/skylab/optimus/Optimus.changelog.md
+++ b/pipelines/skylab/optimus/Optimus.changelog.md
@@ -1,3 +1,8 @@
+# 7.3.0
+2024-07-09 (Date of Last Commit)
+
+* Added new optional input parameter of gex_nhash_id, a string identifier for a library aliquot that is echoed in the h5ad cell by gene matrix (in the data.uns) and the library metrics CSV output; default is set to an empty string
+
 # 7.2.0
 2024-06-28 (Date of Last Commit)
 
diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index 1cdb128f17..fd79a6d50a 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -22,6 +22,8 @@ workflow Optimus {
     Array[File] r2_fastq
     Array[File]? i1_fastq
     String input_id
+    # String for additional library aliquot ID
+    String? gex_nhash_id = ""
     String output_bam_basename = input_id
     String? input_name
     String? input_id_metadata_field
@@ -66,7 +68,7 @@ workflow Optimus {
   # version of this pipeline
 
 
-  String pipeline_version = "7.2.0"
+  String pipeline_version = "7.3.0"
 
 
   # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
@@ -170,7 +172,8 @@ workflow Optimus {
       umipercell = STARsoloFastq.umipercell,
       input_id = input_id,
       counting_mode = counting_mode,
-      expected_cells = expected_cells
+      expected_cells = expected_cells,
+      gex_nhash_id = gex_nhash_id
   }
   if (counting_mode == "sc_rna"){
     call RunEmptyDrops.RunEmptyDrops {
@@ -186,6 +189,7 @@ workflow Optimus {
     call H5adUtils.OptimusH5adGeneration{
       input:
         input_id = input_id,
+        gex_nhash_id = gex_nhash_id,
         input_name = input_name,
         input_id_metadata_field = input_id_metadata_field,
         input_name_metadata_field = input_name_metadata_field,
@@ -212,11 +216,13 @@ workflow Optimus {
         summary = STARsoloFastq.summary_sn_rna,
         align_features = STARsoloFastq.align_features_sn_rna,
         umipercell = STARsoloFastq.umipercell_sn_rna,
-        input_id = input_id     
+        input_id = input_id,
+        gex_nhash_id = gex_nhash_id     
     }
     call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{
       input:
         input_id = input_id,
+        gex_nhash_id = gex_nhash_id,
         input_name = input_name,
         input_id_metadata_field = input_id_metadata_field,
         input_name_metadata_field = input_name_metadata_field,
diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json
index 612659d25c..087a8667d5 100644
--- a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json
+++ b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json
@@ -15,5 +15,6 @@
   "Optimus.input_id": "pbmc_human_v3",
   "Optimus.tenx_chemistry_version": "3",
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf",
-  "Optimus.star_strand_mode": "Forward"
+  "Optimus.star_strand_mode": "Forward",
+  "Optimus.gex_nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json
index 0dc26af9fd..39e5cf9b83 100644
--- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json
+++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json
@@ -27,5 +27,6 @@
   "Optimus.input_id": "neurons2k_mouse",
   "Optimus.tenx_chemistry_version": "2",
   "Optimus.star_strand_mode": "Unstranded",
+  "Optimus.gex_nhash_id":"example_1234",
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf"
 }
diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json
index 787a1a8347..c4f712a56b 100644
--- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json
+++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json
@@ -25,5 +25,6 @@
   "Optimus.star_strand_mode": "Unstranded",
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf",
   "Optimus.counting_mode": "sn_rna",
+  "Optimus.gex_nhash_id":"example_1234",
   "Optimus.count_exons": true
 }
diff --git a/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc.json b/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc.json
index 773af4f2f4..10d30103b3 100644
--- a/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc.json
+++ b/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc.json
@@ -15,6 +15,7 @@
   "Optimus.input_id": "8k_pbmc",
   "Optimus.tenx_chemistry_version": "2",
   "Optimus.star_strand_mode": "Unstranded",
+  "Optimus.gex_nhash_id":"example_1234",
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf"
 }
 
diff --git a/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc_stranded.json b/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc_stranded.json
index 98c9c9912d..bcdf703f58 100644
--- a/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc_stranded.json
+++ b/pipelines/skylab/optimus/test_inputs/Scientific/inputs_8k_pbmc_stranded.json
@@ -15,6 +15,7 @@
   "Optimus.input_id": "8k_pbmc",
   "Optimus.tenx_chemistry_version": "2",
   "Optimus.star_strand_mode": "Forward",
+  "Optimus.gex_nhash_id":"example_1234",
   "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/gencode.v27.primary_assembly.annotation.gtf"
 }
 
diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md
index be80a53604..747762b380 100644
--- a/pipelines/skylab/paired_tag/PairedTag.changelog.md
+++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md
@@ -1,8 +1,10 @@
-# 1.1.2
+# 1.2.0
 2024-07-09 (Date of Last Commit)
 
+* Added a new optional input parameter, nhash_id, an identifier for a library aliquot that is echoed in the ATAC workflow fragment h5ad, the Optimus workflow gene expression h5ad (in the data.uns), and the Optimus gene expression library metrics CSV output; default is set to null
 * Added test statements again for GH action (to release from develop). Will probably revert
 
+
 # 1.1.0
 2024-06-28 (Date of Last Commit)
 
diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl
index 8417bfbe50..b647ade474 100644
--- a/pipelines/skylab/paired_tag/PairedTag.wdl
+++ b/pipelines/skylab/paired_tag/PairedTag.wdl
@@ -5,10 +5,12 @@ import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
 import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
 import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing
 workflow PairedTag {
-    String pipeline_version = "1.1.2"
+    String pipeline_version = "1.2.0"
 
     input {
         String input_id
+        # Additional library aliquot id
+        String? nhash_id
 
         # Optimus Inputs
         String counting_mode = "sn_rna"
@@ -63,7 +65,8 @@ workflow PairedTag {
             ignore_r1_read_length = ignore_r1_read_length,
             star_strand_mode = star_strand_mode,
             count_exons = count_exons,
-            soloMultiMappers = soloMultiMappers
+            soloMultiMappers = soloMultiMappers,
+            gex_nhash_id = nhash_id
     }
 
     # Call the ATAC workflow
@@ -91,7 +94,8 @@ workflow PairedTag {
             adapter_seq_read1 = adapter_seq_read1,
             adapter_seq_read3 = adapter_seq_read3,
             annotations_gtf = annotations_gtf,
-            preindex = preindex
+            preindex = preindex,
+            atac_nhash_id = nhash_id
     }
 
     if (preindex) {
diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json
index 44e7247682..c2ad3acc9c 100644
--- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json
+++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json
@@ -23,5 +23,6 @@
   "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
   "PairedTag.Atac_preindex.num_threads_bwa":"16",
   "PairedTag.Atac_preindex.mem_size_bwa":"64", 
-  "PairedTag.soloMultiMappers":"Uniform"
+  "PairedTag.soloMultiMappers":"Uniform",
+  "PairedTag.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json
index 9e7b18b679..6401549cae 100644
--- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json
+++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json
@@ -23,5 +23,6 @@
   "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
   "PairedTag.Atac_preindex.num_threads_bwa":"16",
   "PairedTag.Atac_preindex.mem_size_bwa":"64", 
-  "PairedTag.soloMultiMappers":"Uniform"
+  "PairedTag.soloMultiMappers":"Uniform",
+  "PairedTag.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json
index 2bdd7a8fe2..2c2f9e5afa 100644
--- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json
+++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json
@@ -23,5 +23,6 @@
   "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
   "PairedTag.Atac_preindex.num_threads_bwa":"16",
   "PairedTag.Atac_preindex.mem_size_bwa":"64", 
-  "PairedTag.soloMultiMappers":"Uniform"
+  "PairedTag.soloMultiMappers":"Uniform",
+  "PairedTag.nhash_id":"example_1234"
 }
diff --git a/pipelines/skylab/paired_tag/test_inputs/Scientific/10k_pbmc.json b/pipelines/skylab/paired_tag/test_inputs/Scientific/10k_pbmc.json
index d4265b3c2a..2e600d192f 100644
--- a/pipelines/skylab/paired_tag/test_inputs/Scientific/10k_pbmc.json
+++ b/pipelines/skylab/paired_tag/test_inputs/Scientific/10k_pbmc.json
@@ -32,5 +32,6 @@
   "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
   "PairedTag.Atac_preindex.num_threads_bwa":"24",
   "PairedTag.Atac_preindex.mem_size_bwa":"175", 
-  "PairedTag.soloMultiMappers":"Uniform"
+  "PairedTag.soloMultiMappers":"Uniform",
+  "PairedTag.nhash_id":"example_1234"
 }
\ No newline at end of file
diff --git a/pipelines/skylab/paired_tag/test_inputs/Scientific/BC011_10kPBMC.json b/pipelines/skylab/paired_tag/test_inputs/Scientific/BC011_10kPBMC.json
index 7fd31930c9..d1f5030d5a 100644
--- a/pipelines/skylab/paired_tag/test_inputs/Scientific/BC011_10kPBMC.json
+++ b/pipelines/skylab/paired_tag/test_inputs/Scientific/BC011_10kPBMC.json
@@ -29,5 +29,6 @@
   "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
   "PairedTag.Atac_preindex.num_threads_bwa":"16",
   "PairedTag.Atac_preindex.mem_size_bwa":"64", 
-  "PairedTag.soloMultiMappers":"Uniform"
+  "PairedTag.soloMultiMappers":"Uniform",
+  "PairedTag.nhash_id":"example_1234"
 }
\ No newline at end of file
diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md
index dd4d9f9248..cb7c2cebbf 100644
--- a/pipelines/skylab/slideseq/SlideSeq.changelog.md
+++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md
@@ -1,3 +1,8 @@
+# 3.1.8
+2024-07-09 (Date of Last Commit)
+
+* Added new optional input parameter of gex_nhash_id to the STARAlign task; this does not impact the SlideSeq workflow 
+
 # 3.1.7
 2024-06-28 (Date of Last Commit)
 
diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index 09dac4dff8..3f8ddc3548 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -23,7 +23,7 @@ import "../../../tasks/skylab/MergeSortBam.wdl" as Merge
 
 workflow SlideSeq {
 
-    String pipeline_version = "3.1.7"
+    String pipeline_version = "3.1.8"
 
     input {
         Array[File] r1_fastq
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
index 4813b84c11..cfe0955206 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
@@ -1,3 +1,7 @@
+# 1.3.6
+2024-07-09 (Date of Last Commit)
+* Added new optional input parameter of gex_nhash_id to the STARAlign task; this does not impact the MultiSampleSmartSeq2SingleNucleus workflow 
+
 # 1.3.5
 2024-06-28 (Date of Last Commit)
 
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
index 9de5a0f92b..f31a3cf253 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
@@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2SingleNucleus {
       String? input_id_metadata_field
   }
   # Version of this pipeline
-  String pipeline_version = "1.3.5"
+  String pipeline_version = "1.3.6"
 
   if (false) {
      String? none = "None"
diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl
index 924b19a770..3c5e60b585 100644
--- a/tasks/skylab/H5adUtils.wdl
+++ b/tasks/skylab/H5adUtils.wdl
@@ -9,6 +9,7 @@ task OptimusH5adGeneration {
     String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
     # name of the sample
     String input_id
+    String gex_nhash_id = ""
     # user provided id
     String? input_name
     String? input_id_metadata_field
@@ -85,6 +86,23 @@ task OptimusH5adGeneration {
           --expression_data_type "whole_transcript"\
           --pipeline_version ~{pipeline_version}
     fi
+
+    # Stamp the library aliquot ID (NHashID) into the Optimus h5ad
+    python3 <<CODE
+
+    # import anndata to manipulate h5ad files
+    import anndata as ad
+
+    # set parameters (placeholders are filled in by WDL before the script runs)
+    gex_h5ad = "~{input_id}.h5ad"
+    gex_nhash_id = "~{gex_nhash_id}"
+    print("Reading Optimus h5ad:")
+    print(gex_h5ad)
+    gex_data = ad.read_h5ad(gex_h5ad)
+    # store the ID as unstructured metadata; empty string when not provided
+    gex_data.uns['NHashID'] = gex_nhash_id
+    gex_data.write(gex_h5ad)
+    CODE
   >>>
 
   runtime {
@@ -108,6 +126,8 @@ task SingleNucleusOptimusH5adOutput {
         String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
         # name of the sample
         String input_id
+        # additional aliquot id
+        String gex_nhash_id = ""
         # user provided id
         String? input_name
         String? input_id_metadata_field
@@ -167,6 +187,23 @@ task SingleNucleusOptimusH5adOutput {
         ~{"--input_name_metadata_field " + input_name_metadata_field} \
         --expression_data_type "whole_transcript" \
         --pipeline_version ~{pipeline_version}
+
+        # Stamp the library aliquot ID (NHashID) into the Optimus h5ad
+        python3 <<CODE
+
+        # import anndata to manipulate h5ad files
+        import anndata as ad
+
+        # set parameters (placeholders are filled in by WDL before the script runs)
+        gex_h5ad = "~{input_id}.h5ad"
+        gex_nhash_id = "~{gex_nhash_id}"
+        print("Reading Optimus h5ad:")
+        print(gex_h5ad)
+        gex_data = ad.read_h5ad(gex_h5ad)
+        # store the ID as unstructured metadata; empty string when not provided
+        gex_data.uns['NHashID'] = gex_nhash_id
+        gex_data.write(gex_h5ad)
+        CODE
     }
 
     runtime {
diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index 100dcb28bb..41e62ba7a9 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -443,6 +443,8 @@ task MergeStarOutput {
     String? counting_mode
     
     String input_id
+    # additional library aliquot id
+    String gex_nhash_id = ""
     Int expected_cells = 3000
     File barcodes_single = barcodes[0]
     File features_single = features[0]
@@ -571,6 +573,10 @@ task MergeStarOutput {
       outputbarcodes.tsv \
       outputmatrix.mtx \
       ~{expected_cells}
+      echo "Adding NHashID to library metrics"
+      cp ~{input_id}_library_metrics.csv ~{input_id}_library_metrics_backup.csv
+      { echo -e "~{gex_nhash_id}\n"; cat ~{input_id}_library_metrics.csv; } > ~{input_id}_~{gex_nhash_id}_library_metrics.csv
+      echo "tarring STAR txt files"
       tar -zcvf ~{input_id}.star_metrics.tar *.txt
     else
       echo "No text files found in the folder."
@@ -599,7 +605,7 @@ task MergeStarOutput {
     File col_index = "~{input_id}_sparse_counts_col_index.npy"
     File sparse_counts = "~{input_id}_sparse_counts.npz"
     File? cell_reads_out = "~{input_id}.star_metrics.tar"
-    File? library_metrics="~{input_id}_library_metrics.csv"
+    File? library_metrics="~{input_id}_~{gex_nhash_id}_library_metrics.csv"
     File? mtx_files ="~{input_id}.mtx_files.tar"
   }
 }
diff --git a/verification/test-wdls/TestMultiome.wdl b/verification/test-wdls/TestMultiome.wdl
index 9a4a0ec83a..1f8b71ba0b 100644
--- a/verification/test-wdls/TestMultiome.wdl
+++ b/verification/test-wdls/TestMultiome.wdl
@@ -10,6 +10,7 @@ workflow TestMultiome {
 
     input {
       String input_id
+      String nhash_id
 
       # Optimus Inputs
       String counting_mode = "sn_rna"
@@ -85,8 +86,8 @@ workflow TestMultiome {
         chrom_sizes = chrom_sizes,
         atac_whitelist = atac_whitelist,
         run_cellbender = run_cellbender,
-        soloMultiMappers = soloMultiMappers
-  
+        soloMultiMappers = soloMultiMappers,
+        nhash_id = nhash_id
     }
 
     
diff --git a/verification/test-wdls/TestOptimus.wdl b/verification/test-wdls/TestOptimus.wdl
index 82bdf03adc..cd9097f70c 100644
--- a/verification/test-wdls/TestOptimus.wdl
+++ b/verification/test-wdls/TestOptimus.wdl
@@ -17,6 +17,7 @@ workflow TestOptimus {
     Array[File] r2_fastq
     Array[File]? i1_fastq
     String input_id
+    String gex_nhash_id
     String output_bam_basename = input_id
     String? input_name
     String? input_id_metadata_field
@@ -84,7 +85,8 @@ workflow TestOptimus {
       star_strand_mode           = star_strand_mode,
       count_exons                = count_exons,
       ignore_r1_read_length      = ignore_r1_read_length,
-      soloMultiMappers           = soloMultiMappers
+      soloMultiMappers           = soloMultiMappers,
+      gex_nhash_id               = gex_nhash_id
   }
 
   # Collect all of the pipeling output into single Array
diff --git a/verification/test-wdls/TestPairedTag.wdl b/verification/test-wdls/TestPairedTag.wdl
index 9695fb98e6..2cfd56cebd 100644
--- a/verification/test-wdls/TestPairedTag.wdl
+++ b/verification/test-wdls/TestPairedTag.wdl
@@ -10,6 +10,7 @@ workflow TestPairedTag {
 
     input {
       String input_id
+      String nhash_id
 
       # Optimus Inputs
       String counting_mode = "sn_rna"
@@ -86,7 +87,8 @@ workflow TestPairedTag {
         adapter_seq_read3 = adapter_seq_read3,
         chrom_sizes = chrom_sizes,
         atac_whitelist = atac_whitelist,
-        soloMultiMappers = soloMultiMappers
+        soloMultiMappers = soloMultiMappers,
+        nhash_id = nhash_id
     }
 
     
diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md
index ac4c411e6e..3f922e86a7 100644
--- a/website/docs/Pipelines/ATAC/README.md
+++ b/website/docs/Pipelines/ATAC/README.md
@@ -50,6 +50,7 @@ The following describes the inputs of the ATAC workflow. For more details on how
 | read2_fastq_gzipped | Fastq inputs (array of compressed read 2 FASTQ files containing cellular barcodes). |
 | read3_fastq_gzipped | Fastq inputs (array of compressed read 3 FASTQ files). |
 | input_id | Output prefix/base name for all intermediate files and pipeline outputs. |
+| atac_nhash_id | String that represents an optional library aliquot identifier. When used, it is echoed in the h5ad unstructured data. |
 | preindex | Boolean used for paired-tag data and not applicable to ATAC data types; default is set to false. | 
 | tar_bwa_reference | BWA reference (tar file containing reference fasta and corresponding files). |
 | num_threads_bwa | Optional integer defining the number of CPUs per node for the BWA-mem alignment task (default: 128). |
diff --git a/website/docs/Pipelines/ATAC/count-matrix-overview.md b/website/docs/Pipelines/ATAC/count-matrix-overview.md
index d18c1eb41d..ea03e788c1 100644
--- a/website/docs/Pipelines/ATAC/count-matrix-overview.md
+++ b/website/docs/Pipelines/ATAC/count-matrix-overview.md
@@ -18,6 +18,7 @@ The global attributes (unstuctured metadata) in the h5ad apply to the whole file
 | Attribute | Program | Details |
 | --- | --- | --- |
 | `reference_sequences` | [SnapATAC2](https://github.com/kaizhang/SnapATAC2) | Data frame containing the chromosome sizes for the genome build (i.e., hg38); created using the [`chrom_sizes` pipeline input](README.md). |
+| `NHashID` | N/A | A string that represents the NHashID if specified in the workflow |
 
 
 ## Table 2. Cell metrics
diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md
index dfa8d4bd91..308a7e8bab 100644
--- a/website/docs/Pipelines/Multiome_Pipeline/README.md
+++ b/website/docs/Pipelines/Multiome_Pipeline/README.md
@@ -55,6 +55,7 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta
 | Input name | Description | Type |
 | --- | --- | --- |
 | input_id | Unique identifier describing the biological sample or replicate that corresponds with the FASTQ files; can be a human-readable name or UUID. | String |
+| nhash_id | Optional identifier for the library aliquot; when specified, the workflow will echo the ID in the ATAC and gene expression output h5ads (in the adata.uns section) and in the library-level metrics CSV. | String |
 | annotations_gtf | GTF file containing gene annotations used for GEX cell metric calculation and ATAC fragment metrics; must match the GTF used to build the STAR aligner. | File |
 | gex_r1_fastq | Array of read 1 FASTQ files representing a single GEX 10x library. | Array[File] |
 | gex_r2_fastq | Array of read 2 FASTQ files representing a single GEX 10x library.| Array[File] |
@@ -120,7 +121,7 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt
 | multimappers_PropUnique_matrix | `UniqueAndMult-PropUnique.mtx` | Optional output produced when `soloMultiMappers` is "PropUnique"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.|
 | gex_aligner_metrics | `<input_id>.star_metrics.tar` | Text file containing per barcode metrics (`CellReads.stats`) produced by the GEX pipeline STARsolo aligner. |
 | mtx_files | `<input_id>.mtx_files.tar` | TAR file with STARsolo matrix market files (barcodes.tsv, features.tsv, and matrix.mtx) | TAR |
-| library_metrics | `<input_id>_library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. |
+| library_metrics | `<input_id>_<nhash_id>_library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. |
 | cell_barcodes_csv | `<cell_csv>` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.|
 | checkpoint_file | `<ckpt_file>` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. |
 | h5_array | `<h5_array>` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. |
diff --git a/website/docs/Pipelines/Optimus_Pipeline/Library-metrics.md b/website/docs/Pipelines/Optimus_Pipeline/Library-metrics.md
index 38cd48398e..46f0811b7d 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/Library-metrics.md
+++ b/website/docs/Pipelines/Optimus_Pipeline/Library-metrics.md
@@ -11,6 +11,7 @@ To produce the library-level metrics here, the [combined_mtx.py script](https://
 
 | Metric | Description |
 | ---| --- |
+| nhash_id | The first line of the metrics CSV echoes the NHash ID, if specified in the workflow run. |
 | number_of_reads | Total number of reads.|
 | sequencing_saturation | Proportion of unique molecular identifiers (UMIs) observed relative to the total number of possible UMIs. |
 | fraction_of_unique_reads_mapped_to_genome | Fraction of unique reads that map to the genome. |
diff --git a/website/docs/Pipelines/Optimus_Pipeline/Loom_schema.md b/website/docs/Pipelines/Optimus_Pipeline/Loom_schema.md
index 8bf61109e8..ce811e1621 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/Loom_schema.md
+++ b/website/docs/Pipelines/Optimus_Pipeline/Loom_schema.md
@@ -32,6 +32,7 @@ The global attributes (unstuctured metadata) in the h5ad apply to the whole file
 | `input_id_metadata_field` | Optional string that describes, when applicable, the metadata field containing the `input_id`. |
 | `input_name_metadata_field` | Optional string that describes, when applicable, the metadata field containing the `input_name`. |
 | `pipeline_version` | String describing the version of the Optimus pipeline run on the data. |
+| `NHashID` | String that represents the NHashID (an optional library aliquot identifier), if specified during the workflow run. |
 
 ## Table 2. Cell metrics
 
diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md
index e6ca0b8187..2c5979d658 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/README.md
+++ b/website/docs/Pipelines/Optimus_Pipeline/README.md
@@ -89,6 +89,7 @@ The example configuration files also contain metadata for the reference files, d
 | read_struct | String describing the structure of reads; the workflow automatically selects the [10x Genomics](https://www.10xgenomics.com/) read structure that corresponds to the v2 or v3 chemistry based on the input `tenx_chemistry_version`. A custom read structure can also be provided if the input data was generated with a chemistry different from 10x Genomics v2 or v3. To use a custom read structure, set the input `force_no_check` to "true". | N/A |
 | tar_star_reference | TAR file containing a species-specific reference genome and GTF; it is generated using the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl). | N/A |
 | input_id | Unique identifier describing the biological sample or replicate that corresponds with the FASTQ files; can be a human-readable name or UUID. | N/A |
+| gex_nhash_id | Optional string to identify the library aliquot; will be echoed in the output h5ad file in the adata.uns and the library-level metrics CSV; default is null (`""`) | N/A |
 | input_name | Optional string that can be used to further identify the original biological sample. | N/A |
 | input_id_metadata_field | Optional string describing, when applicable, the metadata field containing the input_id. | N/A |
 | input_name_metadata_field | Optional string describing, when applicable, the metadata field containing the input_name. | N/A |
@@ -256,7 +257,7 @@ The following table lists the output files produced from the pipeline. For sampl
 | cell_metrics | `<input_id>.cell-metrics.csv.gz` | Matrix of metrics by cells. | Compressed CSV |
 | gene_metrics | `<input_id>.gene-metrics.csv.gz` |  Matrix of metrics by genes. | Compressed CSV |
 | aligner_metrics | `<input_id>.star_metrics.tar` | Tarred metrics files produced by the STARsolo aligner; contains align features, cell reads, summary, and UMI per cell metrics files. | TXT |
-| library_metrics | `<input_id>_library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. See the [Library-level metrics](./Library-metrics.md) for how metrics are calculated. | CSV |
+| library_metrics | `<input_id>_<gex_nhash_id>_library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. See the [Library-level metrics](./Library-metrics.md) for how metrics are calculated. | CSV |
 | multimappers_EM_matrix | `UniqueAndMult-EM.mtx` | Optional output produced when `soloMultiMappers` is "EM"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX |
 | multimappers_Uniform_matrix | `UniqueAndMult-Uniform.mtx` | Optional output produced when `soloMultiMappers` is "Uniform"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX |
 | multimappers_Rescue_matrix | `UniqueAndMult-Rescue.mtx` | Optional output produced when `soloMultiMappers` is "Rescue"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX |

From b8a753edd8e761a835164cc7b84f136c7fbe51d1 Mon Sep 17 00:00:00 2001
From: Kevin Palis <palis@broadinstitute.org>
Date: Thu, 11 Jul 2024 13:11:54 -0400
Subject: [PATCH 2/2] Fixing branch detection. Adding error handling if branch
 provided is neither develop or master (#1329)

---
 .github/workflows/warp_release.yml | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/warp_release.yml b/.github/workflows/warp_release.yml
index 495c047f7b..37d1c0e3b7 100644
--- a/.github/workflows/warp_release.yml
+++ b/.github/workflows/warp_release.yml
@@ -1,4 +1,3 @@
-
 # This action releases from develop/master for all changed pipelines
 name: WARP Release
 
@@ -40,16 +39,20 @@ jobs:
       run: |
         source scripts/common.sh
         set -e
-        if [[ "${GIT_BRANCH}" == "develop" ]]; then
+        BRANCH_NAME=$(echo "${GITHUB_REF#refs/heads/}")
+        if [[ "${BRANCH_NAME}" == "develop" ]]; then
             ENV=dev
-        elif [[ "${GIT_BRANCH}" == "master" ]]; then
+        elif [[ "${BRANCH_NAME}" == "master" ]]; then
             ENV=prod
+        else
+            echo "Error: Branch ${BRANCH_NAME} is not a valid release branch."
+            exit 1
         fi
         echo $ENV
         echo "Getting all changed pipelines since last commit before releasing from develop"
         previous_commit_hash=$(git rev-parse HEAD^1)
         changed_pipelines=$(get_modified_pipelines ${previous_commit_hash})
-        echo branch: ${GIT_BRANCH} previous_commit_hash: ${previous_commit_hash} env: ${ENV}
+        echo branch: ${BRANCH_NAME} previous_commit_hash: ${previous_commit_hash} env: ${ENV}
         if [[ -n ${ENV} ]]; then
             if [[ -n ${changed_pipelines[@]} ]]; then
                 for pipeline in ${changed_pipelines[@]}; do
@@ -62,6 +65,4 @@ jobs:
             echo "Releases are only made on merge to develop and master" 
         fi
       env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      
-      
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file