From 8ec744581112f5c4a61de75dba01df54916f3fa1 Mon Sep 17 00:00:00 2001
From: ekiernan <ekiernan@broadinstitute.org>
Date: Tue, 6 Aug 2024 15:06:17 -0400
Subject: [PATCH] updated changelogs and documentation

---
 pipelines/skylab/atac/atac.changelog.md                |  7 ++++++-
 pipelines/skylab/atac/atac.wdl                         |  2 +-
 pipelines/skylab/multiome/Multiome.changelog.md        |  5 +++++
 pipelines/skylab/multiome/Multiome.wdl                 |  2 +-
 pipelines/skylab/optimus/Optimus.changelog.md          |  9 +++++++--
 pipelines/skylab/optimus/Optimus.wdl                   |  2 +-
 pipelines/skylab/paired_tag/PairedTag.changelog.md     |  8 ++++++--
 pipelines/skylab/paired_tag/PairedTag.wdl              |  2 +-
 pipelines/skylab/slideseq/SlideSeq.changelog.md        | 10 ++++++++--
 pipelines/skylab/slideseq/SlideSeq.wdl                 |  2 +-
 .../docs/Pipelines/Optimus_Pipeline/Library-metrics.md |  5 +++--
 website/docs/Pipelines/Optimus_Pipeline/Loom_schema.md |  4 ++++
 12 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md
index e1b799a83e..ffe875fa0b 100644
--- a/pipelines/skylab/atac/atac.changelog.md
+++ b/pipelines/skylab/atac/atac.changelog.md
@@ -1,5 +1,10 @@
+# 2.2.3
+2024-08-02 (Date of Last Commit)
+
+* Updated the warp-tools docker which now includes new metric calculations for mitochondrial reads; this does not impact the ATAC workflow
+
 # 2.2.2
-2024-08-02 (Dat of Last Commit)
+2024-08-02 (Date of Last Commit)
 
 * The ubuntu_16_0_4 docker image version was pinned instead of using the latest tag; this does not affect the outputs of the pipeline
 
diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl
index 031084e6a9..45f6a7175d 100644
--- a/pipelines/skylab/atac/atac.wdl
+++ b/pipelines/skylab/atac/atac.wdl
@@ -46,7 +46,7 @@ workflow ATAC {
     String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
   }
 
-  String pipeline_version = "2.2.2"
+  String pipeline_version = "2.2.3"
 
   # Determine docker prefix based on cloud provider
   String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md
index 886ebe1816..afc52d57f9 100644
--- a/pipelines/skylab/multiome/Multiome.changelog.md
+++ b/pipelines/skylab/multiome/Multiome.changelog.md
@@ -1,3 +1,8 @@
+# 5.5.0
+2024-08-06 (Date of Last Commit)
+
+* Updated the warp-tools docker to calculate mitochondrial reads from unique reads in cell and gene metrics; these metrics are in the cell and gene metrics CSV as well as h5ad
+
 # 5.4.1
 2024-08-02 (Date of Last Commit)
 
diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl
index bdce763a28..21584c01dd 100644
--- a/pipelines/skylab/multiome/Multiome.wdl
+++ b/pipelines/skylab/multiome/Multiome.wdl
@@ -9,7 +9,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow Multiome {
 
-    String pipeline_version = "5.4.1"
+    String pipeline_version = "5.5.0"
 
 
     input {
diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md
index 4b946dac51..f64f9eb4bc 100644
--- a/pipelines/skylab/optimus/Optimus.changelog.md
+++ b/pipelines/skylab/optimus/Optimus.changelog.md
@@ -1,10 +1,15 @@
+# 7.6.0
+2024-08-06 (Date of Last Commit)
+
+* Updated the warp-tools docker to calculate mitochondrial reads from unique reads in cell and gene metrics; these metrics are in the cell and gene metrics CSV as well as h5ad
+
 # 7.5.1
-2024-08-02 (Dat of Last Commit)
+2024-08-02 (Date of Last Commit)
 
 * The ubuntu_16_0_4 docker image version was pinned instead of using the latest tag; this does not affect the outputs of the pipeline
 
 # 7.5.0
-2024-07-25 (Dat of Last Commit)
+2024-07-25 (Date of Last Commit)
 
 * Updated the warp-tools docker image to add TSO metrics to the output h5ad and metric CSV files
 * Update the library-level metrics to include new TSO metrics and NHashID descriptor
diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index 327ae70ea7..975439c9f3 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -71,7 +71,7 @@ workflow Optimus {
   # version of this pipeline
 
 
-  String pipeline_version = "7.5.1"
+  String pipeline_version = "7.6.0"
 
 
   # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md
index 9500df9b29..e9da183ec0 100644
--- a/pipelines/skylab/paired_tag/PairedTag.changelog.md
+++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md
@@ -1,10 +1,14 @@
+# 1.5.0
+2024-08-06 (Date of Last Commit)
+
+* Updated the warp-tools docker to calculate mitochondrial reads from unique reads in cell and gene metrics; these metrics are in the cell and gene metrics CSV as well as h5ad
 # 1.4.1
-2024-08-02 (Dat of Last Commit)
+2024-08-02 (Date of Last Commit)
 
 * The ubuntu_16_0_4 docker image version was pinned instead of using the latest tag; this does not affect the outputs of the pipeline
 
 # 1.4.0
-2024-07-25 (Dat of Last Commit)
+2024-07-25 (Date of Last Commit)
 
 * Updated the warp-tools docker image to add TSO metrics to the output h5ad and metric CSV files
 * Update the library-level metrics to include new TSO metrics and NHashID descriptor
diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl
index 379642c8ca..e35a153def 100644
--- a/pipelines/skylab/paired_tag/PairedTag.wdl
+++ b/pipelines/skylab/paired_tag/PairedTag.wdl
@@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow PairedTag {
 
-    String pipeline_version = "1.4.1"
+    String pipeline_version = "1.5.0"
 
 
     input {
diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md
index b59cbdc4b0..4c49b67467 100644
--- a/pipelines/skylab/slideseq/SlideSeq.changelog.md
+++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md
@@ -1,10 +1,16 @@
+# 3.4.0
+2024-08-06 (Date of Last Commit)
+
+* Updated the warp-tools docker to calculate mitochondrial reads from unique reads in cell and gene metrics; these metrics are in the cell and gene metrics CSV as well as h5ad
+
+
 # 3.3.1
-2024-08-02 (Dat of Last Commit)
+2024-08-02 (Date of Last Commit)
 
 * The ubuntu_16_0_4 docker image version was pinned instead of using the latest tag; this does not affect the outputs of the pipeline
 
 # 3.3.0
-2024-07-25 (Dat of Last Commit)
+2024-07-25 (Date of Last Commit)
 
 * Updated the warp-tools docker image to add TSO metrics to the output h5ad and metric CSV files
 
diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl
index 0ce0fe5632..c449818881 100644
--- a/pipelines/skylab/slideseq/SlideSeq.wdl
+++ b/pipelines/skylab/slideseq/SlideSeq.wdl
@@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils
 
 workflow SlideSeq {
 
-    String pipeline_version = "3.3.1"
+    String pipeline_version = "3.4.0"
 
     input {
         Array[File] r1_fastq
diff --git a/website/docs/Pipelines/Optimus_Pipeline/Library-metrics.md b/website/docs/Pipelines/Optimus_Pipeline/Library-metrics.md
index cc704eace8..143b8f0730 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/Library-metrics.md
+++ b/website/docs/Pipelines/Optimus_Pipeline/Library-metrics.md
@@ -18,7 +18,7 @@ To produce the library-level metrics here, the [combined_mtx.py script](https://
 | fraction_of_unique_and_multiple_reads_mapped_to_genome| Fraction of both unique and multiple reads that map to the genome. |
 | fraction_of_reads_with_Q30_bases_in_rna | Fraction of reads with base quality score ≥ Q30 in RNA sequences. |
 | fraction_of_reads_with_Q30_bases_in_cb_and_umi | Fraction of reads with base quality score ≥ Q30 in cell barcode (CB) and unique molecular identifier (UMI). |
-| fraction_of_reads_with_valid_barcodes | Fraction of reads with valid cell barcodes.                                                                   |
+| fraction_of_reads_with_valid_barcodes | Fraction of reads with valid cell barcodes. |
 | reads_mapped_antisense_to_gene | Number of reads mapped antisense to gene regions.  |
 | reads_mapped_confidently_exonic | Number of reads mapped confidently to exonic regions. |
 | reads_mapped_confidently_to_genome | Number of reads mapped confidently to the genome. |
@@ -41,4 +41,5 @@ To produce the library-level metrics here, the [combined_mtx.py script](https://
 | keeper_median_genes | Median genes per cell for cells with >1500 genes or nuclei with >1000 genes.  |
 | keeper_cells | Number of cells with >1500 genes or nuclei with >1000 genes.|
 | percent_keeper | Percentage of keeper cells. Calculated as: keeper_cells / estimated_cells |
-| percent_usable | Percentage of usable cells. Calculated as: keeper_cells / expected_cells |
\ No newline at end of file
+| percent_usable | Percentage of usable cells. Calculated as: keeper_cells / expected_cells |
+| frac_tso | Fraction of reads containing TSO sequence. Calculated as the number of reads that have 20 bp or more of TSO sequence clipped from the 5' end / total number of reads. |
\ No newline at end of file
diff --git a/website/docs/Pipelines/Optimus_Pipeline/Loom_schema.md b/website/docs/Pipelines/Optimus_Pipeline/Loom_schema.md
index ce811e1621..83e07ba73a 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/Loom_schema.md
+++ b/website/docs/Pipelines/Optimus_Pipeline/Loom_schema.md
@@ -42,6 +42,7 @@ The global attributes (unstuctured metadata) in the h5ad apply to the whole file
 |`cell_names` |  [TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort) | The unique identifier for each cell based on cell barcodes; identical to `CellID`. |
 | `input_id` | Provided as pipeline input | The sample or cell ID listed in the pipeline configuration file. This can be any string, but we recommend it be consistent with any sample metadata. |
 |`n_reads`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The number of reads associated with the cell. Like all metrics, `n_reads` is calculated from the Optimus output BAM file. Prior to alignment, reads are checked against the whitelist and any within one edit distance (Hamming distance) are corrected. These CB-corrected reads are aligned using STARsolo, where they get further CB correction. For this reason, most reads in the aligned BAM file have both `CB` and `UB` tags. Therefore, `n_reads` represents CB-corrected reads, rather than all reads in the input FASTQ files. |
+| `tso_reads` | [TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort) | The number of reads that have 20 or more bp of TSO sequence clipped from the 5' end. Calculated using the first number of the `cN` tag in the BAM, which is specific to the number of TSO nucleotides clipped. |
 |`noise_reads`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| Number of reads that are categorized by 10x Genomics Cell Ranger as "noise". Refers to long polymers, or reads with high numbers of N (ambiguous) nucleotides. |
 |`perfect_molecule_barcodes`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The number of reads with molecule barcodes (sequences used to identify unique transcripts) that have no errors. Learn more about UMIs in the [Definitions](#definitions) section below. |
 | `reads_mapped_exonic` | STARsolo and [TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort) | The number of unique reads counted as exon; counted when BAM file's `sF` tag is assigned to `1` or `3` and the `NH:i` tag is `1`; mitochondrial reads are excluded. |
@@ -68,6 +69,7 @@ The global attributes (unstuctured metadata) in the h5ad apply to the whole file
 |`fragments_per_molecule`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The average number of fragments associated with each molecule in the cell. |
 |`fragments_with_single_read_evidence`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The number of fragments associated with the cell that are observed by only one read. |
 |`molecules_with_single_read_evidence`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)|The number of molecules associated with the cell that are observed by only one read. |
+| `reads_mapped_mitochondrial` | [TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort) | The number of unique reads (`NH:i:1` BAM tag) that come from mitochondrial genes. |
 |`perfect_cell_barcodes`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)|The number of reads whose cell barcodes contain no error. |
 |`reads_mapped_too_many_loci`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The number of reads that were mapped to too many loci across the genome and as a consequence, are reported unmapped by the aligner. |
 |`cell_barcode_fraction_bases_above_30_variance`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The variance of the fraction of Illumina base calls for the cell barcode sequence that are greater than 30, across molecules. |
@@ -92,6 +94,7 @@ The global attributes (unstuctured metadata) in the h5ad apply to the whole file
 | `Gene` | [GENCODE GTF](https://www.gencodegenes.org/) | The unique `gene_name` provided in the GENCODE GTF file; identical to the `gene_names` attribute. |
 |`gene_names` | [GENCODE GTF](https://www.gencodegenes.org/) | The unique `gene_name` provided in the GENCODE GTF file; identical to the `Gene` attribute. |
 |`n_reads`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The number of reads associated with this gene. |
+| `tso_reads` | [TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort) | The number of reads that have 20 or more bp of TSO sequence clipped from the 5' end. Calculated using the first number of the `cN` tag in the BAM, which is specific to the number of TSO nucleotides clipped. |
 |`noise_reads`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| Not currently calculated for Optimus output; number of reads that are categorized by 10x Genomics Cell Ranger as "noise"; refers to long polymers, or reads with high numbers of N (ambiguous) nucleotides. |
 |`perfect_molecule_barcodes`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The number of reads with molecule barcodes (sequences used to identify unique transcripts) that have no errors. Learn more about UMIs in the [Definitions](#definitions) section below. |
 | `reads_mapped_exonic` | STARsolo and [TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort) | The number of unique reads counted as exon; counted when BAM file's `sF` tag is assigned to `1` or `3` and the `NH:i` tag is `1`; mitochondrial reads are excluded. |
@@ -116,6 +119,7 @@ The global attributes (unstuctured metadata) in the h5ad apply to the whole file
 |`fragments_per_molecule`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The average number of fragments associated with each molecule in the gene. |
 |`fragments_with_single_read_evidence`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The number of fragments associated with the gene that are observed by only one read. |
 |`molecules_with_single_read_evidence`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The number of molecules associated with the gene that are observed by only one read. |
+| `reads_mapped_mitochondrial` | [TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort) | The number of unique reads (`NH:i:1` BAM tag) that come from mitochondrial genes. |
 |`number_cells_detected_multiple`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The number of cells which observe more than one read of the gene. |
 |`number_cells_expressing`|[TagSort](https://github.com/broadinstitute/warp-tools/tree/develop/tools/TagSort)| The number of cells that detect the gene. |