Skip to content

Commit

Permalink
added nhash_id to multiome, optimus, paired-tag, and atac (#1316)
Browse files Browse the repository at this point in the history
* added nhash_id to multiome, optimus, paired-tag, and atac

* changed to null input

* Fix testing wdls for nhash_id

* fixing nhash id errors

* fixed nhash_id examples and verification wdls

* made gex_nhash_id optional in STARAlign mergestaroutputs task

* fixing h5ad utils

* fixing nhash id python variable in atac wdl

* fixed h5ad variable name in atac fragment file creation

* fixed echo in STARAlign for nhash id

* passing nhash id through pipeline

* updated changelogs

* updated documentation for nhash_id
  • Loading branch information
ekiernan authored Jul 11, 2024
1 parent e49ffbc commit e52f2c1
Show file tree
Hide file tree
Showing 35 changed files with 138 additions and 30 deletions.
4 changes: 3 additions & 1 deletion pipelines/skylab/multiome/Multiome.changelog.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# 5.1.2
# 5.2.0
2024-07-09 (Date of Last Commit)

* Added new optional input parameter nhash_id, an identifier for a library aliquot that is echoed in the ATAC fragment h5ad (in the data.uns), the gene expression h5ad (in the data.uns), and the gene expression library metrics CSV output; default is set to null
* Added test statements again for GH action (to release from develop). Will probably revert


# 5.1.0
2024-06-28 (Date of Last Commit)

Expand Down
7 changes: 6 additions & 1 deletion pipelines/skylab/multiome/Multiome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@ import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender

workflow Multiome {
String pipeline_version = "5.1.2"

String pipeline_version = "5.2.0"

input {
String input_id
# Additional library aliquot ID
String? nhash_id

# Optimus Inputs
String counting_mode = "sn_rna"
Expand Down Expand Up @@ -57,6 +60,7 @@ workflow Multiome {
i1_fastq = gex_i1_fastq,
input_id = input_id + "_gex",
output_bam_basename = input_id + "_gex",
gex_nhash_id = nhash_id,
tar_star_reference = tar_star_reference,
annotations_gtf = annotations_gtf,
mt_genes = mt_genes,
Expand All @@ -82,6 +86,7 @@ workflow Multiome {
whitelist = atac_whitelist,
adapter_seq_read1 = adapter_seq_read1,
annotations_gtf = annotations_gtf,
atac_nhash_id = nhash_id,
adapter_seq_read3 = adapter_seq_read3
}
call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes {
Expand Down
3 changes: 2 additions & 1 deletion pipelines/skylab/multiome/atac.changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# 2.0.2
# 2.1.0
2024-07-09 (Date of Last Commit)

* Added new optional input parameter of atac_nhash_id, an identifier for a library aliquot that is echoed in the atac fragment metrics h5ad (in the data.uns); default is set to null
* Added test statements again for GH action (to release from develop). Will probably revert

# 2.0.0
Expand Down
14 changes: 11 additions & 3 deletions pipelines/skylab/multiome/atac.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ workflow ATAC {

# Output prefix/base name for all intermediate files and pipeline outputs
String input_id
# Additional library aliquot ID
String? atac_nhash_id

# Option for running files with preindex
Boolean preindex = false
Expand All @@ -41,7 +43,7 @@ workflow ATAC {
String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
}

String pipeline_version = "2.0.2"
String pipeline_version = "2.1.0"

parameter_meta {
read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads"
Expand Down Expand Up @@ -105,7 +107,8 @@ workflow ATAC {
bam = BBTag.bb_bam,
chrom_sizes = chrom_sizes,
annotations_gtf = annotations_gtf,
preindex = preindex
preindex = preindex,
atac_nhash_id = atac_nhash_id
}
}
if (!preindex) {
Expand All @@ -114,7 +117,8 @@ workflow ATAC {
bam = BWAPairedEndAlignment.bam_aligned_output,
chrom_sizes = chrom_sizes,
annotations_gtf = annotations_gtf,
preindex = preindex
preindex = preindex,
atac_nhash_id = atac_nhash_id

}
}
Expand Down Expand Up @@ -447,6 +451,7 @@ task CreateFragmentFile {
Int mem_size = 16
Int nthreads = 4
String cpuPlatform = "Intel Cascade Lake"
String atac_nhash_id = ""
}

String bam_base_name = basename(bam, ".bam")
Expand All @@ -470,6 +475,7 @@ task CreateFragmentFile {
chrom_sizes = "~{chrom_sizes}"
atac_gtf = "~{annotations_gtf}"
preindex = "~{preindex}"
atac_nhash_id = "~{atac_nhash_id}"
# calculate chrom size dictionary based on text file
chrom_size_dict={}
Expand All @@ -494,6 +500,8 @@ task CreateFragmentFile {
# those settings allow us to retain all barcodes
pp.import_data("~{bam_base_name}.fragments.tsv", file="temp_metrics.h5ad", chrom_sizes=chrom_size_dict, min_num_fragments=0)
atac_data = ad.read_h5ad("temp_metrics.h5ad")
# Add nhash_id to h5ad file as unstructured metadata
atac_data.uns['NHashID'] = atac_nhash_id
# calculate tsse metrics
snap.metrics.tsse(atac_data, atac_gtf)
# Write new atac file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@
"Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake",
"Multiome.Atac.num_threads_bwa":"16",
"Multiome.Atac.mem_size_bwa":"64",
"Multiome.soloMultiMappers":"Uniform"
"Multiome.soloMultiMappers":"Uniform",
"Multiome.nhash_id":"example_1234"
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,6 @@
"Multiome.chrom_sizes":"gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes",
"Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake",
"Multiome.Atac.num_threads_bwa":"24",
"Multiome.Atac.mem_size_bwa":"175"
"Multiome.Atac.mem_size_bwa":"175",
"Multiome.nhash_id":"example_1234"
}
5 changes: 5 additions & 0 deletions pipelines/skylab/optimus/Optimus.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 7.3.0
2024-07-09 (Date of Last Commit)

* Added new optional input parameter gex_nhash_id, a string identifier for a library aliquot that is echoed in the h5ad cell-by-gene matrix (in the data.uns) and the library metrics CSV output; default is set to an empty string

# 7.2.0
2024-06-28 (Date of Last Commit)

Expand Down
12 changes: 9 additions & 3 deletions pipelines/skylab/optimus/Optimus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ workflow Optimus {
Array[File] r2_fastq
Array[File]? i1_fastq
String input_id
# String for additional library aliquot ID
String? gex_nhash_id = ""
String output_bam_basename = input_id
String? input_name
String? input_id_metadata_field
Expand Down Expand Up @@ -66,7 +68,7 @@ workflow Optimus {
# version of this pipeline

String pipeline_version = "7.2.0"
String pipeline_version = "7.3.0"


# this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
Expand Down Expand Up @@ -170,7 +172,8 @@ workflow Optimus {
umipercell = STARsoloFastq.umipercell,
input_id = input_id,
counting_mode = counting_mode,
expected_cells = expected_cells
expected_cells = expected_cells,
gex_nhash_id = gex_nhash_id
}
if (counting_mode == "sc_rna"){
call RunEmptyDrops.RunEmptyDrops {
Expand All @@ -186,6 +189,7 @@ workflow Optimus {
call H5adUtils.OptimusH5adGeneration{
input:
input_id = input_id,
gex_nhash_id = gex_nhash_id,
input_name = input_name,
input_id_metadata_field = input_id_metadata_field,
input_name_metadata_field = input_name_metadata_field,
Expand All @@ -212,11 +216,13 @@ workflow Optimus {
summary = STARsoloFastq.summary_sn_rna,
align_features = STARsoloFastq.align_features_sn_rna,
umipercell = STARsoloFastq.umipercell_sn_rna,
input_id = input_id
input_id = input_id,
gex_nhash_id = gex_nhash_id
}
call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{
input:
input_id = input_id,
gex_nhash_id = gex_nhash_id,
input_name = input_name,
input_id_metadata_field = input_id_metadata_field,
input_name_metadata_field = input_name_metadata_field,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@
"Optimus.input_id": "pbmc_human_v3",
"Optimus.tenx_chemistry_version": "3",
"Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf",
"Optimus.star_strand_mode": "Forward"
"Optimus.star_strand_mode": "Forward",
"Optimus.gex_nhash_id":"example_1234"
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,6 @@
"Optimus.input_id": "neurons2k_mouse",
"Optimus.tenx_chemistry_version": "2",
"Optimus.star_strand_mode": "Unstranded",
"Optimus.gex_nhash_id":"example_1234",
"Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf"
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,6 @@
"Optimus.star_strand_mode": "Unstranded",
"Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf",
"Optimus.counting_mode": "sn_rna",
"Optimus.gex_nhash_id":"example_1234",
"Optimus.count_exons": true
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"Optimus.input_id": "8k_pbmc",
"Optimus.tenx_chemistry_version": "2",
"Optimus.star_strand_mode": "Unstranded",
"Optimus.gex_nhash_id":"example_1234",
"Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf"
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"Optimus.input_id": "8k_pbmc",
"Optimus.tenx_chemistry_version": "2",
"Optimus.star_strand_mode": "Forward",
"Optimus.gex_nhash_id":"example_1234",
"Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/gencode.v27.primary_assembly.annotation.gtf"
}

Expand Down
4 changes: 3 additions & 1 deletion pipelines/skylab/paired_tag/PairedTag.changelog.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# 1.1.2
# 1.2.0
2024-07-09 (Date of Last Commit)

* Added new optional input parameter nhash_id, an identifier for a library aliquot that is echoed in the ATAC workflow fragment h5ad, the Optimus workflow gene expression h5ad (in the data.uns), and the Optimus gene expression library metrics CSV output; default is set to null
* Added test statements again for GH action (to release from develop). Will probably revert


# 1.1.0
2024-06-28 (Date of Last Commit)

Expand Down
10 changes: 7 additions & 3 deletions pipelines/skylab/paired_tag/PairedTag.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing
workflow PairedTag {
String pipeline_version = "1.1.2"
String pipeline_version = "1.2.0"

input {
String input_id
# Additional library aliquot id
String? nhash_id

# Optimus Inputs
String counting_mode = "sn_rna"
Expand Down Expand Up @@ -63,7 +65,8 @@ workflow PairedTag {
ignore_r1_read_length = ignore_r1_read_length,
star_strand_mode = star_strand_mode,
count_exons = count_exons,
soloMultiMappers = soloMultiMappers
soloMultiMappers = soloMultiMappers,
gex_nhash_id = nhash_id
}

# Call the ATAC workflow
Expand Down Expand Up @@ -91,7 +94,8 @@ workflow PairedTag {
adapter_seq_read1 = adapter_seq_read1,
adapter_seq_read3 = adapter_seq_read3,
annotations_gtf = annotations_gtf,
preindex = preindex
preindex = preindex,
atac_nhash_id = nhash_id
}

if (preindex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@
"PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
"PairedTag.Atac_preindex.num_threads_bwa":"16",
"PairedTag.Atac_preindex.mem_size_bwa":"64",
"PairedTag.soloMultiMappers":"Uniform"
"PairedTag.soloMultiMappers":"Uniform",
"PairedTag.nhash_id":"example_1234"
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@
"PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
"PairedTag.Atac_preindex.num_threads_bwa":"16",
"PairedTag.Atac_preindex.mem_size_bwa":"64",
"PairedTag.soloMultiMappers":"Uniform"
"PairedTag.soloMultiMappers":"Uniform",
"PairedTag.nhash_id":"example_1234"
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@
"PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
"PairedTag.Atac_preindex.num_threads_bwa":"16",
"PairedTag.Atac_preindex.mem_size_bwa":"64",
"PairedTag.soloMultiMappers":"Uniform"
"PairedTag.soloMultiMappers":"Uniform",
"PairedTag.nhash_id":"example_1234"
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,6 @@
"PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
"PairedTag.Atac_preindex.num_threads_bwa":"24",
"PairedTag.Atac_preindex.mem_size_bwa":"175",
"PairedTag.soloMultiMappers":"Uniform"
"PairedTag.soloMultiMappers":"Uniform",
"PairedTag.nhash_id":"example_1234"
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@
"PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake",
"PairedTag.Atac_preindex.num_threads_bwa":"16",
"PairedTag.Atac_preindex.mem_size_bwa":"64",
"PairedTag.soloMultiMappers":"Uniform"
"PairedTag.soloMultiMappers":"Uniform",
"PairedTag.nhash_id":"example_1234"
}
5 changes: 5 additions & 0 deletions pipelines/skylab/slideseq/SlideSeq.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 3.1.8
2024-07-09 (Date of Last Commit)

* Added new optional input parameter of gex_nhash_id to the STARAlign task; this does not impact the SlideSeq workflow

# 3.1.7
2024-06-28 (Date of Last Commit)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/skylab/slideseq/SlideSeq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import "../../../tasks/skylab/MergeSortBam.wdl" as Merge

workflow SlideSeq {

String pipeline_version = "3.1.7"
String pipeline_version = "3.1.8"

input {
Array[File] r1_fastq
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 1.3.6
2024-07-09 (Date of Last Commit)
* Added new optional input parameter of gex_nhash_id to the STARAlign task; this does not impact the MultiSampleSmartSeq2SingleNucleus workflow

# 1.3.5
2024-06-28 (Date of Last Commit)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2SingleNucleus {
String? input_id_metadata_field
}
# Version of this pipeline
String pipeline_version = "1.3.5"
String pipeline_version = "1.3.6"

if (false) {
String? none = "None"
Expand Down
Loading

0 comments on commit e52f2c1

Please sign in to comment.