Ph pd 2514 multiome on terra #1223

Closed
wants to merge 17 commits into from

Changes from all commits
5 changes: 5 additions & 0 deletions pipelines/skylab/multiome/Multiome.changelog.md
@@ -1,3 +1,8 @@
# 3.2.2
2024-03-01 (Date of Last Commit)

* Updated the Optimus.wdl to run on Azure. This change does not affect the Multiome pipeline.

# 3.2.1
2024-02-29 (Date of Last Commit)

42 changes: 35 additions & 7 deletions pipelines/skylab/multiome/Multiome.wdl
@@ -4,12 +4,15 @@ import "../../../pipelines/skylab/multiome/atac.wdl" as atac
import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender
import "../../../tasks/broad/Utilities.wdl" as utils

workflow Multiome {
String pipeline_version = "3.2.1"
String pipeline_version = "3.2.2"

input {
String cloud_provider
String input_id
String cloud_provider

# Optimus Inputs
String counting_mode = "sn_rna"
@@ -25,33 +28,56 @@ workflow Multiome {
Boolean ignore_r1_read_length = false
String star_strand_mode = "Forward"
Boolean count_exons = false
File gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt"
String? soloMultiMappers

# ATAC inputs
# Array of input fastq files
Array[File] atac_r1_fastq
Array[File] atac_r2_fastq
Array[File] atac_r3_fastq

# BWA tar reference
File tar_bwa_reference
# Chromosome sizes
File chrom_sizes
# TrimAdapters input
String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG"
String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
# Whitelist
File atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt"

# CellBender
Boolean run_cellbender = false

}

# Determine docker prefix based on cloud provider
String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

# Define docker images
String snap_atac_docker_image = "snapatac2:1.0.5-2.3.2-1709230223"

# Define all whitelist files
File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt"
File gcp_atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt"
File azure_gex_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_gex.txt"
File azure_atac_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_atac.txt"

# Determine which whitelist files to use based on cloud provider
File gex_whitelist = if cloud_provider == "gcp" then gcp_gex_whitelist else azure_gex_whitelist
File atac_whitelist = if cloud_provider == "gcp" then gcp_atac_whitelist else azure_atac_whitelist

# Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error
if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
input:
message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
}
}

# Call the Optimus workflow
call optimus.Optimus as Optimus {
input:
cloud_provider = cloud_provider,
counting_mode = counting_mode,
r1_fastq = gex_r1_fastq,
r2_fastq = gex_r2_fastq,
@@ -68,12 +94,14 @@
ignore_r1_read_length = ignore_r1_read_length,
star_strand_mode = star_strand_mode,
count_exons = count_exons,
soloMultiMappers = soloMultiMappers
soloMultiMappers = soloMultiMappers,
cloud_provider = cloud_provider
}

# Call the ATAC workflow
call atac.ATAC as Atac {
input:
cloud_provider = cloud_provider,
read1_fastq_gzipped = atac_r1_fastq,
read2_fastq_gzipped = atac_r2_fastq,
read3_fastq_gzipped = atac_r3_fastq,
@@ -87,6 +115,7 @@
}
call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes {
input:
docker_path = docker_prefix + snap_atac_docker_image,
atac_h5ad = Atac.snap_metrics,
gex_h5ad = Optimus.h5ad_output_file,
gex_whitelist = gex_whitelist,
@@ -108,7 +137,6 @@
hardware_preemptible_tries = 2,
hardware_zones = "us-central1-a us-central1-c",
nvidia_driver_version = "470.82.01"

}
}

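Both Multiome.wdl and atac.wdl guard the new cloud_provider input with a call to utils.ErrorWithMessage from tasks/broad/Utilities.wdl, which is imported above but not included in this diff. As a rough sketch only (the real task may differ; the docker image and message format below are assumptions), a fail-fast task of this shape is all the pattern requires:

version 1.0

# Hypothetical stand-in for utils.ErrorWithMessage; the actual task lives in
# tasks/broad/Utilities.wdl and is not shown in this PR.
task ErrorWithMessage {
  input {
    String message
  }

  command <<<
    # Print the supplied message to stderr and fail the call,
    # which in turn fails the workflow that invoked it.
    >&2 echo "Error: ~{message}"
    exit 1
  >>>

  runtime {
    docker: "ubuntu:20.04"  # assumed image; any minimal shell image works
  }
}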
7 changes: 6 additions & 1 deletion pipelines/skylab/multiome/atac.changelog.md
@@ -1,4 +1,9 @@
# 1.1.8
# 1.1.9
2024-03-01 (Date of Last Commit)

* Updated the Optimus.wdl to run on Azure. This change does not affect the ATAC pipeline.

# 1.1.8
2024-02-07 (Date of Last Commit)

* Updated the Metrics tasks to exclude mitochondrial genes from reads_mapped_uniquely, reads_mapped_multiple, reads_mapped_exonic, reads_mapped_exonic_as, and reads_mapped_intergenic
1 change: 1 addition & 0 deletions pipelines/skylab/multiome/atac.json
@@ -4,6 +4,7 @@
"ATAC.TrimAdapters.adapter_seq_read1": "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG",
"ATAC.TrimAdapters.adapter_seq_read2": "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG",
"ATAC.input_id": "scATAC",
"ATAC.cloud_provider":"gcp",
"ATAC.tar_bwa_reference": "gs://fc-dd55e131-ef49-4d02-aa2a-20640daaae1e/submissions/8f0dd71a-b42f-4503-b839-3f146941758a/IndexRef/53a91851-1f6c-4ab9-af66-b338ffb28b5a/call-BwaMem2Index/GRCh38.primary_assembly.genome.bwamem2.fa.tar",
"ATAC.preindex": "false"
}
59 changes: 44 additions & 15 deletions pipelines/skylab/multiome/atac.wdl
@@ -3,6 +3,7 @@ version 1.0
import "../../../tasks/skylab/MergeSortBam.wdl" as Merge
import "../../../tasks/skylab/FastqProcessing.wdl" as FastqProcessing
import "../../../tasks/skylab/PairedTagUtils.wdl" as AddBB
import "../../../tasks/broad/Utilities.wdl" as utils

workflow ATAC {
meta {
@@ -18,6 +19,7 @@ workflow ATAC {

# Output prefix/base name for all intermediate files and pipeline outputs
String input_id
String cloud_provider

# Option for running files with preindex
Boolean preindex = false
@@ -41,7 +43,27 @@
String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
}

String pipeline_version = "1.1.8"
String pipeline_version = "1.1.9"

# Determine docker prefix based on cloud provider
String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

# Docker image names
String warp_tools_2_0_0 = "warp-tools:2.0.0"
String cutadapt_docker = "cutadapt:1.0.0-4.4-1709146458"
String samtools_docker = "samtools-dist-bwa:3.0.0"
String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311"
String snap_atac_docker = "snapatac2:1.0.4-2.3.1"

# Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error
if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
input:
message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
}
}

parameter_meta {
read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads"
@@ -52,7 +74,6 @@
num_threads_bwa: "Number of threads for bwa-mem2 task (default: 128)"
mem_size_bwa: "Memory size in GB for bwa-mem2 task (default: 512)"
cpu_platform_bwa: "CPU platform for bwa-mem2 task (default: Intel Ice Lake)"

}

call GetNumSplits {
@@ -69,7 +90,8 @@
barcodes_fastq = read2_fastq_gzipped,
output_base_name = input_id,
num_output_files = GetNumSplits.ranks_per_node_out,
whitelist = whitelist
whitelist = whitelist,
docker_path = docker_prefix + warp_tools_2_0_0
}

scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) {
@@ -79,7 +101,8 @@
read3_fastq = SplitFastq.fastq_R3_output_array[idx],
output_base_name = input_id + "_" + idx,
adapter_seq_read1 = adapter_seq_read1,
adapter_seq_read3 = adapter_seq_read3
adapter_seq_read3 = adapter_seq_read3,
docker_path = docker_prefix + cutadapt_docker
}
}

@@ -91,21 +114,24 @@
output_base_name = input_id,
nthreads = num_threads_bwa,
mem_size = mem_size_bwa,
cpu_platform = cpu_platform_bwa
cpu_platform = cpu_platform_bwa,
docker_path = docker_prefix + samtools_docker
}

if (preindex) {
call AddBB.AddBBTag as BBTag {
input:
bam = BWAPairedEndAlignment.bam_aligned_output,
input_id = input_id
input_id = input_id,
docker_path = docker_prefix + upstools_docker
}
call CreateFragmentFile as BB_fragment {
input:
bam = BBTag.bb_bam,
chrom_sizes = chrom_sizes,
annotations_gtf = annotations_gtf,
preindex = preindex
preindex = preindex,
docker_path = docker_prefix + snap_atac_docker
}
}
if (!preindex) {
@@ -114,7 +140,8 @@
bam = BWAPairedEndAlignment.bam_aligned_output,
chrom_sizes = chrom_sizes,
annotations_gtf = annotations_gtf,
preindex = preindex
preindex = preindex,
docker_path = docker_prefix + snap_atac_docker

}
}
@@ -231,7 +258,7 @@ task TrimAdapters {
# Runtime attributes/docker
Int disk_size = ceil(2 * ( size(read1_fastq, "GiB") + size(read3_fastq, "GiB") )) + 200
Int mem_size = 4
String docker_image = "us.gcr.io/broad-gotc-prod/cutadapt:1.0.0-4.4-1686752919"
String docker_path
}

parameter_meta {
@@ -242,7 +269,7 @@
adapter_seq_read1: "cutadapt option for the sequence adapter for read 1 fastq"
adapter_seq_read3: "cutadapt option for the sequence adapter for read 3 fastq"
output_base_name: "base name to be used for the output of the task"
docker_image: "the docker image using cutadapt to be used (default:us.gcr.io/broad-gotc-prod/cutadapt:1.0.0-4.4-1686752919)"
docker_path: "The docker image path containing the runtime environment for this task"
mem_size: "the size of memory used during trimming adapters"
disk_size : "disk size used in trimming adapters step"
}
@@ -269,7 +296,7 @@

# use docker image for given tool cutadapt
runtime {
docker: docker_image
docker: docker_path
disks: "local-disk ${disk_size} HDD"
memory: "${mem_size} GiB"
}
@@ -290,7 +317,7 @@
String read_group_sample_name = "RGSN1"
String suffix = "trimmed_adapters.fastq.gz"
String output_base_name
String docker_image = "us.gcr.io/broad-gotc-prod/samtools-dist-bwa:2.0.0"
String docker_path

# Runtime attributes
Int disk_size = 2000
@@ -309,7 +336,7 @@
mem_size: "the size of memory used during alignment"
disk_size : "disk size used in bwa alignment step"
output_base_name: "basename to be used for the output of the task"
docker_image: "the docker image using BWA to be used (default: us.gcr.io/broad-gotc-prod/samtools-bwa-mem-2:1.0.0-2.2.1_x64-linux-1685469504)"
docker_path: "The docker image path containing the runtime environment for this task"
}

String bam_aligned_output_name = output_base_name + ".bam"
@@ -418,7 +445,7 @@ task BWAPairedEndAlignment {
>>>

runtime {
docker: docker_image
docker: docker_path
disks: "local-disk ${disk_size} SSD"
cpu: nthreads
cpuPlatform: cpu_platform
@@ -442,6 +469,7 @@ task CreateFragmentFile {
Int mem_size = 16
Int nthreads = 1
String cpuPlatform = "Intel Cascade Lake"
String docker_path
}

String bam_base_name = basename(bam, ".bam")
@@ -452,6 +480,7 @@
chrom_sizes: "Text file containing chrom_sizes for genome build (i.e. hg38)."
disk_size: "Disk size used in create fragment file step."
mem_size: "The size of memory used in create fragment file."
docker_path: "The docker image path containing the runtime environment for this task"
}

command <<<
@@ -492,7 +521,7 @@ task CreateFragmentFile {
>>>

runtime {
docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1"
docker: docker_path
disks: "local-disk ${disk_size} SSD"
memory: "${mem_size} GiB"
cpu: nthreads
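With the hardcoded docker_image defaults removed from TrimAdapters, BWAPairedEndAlignment, and CreateFragmentFile, each task now receives a fully qualified docker_path built by the calling workflow. A small illustrative wrapper (not part of this PR) showing how the prefix-selection logic in atac.wdl resolves to a concrete image string:

version 1.0

# Illustrative only: mirrors the prefix selection in atac.wdl so the resulting
# image strings are easy to see in isolation.
workflow ResolveDockerPath {
  input {
    String cloud_provider = "gcp"
  }

  String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
  String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
  String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

  # "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1" on GCP,
  # "dsppipelinedev.azurecr.io/snapatac2:1.0.4-2.3.1" on Azure
  String snap_atac_docker = "snapatac2:1.0.4-2.3.1"

  output {
    String snap_atac_docker_path = docker_prefix + snap_atac_docker
  }
}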
@@ -1,6 +1,7 @@
{
"Multiome.annotations_gtf":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf",
"Multiome.input_id":"10k_PBMC_downsampled",
"Multiome.cloud_provider":"gcp",
"Multiome.gex_r1_fastq":[
"gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R1_gex.fastq.gz"
],
@@ -23,5 +24,6 @@
"Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake",
"Multiome.Atac.num_threads_bwa":"16",
"Multiome.Atac.mem_size_bwa":"64",
"Multiome.soloMultiMappers":"Uniform"
"Multiome.soloMultiMappers":"Uniform",
"Multiome.cloud_provider":"gcp"
}
@@ -5,6 +5,7 @@
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_I1_001.fastq.gz"
],
"Multiome.input_id":"10k_PBMC",
"Multiome.cloud_provider":"gcp",
"Multiome.gex_r1_fastq":[
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_R1_001.fastq.gz",
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_R1_001.fastq.gz"
5 changes: 5 additions & 0 deletions pipelines/skylab/optimus/Optimus.changelog.md
@@ -1,3 +1,8 @@
# 6.4.2
2024-03-01 (Date of Last Commit)
* Updated the Optimus.wdl to run on Azure.


# 6.4.1
2024-02-29 (Date of Last Commit)
* Added mem and disk to inputs of Join Barcodes task of Multiome workflow; does not impact the Optimus workflow