From e7cd966d81aa05c54e2b22932bf88f2acd9f6ea3 Mon Sep 17 00:00:00 2001 From: phendriksen100 <103142505+phendriksen100@users.noreply.github.com> Date: Thu, 22 Feb 2024 10:04:41 -0500 Subject: [PATCH 01/11] ph logic to pass in docker images based on cloud provider --- pipelines/skylab/multiome/Multiome.wdl | 17 +++++++++-- pipelines/skylab/multiome/atac.wdl | 42 +++++++++++++++++++------- tasks/skylab/FastqProcessing.wdl | 5 ++- tasks/skylab/H5adUtils.wdl | 15 ++++----- tasks/skylab/PairedTagUtils.wdl | 4 +-- 5 files changed, 58 insertions(+), 25 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 16113b5e8c..c2569956e5 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -9,6 +9,7 @@ workflow Multiome { String pipeline_version = "3.1.2" input { + String cloud_provider String input_id # Optimus Inputs @@ -34,7 +35,6 @@ workflow Multiome { Array[File] atac_r1_fastq Array[File] atac_r2_fastq Array[File] atac_r3_fastq - # BWA tar reference File tar_bwa_reference # Chromosone sizes @@ -50,9 +50,19 @@ workflow Multiome { } + # Determine docker prefix based on cloud provider + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # Define docker images + snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229" + + # Call the Optimus workflow call optimus.Optimus as Optimus { input: + cloud_provider = cloud_provider, counting_mode = counting_mode, r1_fastq = gex_r1_fastq, r2_fastq = gex_r2_fastq, @@ -76,6 +86,7 @@ workflow Multiome { # Call the ATAC workflow call atac.ATAC as Atac { input: + cloud_provider = cloud_provider, read1_fastq_gzipped = atac_r1_fastq, read2_fastq_gzipped = atac_r2_fastq, read3_fastq_gzipped = atac_r3_fastq, @@ -85,10 +96,12 @@ workflow Multiome { chrom_sizes = chrom_sizes, whitelist = atac_whitelist, adapter_seq_read1 = adapter_seq_read1, - adapter_seq_read3 = adapter_seq_read3 + adapter_seq_read3 = adapter_seq_read3, + ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes { input: + docker_path = docker_prefix + snap_atac_docker_image, atac_h5ad = Atac.snap_metrics, gex_h5ad = Optimus.h5ad_output_file, gex_whitelist = gex_whitelist, diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl index 4db04a9968..7e4105aaa3 100644 --- a/pipelines/skylab/multiome/atac.wdl +++ b/pipelines/skylab/multiome/atac.wdl @@ -18,6 +18,7 @@ workflow ATAC { # Output prefix/base name for all intermediate files and pipeline outputs String input_id + String cloud_provider # Option for running files with preindex Boolean preindex = false @@ -43,6 +44,18 @@ workflow ATAC { String pipeline_version = "1.1.7" + # Determine docker prefix based on cloud provider + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # Docker image names + String warp_tools_2_0_0 = "warp-tools:2.0.0" + String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919" + String sam_tools_docker = "samtools-dist-bwa:2.0.0" + String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" + String snap_atac_docker = "snapatac2:1.0.4-2.3.1" + parameter_meta { read1_fastq_gzipped: "read 1 FASTQ file as input for the 
pipeline, contains read 1 of paired reads" read2_fastq_gzipped: "read 2 FASTQ file as input for the pipeline, contains the cellular barcodes corresponding to the reads in the read1 FASTQ and read 3 FASTQ" @@ -69,7 +82,8 @@ workflow ATAC { barcodes_fastq = read2_fastq_gzipped, output_base_name = input_id, num_output_files = GetNumSplits.ranks_per_node_out, - whitelist = whitelist + whitelist = whitelist, + docker_path = docker_prefix + warp_tools_2_0_0 } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { @@ -79,7 +93,8 @@ workflow ATAC { read3_fastq = SplitFastq.fastq_R3_output_array[idx], output_base_name = input_id + "_" + idx, adapter_seq_read1 = adapter_seq_read1, - adapter_seq_read3 = adapter_seq_read3 + adapter_seq_read3 = adapter_seq_read3, + docker_path = docker_prefix + cutadapt_docker } } @@ -91,21 +106,24 @@ workflow ATAC { output_base_name = input_id, nthreads = num_threads_bwa, mem_size = mem_size_bwa, - cpu_platform = cpu_platform_bwa + cpu_platform = cpu_platform_bwa, + docker_path = docker_prefix + sam_tools_docker } if (preindex) { call AddBB.AddBBTag as BBTag { input: bam = BWAPairedEndAlignment.bam_aligned_output, - input_id = input_id + input_id = input_id, + docker_path = docker_prefix + upstools_docker } call CreateFragmentFile as BB_fragment { input: bam = BBTag.bb_bam, chrom_sizes = chrom_sizes, annotations_gtf = annotations_gtf, - preindex = preindex + preindex = preindex, + docker_path = docker_prefix + snap_atac_docker } } if (!preindex) { @@ -114,7 +132,8 @@ workflow ATAC { bam = BWAPairedEndAlignment.bam_aligned_output, chrom_sizes = chrom_sizes, annotations_gtf = annotations_gtf, - preindex = preindex + preindex = preindex, + docker_path = docker_prefix + snap_atac_docker } } @@ -231,7 +250,7 @@ task TrimAdapters { # Runtime attributes/docker Int disk_size = ceil(2 * ( size(read1_fastq, "GiB") + size(read3_fastq, "GiB") )) + 200 Int mem_size = 4 - String docker_image = "us.gcr.io/broad-gotc-prod/cutadapt:1.0.0-4.4-1686752919" + String docker_path } parameter_meta { @@ -269,7 +288,7 @@ task TrimAdapters { # use docker image for given tool cutadapat runtime { - docker: docker_image + docker: docker_path disks: "local-disk ${disk_size} HDD" memory: "${mem_size} GiB" } @@ -290,7 +309,7 @@ task BWAPairedEndAlignment { String read_group_sample_name = "RGSN1" String suffix = "trimmed_adapters.fastq.gz" String output_base_name - String docker_image = "us.gcr.io/broad-gotc-prod/samtools-dist-bwa:2.0.0" + String docker_path # Runtime attributes Int disk_size = 2000 @@ -418,7 +437,7 @@ task BWAPairedEndAlignment { >>> runtime { - docker: docker_image + docker: docker_path disks: "local-disk ${disk_size} SSD" cpu: nthreads cpuPlatform: cpu_platform @@ -442,6 +461,7 @@ task CreateFragmentFile { Int mem_size = 16 Int nthreads = 1 String cpuPlatform = "Intel Cascade Lake" + String docker_path } String bam_base_name = basename(bam, ".bam") @@ -492,7 +512,7 @@ task CreateFragmentFile { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1" + docker: docker_path disks: "local-disk ${disk_size} SSD" memory: "${mem_size} GiB" cpu: nthreads diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index ac22cc38aa..533a8b07d3 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -245,8 +245,7 @@ task FastqProcessATAC { String barcode_index1 = basename(barcodes_fastq[0]) # [?] 
copied from corresponding optimus wdl for fastqprocessing - # using the latest build of warp-tools in GCR - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.0" + String docker_path # Runtime attributes [?] Int mem_size = 5 @@ -361,7 +360,7 @@ task FastqProcessATAC { >>> runtime { - docker: docker + docker: docker_path cpu: cpu memory: "${mem_size} MiB" disks: "local-disk ${disk_size} HDD" diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index 4279f6ff6c..1720438994 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -186,22 +186,23 @@ task SingleNucleusOptimusH5adOutput { } task JoinMultiomeBarcodes { - input { + input { File atac_h5ad File atac_fragment File gex_h5ad File gex_whitelist File atac_whitelist + String docker_path Int nthreads = 1 String cpuPlatform = "Intel Cascade Lake" } - String gex_base_name = basename(gex_h5ad, ".h5ad") - String atac_base_name = basename(atac_h5ad, ".h5ad") - String atac_fragment_base = basename(atac_fragment, ".tsv") + String gex_base_name = basename(gex_h5ad, ".h5ad") + String atac_base_name = basename(atac_h5ad, ".h5ad") + String atac_fragment_base = basename(atac_fragment, ".tsv") - Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(gex_h5ad, "MiB") + size(atac_fragment, "MiB")) * 3) + 10000 - Int disk = ceil((size(atac_h5ad, "GiB") + size(gex_h5ad, "GiB") + size(atac_fragment, "GiB")) * 5) + 10 + Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(gex_h5ad, "MiB") + size(atac_fragment, "MiB")) * 3) + 10000 + Int disk = ceil((size(atac_h5ad, "GiB") + size(gex_h5ad, "GiB") + size(atac_fragment, "GiB")) * 5) + 10 parameter_meta { atac_h5ad: "The resulting h5ad from the ATAC workflow." @@ -280,7 +281,7 @@ task JoinMultiomeBarcodes { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1-1700590229" + docker: docker_path disks: "local-disk ~{disk} HDD" memory: "${machine_mem_mb} MiB" cpu: nthreads diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl index 779ac4fe57..802abdc4a2 100644 --- a/tasks/skylab/PairedTagUtils.wdl +++ b/tasks/skylab/PairedTagUtils.wdl @@ -132,7 +132,7 @@ task AddBBTag { String input_id # using the latest build of upstools docker in GCR - String docker = "us.gcr.io/broad-gotc-prod/upstools:1.0.0-2023.03.03-1704300311" + String docker_path # Runtime attributes Int mem_size = 8 @@ -169,7 +169,7 @@ task AddBBTag { >>> runtime { - docker: docker + docker: docker_path cpu: cpu memory: "${mem_size} GiB" disks: "local-disk ${disk_size} HDD" From b2921ee9138f7c1d3fd87ac1f8c175aba6d665e1 Mon Sep 17 00:00:00 2001 From: phendriksen100 <103142505+phendriksen100@users.noreply.github.com> Date: Thu, 22 Feb 2024 10:57:43 -0500 Subject: [PATCH 02/11] determine which whitelist files to use --- pipelines/skylab/multiome/Multiome.wdl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index c2569956e5..5953175ba3 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -27,7 +27,6 @@ workflow Multiome { Boolean ignore_r1_read_length = false String star_strand_mode = "Forward" Boolean count_exons = false - File gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" String? 
soloMultiMappers # ATAC inputs @@ -42,8 +41,6 @@ workflow Multiome { # Trimadapters input String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG" String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" - # Whitelist - File atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt" # CellBender Boolean run_cellbender = false @@ -58,6 +55,15 @@ workflow Multiome { # Define docker images snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229" + # Define all whitelist files + File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" + File gcp_atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt" + File azure_gex_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_gex.txt" + File azure_atac_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_atac.txt" + + Determine which whitelist files to use based on cloud provider + File gex_whitelist = if cloud_provider == "gcp" then gcp_gex_whitelist else azure_gex_whitelist + File atac_whitelist = if cloud_provider == "gcp" then gcp_atac_whitelist else azure_atac_whitelist # Call the Optimus workflow call optimus.Optimus as Optimus { From 6edd539d2726da73d963379cbd7f7d7fc005661d Mon Sep 17 00:00:00 2001 From: phendriksen100 <103142505+phendriksen100@users.noreply.github.com> Date: Thu, 22 Feb 2024 11:01:15 -0500 Subject: [PATCH 03/11] update tests --- pipelines/skylab/multiome/atac.json | 1 + .../multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json | 1 + pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json | 1 + 3 files changed, 3 insertions(+) diff --git a/pipelines/skylab/multiome/atac.json b/pipelines/skylab/multiome/atac.json index a8b9465fdc..1e898edd48 100644 --- a/pipelines/skylab/multiome/atac.json +++ b/pipelines/skylab/multiome/atac.json @@ -4,6 +4,7 @@ "ATAC.TrimAdapters.adapter_seq_read1": "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG", "ATAC.TrimAdapters.adapter_seq_read2": "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG", "ATAC.input_id": "scATAC", + "ATAC.cloud_provider":"gcp", "ATAC.tar_bwa_reference": "gs://fc-dd55e131-ef49-4d02-aa2a-20640daaae1e/submissions/8f0dd71a-b42f-4503-b839-3f146941758a/IndexRef/53a91851-1f6c-4ab9-af66-b338ffb28b5a/call-BwaMem2Index/GRCh38.primary_assembly.genome.bwamem2.fa.tar", "ATAC.preindex": "false" } diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json index 902b564388..4bd25eeba3 100644 --- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json +++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json @@ -1,6 +1,7 @@ { "Multiome.annotations_gtf":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", "Multiome.input_id":"10k_PBMC_downsampled", + "Multiome.cloud_provider":"gcp", "Multiome.gex_r1_fastq":[ "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R1_gex.fastq.gz" ], diff --git a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json index 846b91ed2d..3e7ffbe622 100644 --- a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json +++ b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json @@ -5,6 +5,7 @@ 
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_I1_001.fastq.gz" ], "Multiome.input_id":"10k_PBMC", + "Multiome.cloud_provider":"gcp", "Multiome.gex_r1_fastq":[ "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_R1_001.fastq.gz", "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_R1_001.fastq.gz" From 9892d109fa2e8a86b5d9da868c9747903c0a457f Mon Sep 17 00:00:00 2001 From: phendriksen100 <103142505+phendriksen100@users.noreply.github.com> Date: Thu, 29 Feb 2024 10:22:07 -0500 Subject: [PATCH 04/11] add parameter metadata --- pipelines/skylab/multiome/Multiome.wdl | 12 +++++++++--- pipelines/skylab/multiome/atac.wdl | 11 ++++++----- tasks/skylab/FastqProcessing.wdl | 2 +- tasks/skylab/PairedTagUtils.wdl | 4 +--- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 5953175ba3..eb3e8ff76d 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -65,6 +65,14 @@ workflow Multiome { File gex_whitelist = if cloud_provider == "gcp" then gcp_gex_whitelist else azure_gex_whitelist File atac_whitelist = if cloud_provider == "gcp" then gcp_atac_whitelist else azure_atac_whitelist + # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." 
+ } + } + # Call the Optimus workflow call optimus.Optimus as Optimus { input: @@ -102,8 +110,7 @@ workflow Multiome { chrom_sizes = chrom_sizes, whitelist = atac_whitelist, adapter_seq_read1 = adapter_seq_read1, - adapter_seq_read3 = adapter_seq_read3, - ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker + adapter_seq_read3 = adapter_seq_read3 } call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes { input: @@ -129,7 +136,6 @@ workflow Multiome { hardware_preemptible_tries = 2, hardware_zones = "us-central1-a us-central1-c", nvidia_driver_version = "470.82.01" - } } diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl index 7e4105aaa3..085d8841a4 100644 --- a/pipelines/skylab/multiome/atac.wdl +++ b/pipelines/skylab/multiome/atac.wdl @@ -42,7 +42,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "1.1.7" + String pipeline_version = "1.1.8" # Determine docker prefix based on cloud provider String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" @@ -51,8 +51,8 @@ workflow ATAC { # Docker image names String warp_tools_2_0_0 = "warp-tools:2.0.0" - String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919" - String sam_tools_docker = "samtools-dist-bwa:2.0.0" + String cutadapt_docker = "cutadapt:1.0.0-4.4-1709146458" + String sam_tools_docker = "samtools-dist-bwa:3.0.0" String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" String snap_atac_docker = "snapatac2:1.0.4-2.3.1" @@ -261,7 +261,7 @@ task TrimAdapters { adapter_seq_read1: "cutadapt option for the sequence adapter for read 1 fastq" adapter_seq_read3: "cutadapt option for the sequence adapter for read 3 fastq" output_base_name: "base name to be used for the output of the task" - docker_image: "the docker image using cutadapt to be used (default:us.gcr.io/broad-gotc-prod/cutadapt:1.0.0-4.4-1686752919)" + docker_path: "The docker image path containing the runtime environment for this task" mem_size: "the size of memory used during trimming adapters" disk_size : "disk size used in trimming adapters step" } @@ -328,7 +328,7 @@ task BWAPairedEndAlignment { mem_size: "the size of memory used during alignment" disk_size : "disk size used in bwa alignment step" output_base_name: "basename to be used for the output of the task" - docker_image: "the docker image using BWA to be used (default: us.gcr.io/broad-gotc-prod/samtools-bwa-mem-2:1.0.0-2.2.1_x64-linux-1685469504)" + docker_path: "The docker image path containing the runtime environment for this task" } String bam_aligned_output_name = output_base_name + ".bam" @@ -472,6 +472,7 @@ task CreateFragmentFile { chrom_sizes: "Text file containing chrom_sizes for genome build (i.e. hg38)." disk_size: "Disk size used in create fragment file step." mem_size: "The size of memory used in create fragment file." + docker_path: "The docker image path containing the runtime environment for this task" } command <<< diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index 533a8b07d3..6e588ca2b9 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -271,7 +271,7 @@ task FastqProcessATAC { read_structure: "A string that specifies the barcode (C) positions in the Read 2 fastq" barcode_orientation: "A string that specifies the orientation of barcode needed for scATAC data. The default is FIRST_BP. Other options include LAST_BP, FIRST_BP_RC or LAST_BP_RC." 
whitelist: "10x genomics cell barcode whitelist for scATAC" - docker: "(optional) the docker image containing the runtime environment for this task" + docker_path: "The docker image path containing the runtime environment for this task" mem_size: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk_size: "(optional) the amount of disk space (GiB) to provision for this task" diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl index 802abdc4a2..3abc7df45a 100644 --- a/tasks/skylab/PairedTagUtils.wdl +++ b/tasks/skylab/PairedTagUtils.wdl @@ -130,8 +130,6 @@ task AddBBTag { input { File bam String input_id - - # using the latest build of upstools docker in GCR String docker_path # Runtime attributes @@ -150,7 +148,7 @@ task AddBBTag { parameter_meta { bam: "BAM with aligned reads and barcode in the CB tag" input_id: "input ID" - docker: "(optional) the docker image containing the runtime environment for this task" + docker_path: "The docker image path containing the runtime environment for this task" mem_size: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk_size: "(optional) the amount of disk space (GiB) to provision for this task" From d745f79838631da8bff8ecf5e692df42e68228b3 Mon Sep 17 00:00:00 2001 From: phendriksen100 <103142505+phendriksen100@users.noreply.github.com> Date: Thu, 29 Feb 2024 10:27:37 -0500 Subject: [PATCH 05/11] add error handling in atac --- pipelines/skylab/multiome/atac.wdl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl index 085d8841a4..f4b98d21c3 100644 --- a/pipelines/skylab/multiome/atac.wdl +++ b/pipelines/skylab/multiome/atac.wdl @@ -56,6 +56,14 @@ workflow ATAC { String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" String snap_atac_docker = "snapatac2:1.0.4-2.3.1" + # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." 
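      # With the prefixes and image names declared above, each docker_path handed to a
      # task is plain string concatenation; for the fastq-processing call, for example:
      #
      #   cloud_provider == "gcp"   ->  "us.gcr.io/broad-gotc-prod/warp-tools:2.0.0"
      #   cloud_provider == "azure" ->  "dsppipelinedev.azurecr.io/warp-tools:2.0.0"
      #
      # The same image name and tag are expected to exist in both registries; only the
      # registry prefix varies with the cloud provider.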
+ } + } + parameter_meta { read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads" read2_fastq_gzipped: "read 2 FASTQ file as input for the pipeline, contains the cellular barcodes corresponding to the reads in the read1 FASTQ and read 3 FASTQ" @@ -65,7 +73,6 @@ workflow ATAC { num_threads_bwa: "Number of threads for bwa-mem2 task (default: 128)" mem_size_bwa: "Memory size in GB for bwa-mem2 task (default: 512)" cpu_platform_bwa: "CPU platform for bwa-mem2 task (default: Intel Ice Lake)" - } call GetNumSplits { From 2cb955580de073c8e5b9f560c7ccfe607137b55b Mon Sep 17 00:00:00 2001 From: phendriksen100 <103142505+phendriksen100@users.noreply.github.com> Date: Thu, 29 Feb 2024 13:14:25 -0500 Subject: [PATCH 06/11] fix comment --- pipelines/skylab/multiome/Multiome.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index eb3e8ff76d..71c4ecb0c9 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -61,7 +61,7 @@ workflow Multiome { File azure_gex_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_gex.txt" File azure_atac_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_atac.txt" - Determine which whitelist files to use based on cloud provider + # Determine which whitelist files to use based on cloud provider File gex_whitelist = if cloud_provider == "gcp" then gcp_gex_whitelist else azure_gex_whitelist File atac_whitelist = if cloud_provider == "gcp" then gcp_atac_whitelist else azure_atac_whitelist From 6c0bcd02e77340e208138c3828ef5c68c42301b7 Mon Sep 17 00:00:00 2001 From: phendriksen100 <103142505+phendriksen100@users.noreply.github.com> Date: Thu, 29 Feb 2024 13:18:37 -0500 Subject: [PATCH 07/11] PR comments --- pipelines/skylab/multiome/Multiome.wdl | 2 +- pipelines/skylab/multiome/atac.wdl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 0e71497b74..6da9c52508 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -52,7 +52,7 @@ workflow Multiome { String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix # Define docker images - snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229" + String snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229" # Define all whitelist files File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl index f4b98d21c3..2ef9203a21 100644 --- a/pipelines/skylab/multiome/atac.wdl +++ b/pipelines/skylab/multiome/atac.wdl @@ -52,7 +52,7 @@ workflow ATAC { # Docker image names String warp_tools_2_0_0 = "warp-tools:2.0.0" String cutadapt_docker = "cutadapt:1.0.0-4.4-1709146458" - String sam_tools_docker = "samtools-dist-bwa:3.0.0" + String samtools_docker = "samtools-dist-bwa:3.0.0" String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" String snap_atac_docker = "snapatac2:1.0.4-2.3.1" @@ -114,7 +114,7 @@ workflow ATAC { nthreads = num_threads_bwa, mem_size = mem_size_bwa, cpu_platform = cpu_platform_bwa, - docker_path = docker_prefix + sam_tools_docker + docker_path = docker_prefix + samtools_docker } if (preindex) { From 
a23afae0feab84d216cd13b5c8876982200433db Mon Sep 17 00:00:00 2001 From: phendriksen100 <103142505+phendriksen100@users.noreply.github.com> Date: Thu, 29 Feb 2024 13:45:30 -0500 Subject: [PATCH 08/11] update image and add utils --- pipelines/skylab/multiome/Multiome.wdl | 3 ++- pipelines/skylab/multiome/atac.wdl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 6da9c52508..071f350803 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -4,6 +4,7 @@ import "../../../pipelines/skylab/multiome/atac.wdl" as atac import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender +import "../../../tasks/broad/Utilities.wdl" as utils workflow Multiome { String pipeline_version = "3.2.0" @@ -52,7 +53,7 @@ workflow Multiome { String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix # Define docker images - String snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229" + String snap_atac_docker_image = "snapatac2:1.0.5-2.3.2-1709230223" # Define all whitelist files File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl index 2ef9203a21..f27f594e4d 100644 --- a/pipelines/skylab/multiome/atac.wdl +++ b/pipelines/skylab/multiome/atac.wdl @@ -3,6 +3,7 @@ version 1.0 import "../../../tasks/skylab/MergeSortBam.wdl" as Merge import "../../../tasks/skylab/FastqProcessing.wdl" as FastqProcessing import "../../../tasks/skylab/PairedTagUtils.wdl" as AddBB +"../../../tasks/broad/Utilities.wdl" as utils workflow ATAC { meta { From 8127ea8c43abb47e7e233ab532da04dbc867d6ce Mon Sep 17 00:00:00 2001 From: phendriksen100 <103142505+phendriksen100@users.noreply.github.com> Date: Thu, 29 Feb 2024 14:01:17 -0500 Subject: [PATCH 09/11] add import --- pipelines/skylab/multiome/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl index f27f594e4d..bbfaf67f47 100644 --- a/pipelines/skylab/multiome/atac.wdl +++ b/pipelines/skylab/multiome/atac.wdl @@ -3,7 +3,7 @@ version 1.0 import "../../../tasks/skylab/MergeSortBam.wdl" as Merge import "../../../tasks/skylab/FastqProcessing.wdl" as FastqProcessing import "../../../tasks/skylab/PairedTagUtils.wdl" as AddBB -"../../../tasks/broad/Utilities.wdl" as utils +import "../../../tasks/broad/Utilities.wdl" as utils workflow ATAC { meta { From 445995a5304fecd35eefaa68b74d24b06ab22971 Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 1 Mar 2024 10:43:37 -0500 Subject: [PATCH 10/11] small change --- pipelines/skylab/snM3C/snM3C.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/pipelines/skylab/snM3C/snM3C.wdl b/pipelines/skylab/snM3C/snM3C.wdl index bcdc71a861..bac72eb68c 100644 --- a/pipelines/skylab/snM3C/snM3C.wdl +++ b/pipelines/skylab/snM3C/snM3C.wdl @@ -23,7 +23,6 @@ workflow snM3C { Int num_downstr_bases = 2 Int compress_level = 5 Int batch_number - } # version of the pipeline From cae0f54dd1480271a273fc40351b1fdb56d5e233 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com> Date: Wed, 6 Mar 2024 13:21:43 -0500 Subject: [PATCH 11/11] 
azurize optimus (#1228) * add logic to choose which docker * fix param_meta and import * add cloud provider to checkinput * handle hard coded white list paths in CheckInputs.wdl * last few dockers * last few dockers * last few dockers * change error msg * use ubuntu image * use ubuntu image * change whitelists * point to azure public whitelists * add sas token * echo whitelist * echo whitelist * testing for coa * testing for coa * change back to terra buckets for whitelists * change whitelists to point at public azure bucket * files to strings * print statemtns to checkinputs * string to files * change to terra bucket paths * strings not files * append sas token * append sas token * append sas and use strings * back to bucket urls * back to bucket urls * use google cloud urls * using public urls * trying to export sas_token * trying to export sas_token * trying to export sas_token * terra on gcp * update azure whitelist files * changelogs * changelogs * changelogs * changelogs * fix some inputs * fix some inputs * fix some inputs * fix some inputs * update optimus dockers * warp_tools_docker_path for staralign * stop using ice lake as default * update pipeline docs * 2 threads * counting mode * changelogs --------- Co-authored-by: phendriksen100 <103142505+phendriksen100@users.noreply.github.com> Co-authored-by: kayleemathews --- .../skylab/multiome/Multiome.changelog.md | 5 + pipelines/skylab/multiome/Multiome.wdl | 6 +- pipelines/skylab/multiome/atac.changelog.md | 7 +- pipelines/skylab/multiome/atac.wdl | 2 +- .../Plumbing/10k_pbmc_downsampled.json | 3 +- pipelines/skylab/optimus/Optimus.changelog.md | 5 + pipelines/skylab/optimus/Optimus.wdl | 101 ++++++++++++++---- .../Plumbing/human_v3_example.json | 3 +- .../Plumbing/mouse_v2_example.json | 3 +- .../Plumbing/mouse_v2_snRNA_example.json | 3 +- .../skylab/paired_tag/PairedTag.changelog.md | 4 + pipelines/skylab/paired_tag/PairedTag.wdl | 2 +- .../skylab/slideseq/SlideSeq.changelog.md | 8 ++ pipelines/skylab/slideseq/SlideSeq.wdl | 57 ++++++++-- .../Plumbing/Puck_210817_11.mm10.json | 3 +- ...iSampleSmartSeq2SingleNucleus.changelog.md | 10 ++ .../MultiSampleSmartSeq2SingleNucleus.wdl | 22 +++- .../test_inputs/Plumbing/mouse_example.json | 3 +- pipelines/skylab/snM3C/snM3C.changelog.md | 2 +- pipelines/skylab/snM3C/snM3C.wdl | 1 + tasks/skylab/CheckInputs.wdl | 36 ++++++- tasks/skylab/FastqProcessing.wdl | 7 +- tasks/skylab/H5adUtils.wdl | 8 +- tasks/skylab/MergeSortBam.wdl | 6 +- tasks/skylab/Metrics.wdl | 13 +-- tasks/skylab/RunEmptyDrops.wdl | 4 +- tasks/skylab/StarAlign.wdl | 26 +++-- .../TestMultiSampleSmartSeq2SingleNucleus.wdl | 5 +- verification/test-wdls/TestMultiome.wdl | 4 +- verification/test-wdls/TestOptimus.wdl | 5 +- verification/test-wdls/TestSlideSeq.wdl | 4 +- website/docs/Pipelines/ATAC/README.md | 2 +- .../Pipelines/Multiome_Pipeline/README.md | 5 +- .../docs/Pipelines/Optimus_Pipeline/README.md | 3 +- .../Pipelines/PairedTag_Pipeline/README.md | 2 +- .../Pipelines/SlideSeq_Pipeline/README.md | 3 +- .../README.md | 3 +- .../multi_snss2.methods.md | 4 +- 38 files changed, 301 insertions(+), 89 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index da8bc38753..6a82ca00da 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,3 +1,8 @@ +# 3.2.2 +2024-03-01 (Date of Last Commit) + +* Updated the Optimus.wdl to run on Azure. This change does not affect the Multiome pipeline. 
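A note on how the provider switch in this PR behaves: the docker-prefix and whitelist selections are plain if/else expressions, so any value other than "gcp" falls through to the Azure branch. The explicit guard that accepts only "gcp" or "azure" is what turns a typo into a hard failure -- a minimal sketch using the names from this PR:

```wdl
String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

# Without the ErrorWithMessage guard, cloud_provider = "aws" (or a typo such as "gpc")
# would silently select the Azure prefix instead of failing fast.
```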
+ # 3.2.1 2024-02-29 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 1e6bc2edae..64aa671836 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -6,10 +6,11 @@ import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender workflow Multiome { - String pipeline_version = "3.2.1" + String pipeline_version = "3.2.2" input { String input_id + String cloud_provider # Optimus Inputs String counting_mode = "sn_rna" @@ -68,7 +69,8 @@ workflow Multiome { ignore_r1_read_length = ignore_r1_read_length, star_strand_mode = star_strand_mode, count_exons = count_exons, - soloMultiMappers = soloMultiMappers + soloMultiMappers = soloMultiMappers, + cloud_provider = cloud_provider } # Call the ATAC workflow diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md index 170caa2aed..005a2fb782 100644 --- a/pipelines/skylab/multiome/atac.changelog.md +++ b/pipelines/skylab/multiome/atac.changelog.md @@ -1,4 +1,9 @@ -# 1.1.8 +# 1.1.9 +2024-03-01 (Date of Last Commit) + +* Updated the Optimus.wdl to run on Azure. This change does not affect the ATAC pipeline. + +* # 1.1.8 2024-02-07 (Date of Last Commit) * Updated the Metrics tasks to exclude mitochondrial genes from reads_mapped_uniquely, reads_mapped_multiple and reads_mapped_exonic, reads_mapped_exonic_as and reads_mapped_intergenic diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl index 3dd81d7bf5..0431ba3997 100644 --- a/pipelines/skylab/multiome/atac.wdl +++ b/pipelines/skylab/multiome/atac.wdl @@ -41,7 +41,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "1.1.8" + String pipeline_version = "1.1.9" parameter_meta { read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads" diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json index 7d15111f38..bd9b7a1172 100644 --- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json +++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json @@ -23,5 +23,6 @@ "Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake", "Multiome.Atac.num_threads_bwa":"16", "Multiome.Atac.mem_size_bwa":"64", - "Multiome.soloMultiMappers":"Uniform" + "Multiome.soloMultiMappers":"Uniform", + "Multiome.cloud_provider":"gcp" } diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index 23098dd7a0..d76bedaed5 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,3 +1,8 @@ +# 6.4.2 +2024-03-01 (Date of Last Commit) +* Updated the Optimus.wdl to run on Azure. 
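The azurization below follows one mechanical pattern across every task: drop the hard-coded docker default, take a required `String docker_path` input, and let the workflow pass `docker_prefix + <image name>` at each call site. A minimal, self-contained sketch of the pattern -- `ExampleTask`, its command, and the `Example` workflow are illustrative stand-ins, not code from this PR:

```wdl
version 1.0

task ExampleTask {
  input {
    File input_file
    String docker_path  # full image path, e.g. "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
  }
  command <<<
    wc -l ~{input_file} > line_count.txt
  >>>
  runtime {
    docker: docker_path  # no default; the caller decides gcp vs. azure
  }
  output {
    File line_count = "line_count.txt"
  }
}

workflow Example {
  input {
    String cloud_provider
    File input_file
  }

  # Same selection logic as the Optimus/Multiome/SlideSeq changes below
  String docker_prefix = if cloud_provider == "gcp" then "us.gcr.io/broad-gotc-prod/" else "dsppipelinedev.azurecr.io/"

  call ExampleTask {
    input:
      input_file = input_file,
      docker_path = docker_prefix + "warp-tools:2.0.1"
  }

  output {
    File line_count = ExampleTask.line_count
  }
}
```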
+ + # 6.4.1 2024-02-29 (Date of Last Commit) * Added mem and disk to inputs of Join Barcodes task of Multiome workflow; does not impact the Optimus workflow diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 159490afbf..ccfa5e35e5 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -7,6 +7,7 @@ import "../../../tasks/skylab/RunEmptyDrops.wdl" as RunEmptyDrops import "../../../tasks/skylab/CheckInputs.wdl" as OptimusInputChecks import "../../../tasks/skylab/MergeSortBam.wdl" as Merge import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils +import "../../../tasks/broad/Utilities.wdl" as utils workflow Optimus { meta { @@ -14,6 +15,8 @@ workflow Optimus { } input { + String cloud_provider + # Mode for counting either "sc_rna" or "sn_rna" String counting_mode = "sc_rna" @@ -45,36 +48,71 @@ workflow Optimus { # Set to true to override input checks and allow pipeline to proceed with invalid input Boolean force_no_check = false - + # Check that tenx_chemistry_version matches the length of the read 1 fastq; # Set to true if you expect that r1_read_length does not match length of UMIs/barcodes for 10x chemistry v2 (26 bp) or v3 (28 bp). Boolean ignore_r1_read_length = false # Set to Forward, Reverse, or Unstranded to account for stranded library preparations (per STARsolo documentation) String star_strand_mode = "Forward" - + # Set to true to count reads aligned to exonic regions in sn_rna mode Boolean count_exons = false # this pipeline does not set any preemptible varibles and only relies on the task-level preemptible settings # you could override the tasklevel preemptible settings by passing it as one of the workflows inputs # for example: `"Optimus.StarAlign.preemptible": 3` will let the StarAlign task, which by default disables the - # usage of preemptible machines, attempt to request for preemptible instance up to 3 times. + # usage of preemptible machines, attempt to request for preemptible instance up to 3 times. } # version of this pipeline - String pipeline_version = "6.4.1" + String pipeline_version = "6.4.2" # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays Array[Int] indices = range(length(r1_fastq)) # 10x parameters - File whitelist_v2 = "gs://gcp-public-data--broad-references/RNA/resources/737k-august-2016.txt" - File whitelist_v3 = "gs://gcp-public-data--broad-references/RNA/resources/3M-febrary-2018.txt" + File gcp_whitelist_v2 = "gs://gcp-public-data--broad-references/RNA/resources/737k-august-2016.txt" + File gcp_whitelist_v3 = "gs://gcp-public-data--broad-references/RNA/resources/3M-febrary-2018.txt" + File azure_whitelist_v2 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/737k-august-2016.txt" + File azure_whitelist_v3 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/3M-febrary-2018.txt" + # Takes the first read1 FASTQ from the inputs to check for chemistry match File r1_single_fastq = r1_fastq[0] + # docker images + String picard_cloud_docker = "picard-cloud:2.26.10" + String pytools_docker = "pytools:1.0.0-1661263730" + String empty_drops_docker = "empty-drops:1.0.1-4.2" + String star_docker = "star:1.0.1-2.7.11a-1692706072" + String warp_tools_docker_2_0_1 = "warp-tools:2.0.1" + String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985" + #TODO how do we handle these? 
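    # Resolved per provider, the two images defined just below do not live under the
    # shared us.gcr.io/broad-gotc-prod/ prefix, which is why they carry their own
    # prefix selectors (and why the TODO above exists):
    #
    #   alpine:  gcp   -> "bashell/alpine-bash:latest"        (Docker Hub namespace)
    #            azure -> "dsppipelinedev.azurecr.io/alpine-bash:latest"
    #   ubuntu:  gcp   -> "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest"
    #            azure -> "dsppipelinedev.azurecr.io/ubuntu_16_0_4:latest"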
+ String alpine_docker = "alpine-bash:latest" + String gcp_alpine_docker_prefix = "bashell/" + String acr_alpine_docker_prefix = "dsppipelinedev.azurecr.io/" + String alpine_docker_prefix = if cloud_provider == "gcp" then gcp_alpine_docker_prefix else acr_alpine_docker_prefix + + String ubuntu_docker = "ubuntu_16_0_4:latest" + String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/" + String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/" + String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix + + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + + # choose docker prefix based on cloud provider + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # make sure either gcp or azr is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." + } + } + parameter_meta { r1_fastq: "forward read, contains cell barcodes and molecule barcodes" r2_fastq: "reverse read, contains cDNA fragment generated from captured mRNA" @@ -96,16 +134,21 @@ workflow Optimus { force_no_check = force_no_check, counting_mode = counting_mode, count_exons = count_exons, - whitelist_v2 = whitelist_v2, - whitelist_v3 = whitelist_v3, + gcp_whitelist_v2 = gcp_whitelist_v2, + gcp_whitelist_v3 = gcp_whitelist_v3, + azure_whitelist_v2 = azure_whitelist_v2, + azure_whitelist_v3 = azure_whitelist_v3, tenx_chemistry_version = tenx_chemistry_version, r1_fastq = r1_single_fastq, - ignore_r1_read_length = ignore_r1_read_length + ignore_r1_read_length = ignore_r1_read_length, + cloud_provider = cloud_provider, + alpine_docker_path = alpine_docker_prefix + alpine_docker } call StarAlign.STARGenomeRefVersion as ReferenceCheck { input: - tar_star_reference = tar_star_reference + tar_star_reference = tar_star_reference, + ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } call FastqProcessing.FastqProcessing as SplitFastq { @@ -116,7 +159,8 @@ workflow Optimus { whitelist = whitelist, chemistry = tenx_chemistry_version, sample_id = input_id, - read_struct = read_struct + read_struct = read_struct, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { @@ -131,21 +175,24 @@ workflow Optimus { counting_mode = counting_mode, count_exons = count_exons, output_bam_basename = output_bam_basename + "_" + idx, - soloMultiMappers = soloMultiMappers + soloMultiMappers = soloMultiMappers, + star_docker_path = docker_prefix + star_docker } } call Merge.MergeSortBamFiles as MergeBam { input: bam_inputs = STARsoloFastq.bam_output, output_bam_filename = output_bam_basename + ".bam", - sort_order = "coordinate" + sort_order = "coordinate", + picard_cloud_docker_path = docker_prefix + picard_cloud_docker } call Metrics.CalculateGeneMetrics as GeneMetrics { input: bam_input = MergeBam.output_bam, mt_genes = mt_genes, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } call Metrics.CalculateCellMetrics as CellMetrics { @@ -153,7 +200,8 @@ workflow Optimus { bam_input = MergeBam.output_bam, mt_genes = mt_genes, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = 
docker_prefix + warp_tools_docker_2_0_1 } call StarAlign.MergeStarOutput as MergeStarOutputs { @@ -165,7 +213,9 @@ workflow Optimus { summary = STARsoloFastq.summary, align_features = STARsoloFastq.align_features, umipercell = STARsoloFastq.umipercell, - input_id = input_id + input_id = input_id, + counting_mode = counting_mode, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2 } if (counting_mode == "sc_rna"){ call RunEmptyDrops.RunEmptyDrops { @@ -173,7 +223,8 @@ workflow Optimus { sparse_count_matrix = MergeStarOutputs.sparse_counts, row_index = MergeStarOutputs.row_index, col_index = MergeStarOutputs.col_index, - emptydrops_lower = emptydrops_lower + emptydrops_lower = emptydrops_lower, + empty_drops_docker_path = docker_prefix + empty_drops_docker } } @@ -192,7 +243,8 @@ workflow Optimus { gene_id = MergeStarOutputs.col_index, empty_drops_result = RunEmptyDrops.empty_drops_result, counting_mode = counting_mode, - pipeline_version = "Optimus_v~{pipeline_version}" + pipeline_version = "Optimus_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } } if (count_exons && counting_mode=="sn_rna") { @@ -202,7 +254,13 @@ workflow Optimus { features = STARsoloFastq.features_sn_rna, matrix = STARsoloFastq.matrix_sn_rna, cell_reads = STARsoloFastq.cell_reads_sn_rna, - input_id = input_id + input_id = input_id, + counting_mode = "sc_rna", + summary = STARsoloFastq.summary_sn_rna, + align_features = STARsoloFastq.align_features_sn_rna, + umipercell = STARsoloFastq.umipercell_sn_rna, + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2 } call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{ input: @@ -219,7 +277,8 @@ workflow Optimus { sparse_count_matrix_exon = MergeStarOutputsExons.sparse_counts, cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = MergeStarOutputsExons.col_index, - pipeline_version = "Optimus_v~{pipeline_version}" + pipeline_version = "Optimus_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } } @@ -238,11 +297,13 @@ workflow Optimus { File gene_metrics = GeneMetrics.gene_metrics File? cell_calls = RunEmptyDrops.empty_drops_result File? aligner_metrics = MergeStarOutputs.cell_reads_out + File? library_metrics = MergeStarOutputs.library_metrics Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix Array[File?] 
multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix + # h5ad File h5ad_output_file = final_h5ad_output } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json index 612659d25c..667e632bbd 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json @@ -15,5 +15,6 @@ "Optimus.input_id": "pbmc_human_v3", "Optimus.tenx_chemistry_version": "3", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", - "Optimus.star_strand_mode": "Forward" + "Optimus.star_strand_mode": "Forward", + "Optimus.cloud_provider": "gcp" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json index 0dc26af9fd..33e7553cb4 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json @@ -27,5 +27,6 @@ "Optimus.input_id": "neurons2k_mouse", "Optimus.tenx_chemistry_version": "2", "Optimus.star_strand_mode": "Unstranded", - "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf" + "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf", + "Optimus.cloud_provider": "gcp" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json index 787a1a8347..fef0bd0f76 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json @@ -25,5 +25,6 @@ "Optimus.star_strand_mode": "Unstranded", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf", "Optimus.counting_mode": "sn_rna", - "Optimus.count_exons": true + "Optimus.count_exons": true, + "Optimus.cloud_provider": "gcp" } diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index 17255ab77f..ca066704a4 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,3 +1,7 @@ +# 0.2.1 +2024-03-01 (Date of Last Commit) +* Updated the Optimus.wdl to run on Azure. This change does not affect the PairedTag pipeline. 
+ # 0.2.0 2024-02-29 (Date of Last Commit) * Added mem and disk to inputs of Join Barcodes task of Multiome workflow; does not impact the Paired-tag workflow diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index eb11e9acc4..29d2594152 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -5,7 +5,7 @@ import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing workflow PairedTag { - String pipeline_version = "0.2.0" + String pipeline_version = "0.2.1" input { String input_id diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index e041750353..1817b2665b 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,3 +1,11 @@ +# 3.1.3 +2024-03-01 (Date of Last Commit) +* Updated the Optimus.wdl to run on Azure. This change does not affect the SlideSeq pipeline. + +# 3.1.2 +2024-02-28 (Date of Last Commit) +* Updated the Optimus workflow to produce a library-level metrics CSV; this does not impact the slide-seq pipeline + # 3.1.1 2024-02-29 (Date of Last Commit) * Added mem and disk to inputs of Join Barcodes task of Multiome workflow; does not impact the Slideseq workflow diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index 66f6001da8..bc8df16dde 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -6,6 +6,8 @@ import "../../../tasks/skylab/Metrics.wdl" as Metrics import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "../../../tasks/skylab/CheckInputs.wdl" as OptimusInputChecks import "../../../tasks/skylab/MergeSortBam.wdl" as Merge +import "../../../tasks/broad/Utilities.wdl" as utils + ## Copyright Broad Institute, 2022 ## @@ -23,7 +25,7 @@ import "../../../tasks/skylab/MergeSortBam.wdl" as Merge workflow SlideSeq { - String pipeline_version = "3.1.1" + String pipeline_version = "3.1.3" input { Array[File] r1_fastq @@ -39,6 +41,33 @@ workflow SlideSeq { Boolean count_exons = true File bead_locations + String cloud_provider + + } + + # docker images + String pytools_docker = "pytools:1.0.0-1661263730" + String picard_cloud_docker = "picard-cloud:2.26.10" + String warp_tools_docker_2_0_1 = "warp-tools:2.0.1" + String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985" + + String ubuntu_docker = "ubuntu_16_0_4:latest" + String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/" + String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/" + String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix + + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + + # choose docker prefix based on cloud provider + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # make sure either gcp or azr is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." 
+ } } parameter_meta { @@ -51,7 +80,8 @@ workflow SlideSeq { call StarAlign.STARGenomeRefVersion as ReferenceCheck { input: - tar_star_reference = tar_star_reference + tar_star_reference = tar_star_reference, + ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } call Metrics.FastqMetricsSlideSeq as FastqMetrics { @@ -86,13 +116,15 @@ workflow SlideSeq { input: bam_inputs = STARsoloFastqSlideSeq.bam_output, output_bam_filename = output_bam_basename + ".bam", - sort_order = "coordinate" + sort_order = "coordinate", + picard_cloud_docker_path = docker_prefix + picard_cloud_docker } call Metrics.CalculateGeneMetrics as GeneMetrics { input: bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } call Metrics.CalculateUMIsMetrics as UMIsMetrics { input: @@ -105,7 +137,9 @@ workflow SlideSeq { input: bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 + } call StarAlign.MergeStarOutput as MergeStarOutputs { @@ -113,7 +147,8 @@ workflow SlideSeq { barcodes = STARsoloFastqSlideSeq.barcodes, features = STARsoloFastqSlideSeq.features, matrix = STARsoloFastqSlideSeq.matrix, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2 } if ( !count_exons ) { call H5adUtils.OptimusH5adGeneration as SlideseqH5adGeneration{ @@ -126,7 +161,9 @@ workflow SlideSeq { cell_id = MergeStarOutputs.row_index, gene_id = MergeStarOutputs.col_index, add_emptydrops_data = "no", - pipeline_version = "SlideSeq_v~{pipeline_version}" + pipeline_version = "SlideSeq_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 + } } if (count_exons) { @@ -135,7 +172,8 @@ workflow SlideSeq { barcodes = STARsoloFastqSlideSeq.barcodes_sn_rna, features = STARsoloFastqSlideSeq.features_sn_rna, matrix = STARsoloFastqSlideSeq.matrix_sn_rna, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2 } call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{ input: @@ -149,7 +187,8 @@ workflow SlideSeq { sparse_count_matrix_exon = MergeStarOutputsExons.sparse_counts, cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = MergeStarOutputsExons.col_index, - pipeline_version = "SlideSeq_v~{pipeline_version}" + pipeline_version = "SlideSeq_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } } diff --git a/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json b/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json index d8998d1d9b..035b22c58e 100644 --- a/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json +++ b/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json @@ -13,5 +13,6 @@ "SlideSeq.tar_star_reference": "gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/star/modified_star_2.7.9a_primary_gencode_mouse_vM23.tar", "SlideSeq.annotations_gtf": "gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf", "SlideSeq.count_exons": true, - "SlideSeq.bead_locations": " gs://broad-gotc-test-storage/SlideSeq/inputs/plumbing/Puck_210817_11/Puck_210817_11.tsv" + "SlideSeq.bead_locations": " 
gs://broad-gotc-test-storage/SlideSeq/inputs/plumbing/Puck_210817_11/Puck_210817_11.tsv", + "SlideSeq.cloud_provider": "gcp" } \ No newline at end of file diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index 64b516e8b9..d3c50e9282 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,3 +1,13 @@ +# 1.3.2 +2024-03-01 (Date of Last Commit) + +* Updated the Optimus.wdl to run on Azure. This change does not affect the MultiSampleSmartSeq2SingleNucleus pipeline. + +# 1.3.1 +2024-02-28 (Date of Last Commit) + +* Updated the Optimus workflow to produce a library-level metrics CSV; this does not impact the Single-nucleus Multi Sample Smart-seq2 pipeline + # 1.3.0 2024-01-22 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index 7a4c1066f8..312e447204 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -6,6 +6,7 @@ import "../../../tasks/skylab/StarAlign.wdl" as StarAlign import "../../../tasks/skylab/Picard.wdl" as Picard import "../../../tasks/skylab/FeatureCounts.wdl" as CountAlignments import "../../../tasks/skylab/LoomUtils.wdl" as LoomUtils +import "../../../tasks/broad/Utilities.wdl" as utils workflow MultiSampleSmartSeq2SingleNucleus { meta { @@ -38,9 +39,25 @@ workflow MultiSampleSmartSeq2SingleNucleus { Array[String]? organ String? input_name_metadata_field String? input_id_metadata_field + + String cloud_provider + } + + String ubuntu_docker = "ubuntu_16_0_4:latest" + String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/" + String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/" + String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix + + # make sure either gcp or azr is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." + } } + # Version of this pipeline - String pipeline_version = "1.3.0" + String pipeline_version = "1.3.2" if (false) { String? 
 none = "None"
@@ -72,7 +89,8 @@ workflow MultiSampleSmartSeq2SingleNucleus {
 
     call StarAlign.STARGenomeRefVersion as ReferenceCheck {
       input:
-        tar_star_reference = tar_star_reference
+        tar_star_reference = tar_star_reference,
+        ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker
     }
 
     call TrimAdapters.TrimAdapters as TrimAdapters {
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json b/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json
index 8fafd92173..db8f68b114 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json
@@ -18,5 +18,6 @@
     "SM-GE644_S117_E1-50_GCGTAGTA-AAGGAGTA",
     "SM-GE644_S118_E1-50_GCGTAGTA-CTAAGCCT"
   ],
-  "MultiSampleSmartSeq2SingleNucleus.batch_id": "SM-GE644"
+  "MultiSampleSmartSeq2SingleNucleus.batch_id": "SM-GE644",
+  "MultiSampleSmartSeq2SingleNucleus.cloud_provider": "gcp"
 }
diff --git a/pipelines/skylab/snM3C/snM3C.changelog.md b/pipelines/skylab/snM3C/snM3C.changelog.md
index dc90a21239..f3fb853b6c 100644
--- a/pipelines/skylab/snM3C/snM3C.changelog.md
+++ b/pipelines/skylab/snM3C/snM3C.changelog.md
@@ -1,7 +1,7 @@
 # 2.0.1
 2024-2-15 (Date of Last Commit)
 
-* Updated the snM3C task memory, disk, and CPUs 
+* Updated the snM3C task memory, disk, and CPUs
 
 # 2.0.0
 2024-2-13 (Date of Last Commit)
diff --git a/pipelines/skylab/snM3C/snM3C.wdl b/pipelines/skylab/snM3C/snM3C.wdl
index bac72eb68c..bcdc71a861 100644
--- a/pipelines/skylab/snM3C/snM3C.wdl
+++ b/pipelines/skylab/snM3C/snM3C.wdl
@@ -23,6 +23,7 @@ workflow snM3C {
     Int num_downstr_bases = 2
     Int compress_level = 5
     Int batch_number
+
   }
 
   # version of the pipeline
diff --git a/tasks/skylab/CheckInputs.wdl b/tasks/skylab/CheckInputs.wdl
index b24c77c133..89b99c7798 100644
--- a/tasks/skylab/CheckInputs.wdl
+++ b/tasks/skylab/CheckInputs.wdl
@@ -55,6 +55,7 @@ task checkInputArrays {
 
 task checkOptimusInput {
   input {
+    String cloud_provider
     File r1_fastq
     String counting_mode
     Boolean force_no_check
@@ -63,9 +64,12 @@ task checkOptimusInput {
     Int machine_mem_mb = 1000
     Int cpu = 1
     Int tenx_chemistry_version
-    String whitelist_v2
-    String whitelist_v3
+    String gcp_whitelist_v2
+    String gcp_whitelist_v3
+    String azure_whitelist_v2
+    String azure_whitelist_v3
     Boolean ignore_r1_read_length
+    String alpine_docker_path
   }
 
   meta {
@@ -108,15 +112,36 @@ task checkOptimusInput {
       echo "ERROR: Invalid value count_exons should not be used with \"${counting_mode}\" input."
     fi
   fi
+  # Check for chemistry version to produce read structure and whitelist
   if [[ ~{tenx_chemistry_version} == 2 ]]
   then
-    WHITELIST=~{whitelist_v2}
+    if [[ "~{cloud_provider}" == "gcp" ]]
+    then
+      WHITELIST=~{gcp_whitelist_v2}
+    elif [[ "~{cloud_provider}" == "azure" ]]
+    then
+      WHITELIST=~{azure_whitelist_v2}
+    else
+      pass="false"
+      echo "ERROR: Cloud provider must be either gcp or azure"
+    fi
+    echo "WHITELIST:" $WHITELIST
     echo $WHITELIST > whitelist.txt
     echo 16C10M > read_struct.txt
   elif [[ ~{tenx_chemistry_version} == 3 ]]
   then
-    WHITELIST=~{whitelist_v3}
+    if [[ "~{cloud_provider}" == "gcp" ]]
+    then
+      WHITELIST=~{gcp_whitelist_v3}
+    elif [[ "~{cloud_provider}" == "azure" ]]
+    then
+      WHITELIST=~{azure_whitelist_v3}
+    else
+      pass="false"
+      echo "ERROR: Cloud provider must be either gcp or azure"
+    fi
+    echo "WHITELIST:" $WHITELIST
     echo $WHITELIST > whitelist.txt
     echo 16C12M > read_struct.txt
   else
@@ -153,7 +178,7 @@ task checkOptimusInput {
     String read_struct_out = read_string("read_struct.txt")
   }
   runtime {
-    docker: "bashell/alpine-bash:latest"
+    docker: alpine_docker_path
     cpu: cpu
     memory: "~{machine_mem_mb} MiB"
     disks: "local-disk ~{disk} HDD"
diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl
index a4d7a8e615..939d1e1e12 100644
--- a/tasks/skylab/FastqProcessing.wdl
+++ b/tasks/skylab/FastqProcessing.wdl
@@ -11,7 +11,7 @@ task FastqProcessing {
     String read_struct
 
-    #using the latest build of warp-tools in GCR
-    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+    String warp_tools_docker_path
+
     #runtime values
     Int machine_mem_mb = 40000
     Int cpu = 16
@@ -34,7 +34,7 @@ task FastqProcessing {
     whitelist: "10x genomics cell barcode whitelist"
     chemistry: "chemistry employed, currently can be tenX_v2 or tenX_v3, the latter implies NO feature barcodes"
     sample_id: "name of sample matching this file, inserted into read group header"
-    docker: "(optional) the docker image containing the runtime environment for this task"
+    warp_tools_docker_path: "the docker image containing the runtime environment for this task"
     machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
     cpu: "(optional) the number of cpus to provision for this task"
     disk: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -111,7 +111,7 @@ task FastqProcessing {
   }
 
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     memory: "${machine_mem_mb} MiB"
     disks: "local-disk ${disk} HDD"
     disk: disk + " GB" # TES
diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl
index 18fed45fc1..99ef957e4b 100644
--- a/tasks/skylab/H5adUtils.wdl
+++ b/tasks/skylab/H5adUtils.wdl
@@ -6,7 +6,7 @@ task OptimusH5adGeneration {
 
   input {
     #runtime values
-    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+    String warp_tools_docker_path
     # name of the sample
     String input_id
     # user provided id
@@ -88,7 +88,7 @@ task OptimusH5adGeneration {
   >>>
 
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     cpu: cpu  # note that only 1 thread is supported by pseudobam
     memory: "~{machine_mem_mb} MiB"
     disks: "local-disk ~{disk} HDD"
@@ -105,7 +105,7 @@ task SingleNucleusOptimusH5adOutput {
 
   input {
     #runtime values
-    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+    String warp_tools_docker_path
     # name of the sample
     String input_id
     # user provided id
@@ -170,7 +170,7 @@ task SingleNucleusOptimusH5adOutput {
   }
 
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     cpu: cpu  # note that only 1 thread is supported by pseudobam
     memory: "~{machine_mem_mb} MiB"
     disks: "local-disk ~{disk} HDD"
diff --git a/tasks/skylab/MergeSortBam.wdl b/tasks/skylab/MergeSortBam.wdl
index 229ed18f8a..23ea466708 100644
--- a/tasks/skylab/MergeSortBam.wdl
+++ b/tasks/skylab/MergeSortBam.wdl
@@ -9,7 +9,7 @@ task MergeSortBamFiles {
     Int compression_level = 5
 
     # runtime values
-    String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10"
+    String picard_cloud_docker_path
     Int machine_mem_mb = 18150
     Int cpu = 1
     # default to 500GiB of space
@@ -28,7 +28,7 @@ task MergeSortBamFiles {
   parameter_meta {
     bam_inputs: "Merges Sam/Bam files"
     sort_order: "sort order of output bam"
-    docker: "(optional) the docker image containing the runtime environment for this task"
+    picard_cloud_docker_path: "the docker image containing the runtime environment for this task"
     machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
     cpu: "(optional) the number of cpus to provision for this task"
     disk: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -47,7 +47,7 @@ task MergeSortBamFiles {
   }
 
   runtime {
-    docker: docker
+    docker: picard_cloud_docker_path
     memory: "${machine_mem_mb} MiB"
     disks: "local-disk ${disk} HDD"
     disk: disk + " GB" # TES
diff --git a/tasks/skylab/Metrics.wdl b/tasks/skylab/Metrics.wdl
index fb91283d71..76b85d1012 100644
--- a/tasks/skylab/Metrics.wdl
+++ b/tasks/skylab/Metrics.wdl
@@ -8,7 +8,7 @@ task CalculateCellMetrics {
     String input_id
 
     # runtime values
-    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+    String warp_tools_docker_path
     Int machine_mem_mb = 8000
     Int cpu = 4
     Int disk = ceil(size(bam_input, "Gi") * 4) + ceil((size(original_gtf, "Gi") * 3))
@@ -21,7 +21,7 @@ task CalculateCellMetrics {
 
   parameter_meta {
     bam_input: "Input bam file containing reads marked with tags for cell barcodes (CB), molecule barcodes (UB) and gene ids (GX)"
-    docker: "(optional) the docker image containing the runtime environment for this task"
+    warp_tools_docker_path: "the docker image containing the runtime environment for this task"
     machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
     cpu: "(optional) the number of cpus to provision for this task"
     disk: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -64,7 +64,7 @@ task CalculateCellMetrics {
   }
 
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     memory: "${machine_mem_mb} MiB"
     disks: "local-disk ${disk} HDD"
     disk: disk + " GB" # TES
@@ -85,7 +85,7 @@ task CalculateGeneMetrics {
     String input_id
 
     # runtime values
-    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1"
+    String warp_tools_docker_path
     Int machine_mem_mb = 32000
     Int cpu = 4
     Int disk = ceil(size(bam_input, "Gi") * 4) + ceil((size(original_gtf, "Gi") * 3))
@@ -99,7 +99,7 @@ task CalculateGeneMetrics {
 
   parameter_meta {
     bam_input: "Input bam file containing reads marked with tags for cell barcodes (CB), molecule barcodes (UB) and gene ids (GE)"
-    docker: "(optional) the docker image containing the runtime environment for this task"
+    warp_tools_docker_path: "the docker image containing the runtime environment for this task"
     machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
     cpu: "(optional) the number of cpus to provision for this task"
     disk: "(optional) the amount of disk space (GiB) to provision for this task"
@@ -144,7 +144,7 @@ task CalculateGeneMetrics {
   }
 
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     memory: "${machine_mem_mb} MiB"
"${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES diff --git a/tasks/skylab/RunEmptyDrops.wdl b/tasks/skylab/RunEmptyDrops.wdl index a0f60b1c99..0921393862 100644 --- a/tasks/skylab/RunEmptyDrops.wdl +++ b/tasks/skylab/RunEmptyDrops.wdl @@ -16,7 +16,7 @@ task RunEmptyDrops { Int emptydrops_lower = 100 # runtime values - String docker = "us.gcr.io/broad-gotc-prod/empty-drops:1.0.1-4.2" + String empty_drops_docker_path Int machine_mem_mb = 32000 Int cpu = 1 Int disk = 20 @@ -48,7 +48,7 @@ task RunEmptyDrops { } runtime { - docker: docker + docker: empty_drops_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk_size + " GB" # TES diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 81f6668c42..e6ddc818f5 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -226,7 +226,7 @@ task STARsoloFastq { String? soloMultiMappers # runtime values - String docker = "us.gcr.io/broad-gotc-prod/star:1.0.1-2.7.11a-1692706072" + String star_docker_path Int machine_mem_mb = 64000 Int cpu = 8 # multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference. @@ -244,7 +244,7 @@ task STARsoloFastq { r2_fastq: "array of forward read FASTQ files" tar_star_reference: "star reference tarball built against the species that the bam_input is derived from" star_strand_mode: "STAR mode for handling stranded reads. Options are 'Forward', 'Reverse, or 'Unstranded'" - docker: "(optional) the docker image containing the runtime environment for this task" + star_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -432,7 +432,7 @@ task STARsoloFastq { >>> runtime { - docker: docker + docker: star_docker_path memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" disk: disk + " GB" # TES @@ -475,11 +475,12 @@ task MergeStarOutput { Array[File]? summary Array[File]? align_features Array[File]? umipercell - + String? counting_mode + String input_id #runtime values - String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" + String warp_tools_docker_path Int machine_mem_gb = 20 Int cpu = 1 Int disk = ceil(size(matrix, "Gi") * 2) + 10 @@ -490,7 +491,7 @@ task MergeStarOutput { } parameter_meta { - docker: "(optional) the docker image containing the runtime environment for this task" + warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_gb: "(optional) the amount of memory (GiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -564,15 +565,18 @@ task MergeStarOutput { fi done - # If text files are present, create a tar archive with them + # If text files are present, create a tar archive with them and run python script to combine shard metrics if ls *.txt 1> /dev/null 2>&1; then + echo "listing files" + ls + python3 /warptools/scripts/combine_shard_metrics.py ~{input_id}_summary.txt ~{input_id}_align_features.txt ~{input_id}_cell_reads.txt ~{counting_mode} ~{input_id} tar -zcvf ~{input_id}.star_metrics.tar *.txt else echo "No text files found in the folder." 
     fi
 
     # create the compressed raw count matrix with the counts, gene names and the barcodes
-    python3 /usr/gitc/create-merged-npz-output.py \
+    python3 /warptools/scripts/create-merged-npz-output.py \
        --barcodes ${barcodes_files[@]} \
        --features ${features_files[@]} \
        --matrix ${matrix_files[@]} \
@@ -580,7 +582,7 @@ task MergeStarOutput {
   >>>
 
   runtime {
-    docker: docker
+    docker: warp_tools_docker_path
     memory: "${machine_mem_gb} GiB"
     disks: "local-disk ${disk} HDD"
     disk: disk + " GB" # TES
@@ -593,6 +595,7 @@ task MergeStarOutput {
     File col_index = "~{input_id}_sparse_counts_col_index.npy"
     File sparse_counts = "~{input_id}_sparse_counts.npz"
     File? cell_reads_out = "~{input_id}.star_metrics.tar"
+    File? library_metrics = "~{input_id}_library_metrics.csv"
   }
 }
@@ -717,6 +720,7 @@ task STARGenomeRefVersion {
   input {
     String tar_star_reference
     Int disk = 10
+    String ubuntu_docker_path
   }
 
   meta {
@@ -749,7 +753,7 @@ task STARGenomeRefVersion {
   }
 
   runtime {
-    docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest"
+    docker: ubuntu_docker_path
     memory: "2 GiB"
     disks: "local-disk ${disk} HDD"
     disk: disk + " GB" # TES
diff --git a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl
index a09838c3a4..228b6b1f41 100644
--- a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl
+++ b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl
@@ -33,6 +33,8 @@ workflow TestMultiSampleSmartSeq2SingleNucleus {
       Boolean update_truth
       String vault_token_path
      String google_account_vault_path
+
+      String cloud_provider
   }
 
   meta {
@@ -57,7 +59,8 @@ workflow TestMultiSampleSmartSeq2SingleNucleus {
       species = species,
       organ = organ,
       input_name_metadata_field = input_name_metadata_field,
-      input_id_metadata_field = input_id_metadata_field
+      input_id_metadata_field = input_id_metadata_field,
+      cloud_provider = cloud_provider
   }
 
diff --git a/verification/test-wdls/TestMultiome.wdl b/verification/test-wdls/TestMultiome.wdl
index 9a4a0ec83a..6da047efcc 100644
--- a/verification/test-wdls/TestMultiome.wdl
+++ b/verification/test-wdls/TestMultiome.wdl
@@ -10,6 +10,7 @@ workflow TestMultiome {
 
   input {
       String input_id
+      String cloud_provider
 
       # Optimus Inputs
       String counting_mode = "sn_rna"
@@ -85,7 +86,8 @@ workflow TestMultiome {
       chrom_sizes = chrom_sizes,
       atac_whitelist = atac_whitelist,
       run_cellbender = run_cellbender,
-      soloMultiMappers = soloMultiMappers
+      soloMultiMappers = soloMultiMappers,
+      cloud_provider = cloud_provider
   }
 
diff --git a/verification/test-wdls/TestOptimus.wdl b/verification/test-wdls/TestOptimus.wdl
index 82bdf03adc..51e34e04e9 100644
--- a/verification/test-wdls/TestOptimus.wdl
+++ b/verification/test-wdls/TestOptimus.wdl
@@ -59,6 +59,8 @@ workflow TestOptimus {
     String vault_token_path
     String google_account_vault_path
 
+    String cloud_provider
+
   }
 
   meta {
@@ -84,7 +86,8 @@ workflow TestOptimus {
       star_strand_mode = star_strand_mode,
       count_exons = count_exons,
       ignore_r1_read_length = ignore_r1_read_length,
-      soloMultiMappers = soloMultiMappers
+      soloMultiMappers = soloMultiMappers,
+      cloud_provider = cloud_provider
   }
 
   # Collect all of the pipeline output into single Array
diff --git a/verification/test-wdls/TestSlideSeq.wdl b/verification/test-wdls/TestSlideSeq.wdl
index b63cd87099..b0523fee21 100644
--- a/verification/test-wdls/TestSlideSeq.wdl
+++ b/verification/test-wdls/TestSlideSeq.wdl
@@ -26,6 +26,7 @@ workflow TestSlideSeq {
     Boolean update_truth
     String vault_token_path
     String google_account_vault_path
+    String cloud_provider
   }
 
   meta {
@@ -43,7 +44,8 @@ workflow TestSlideSeq {
       annotations_gtf = annotations_gtf,
       output_bam_basename = output_bam_basename,
       count_exons = count_exons,
-      bead_locations = bead_locations
+      bead_locations = bead_locations,
+      cloud_provider = cloud_provider
   }
 
diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md
index 4f0750f35d..547bbeb5ac 100644
--- a/website/docs/Pipelines/ATAC/README.md
+++ b/website/docs/Pipelines/ATAC/README.md
@@ -8,7 +8,7 @@ slug: /Pipelines/ATAC/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [1.1.8](https://github.com/broadinstitute/warp/releases) | January, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [1.1.9](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ## Introduction to the ATAC workflow
diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md
index 3409347d3f..511f27c285 100644
--- a/website/docs/Pipelines/Multiome_Pipeline/README.md
+++ b/website/docs/Pipelines/Multiome_Pipeline/README.md
@@ -8,7 +8,7 @@ slug: /Pipelines/Multiome_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [Multiome v3.2.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact the [WARP Pipeline Development team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [Multiome v3.2.2](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact the [WARP Pipeline Development team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ![Multiome_diagram](./multiome_diagram.png)
 
@@ -56,6 +56,7 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta
 | Input name | Description | Type |
 | --- | --- | --- |
 | input_id | Unique identifier describing the biological sample or replicate that corresponds with the FASTQ files; can be a human-readable name or UUID. | String |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
 | annotations_gtf | GTF file containing gene annotations used for GEX cell metric calculation and ATAC fragment metrics; must match the GTF used to build the STAR aligner. | File |
 | gex_r1_fastq | Array of read 1 FASTQ files representing a single GEX 10x library. | Array[File] |
 | gex_r2_fastq | Array of read 2 FASTQ files representing a single GEX 10x library. | Array[File] |
@@ -69,7 +70,7 @@
 | ignore_r1_read_length | Optional boolean for the Optimus (GEX) pipeline indicating if the pipeline should ignore barcode chemistry check; if "true", the workflow will not ensure the `10x_chemistry_version` input matches the chemistry in the read 1 FASTQ; default is "false". | Boolean |
 | star_strand_mode | Optional string for the Optimus (GEX) pipeline for performing STARsolo alignment on forward stranded, reverse stranded, or unstranded data; default is "Forward". | String |
 | count_exons | Optional boolean for the Optimus (GEX) pipeline indicating if the workflow should calculate exon counts **when in single-nucleus (sn_rna) mode**; if "true" in sc_rna mode, the workflow will return an error; default is "false". | Boolean |
-| gex_whitelist | Optional file containing the list of valid barcodes for 10x multiome GEX data; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt". | File |
+| gex_whitelist | Optional file containing the list of valid barcodes for 10x multiome GEX data; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" when run on GCP. | File |
 | soloMultiMappers | Optional string describing whether or not the Optimus (GEX) pipeline should run STARsolo with the `--soloMultiMappers` flag. | String |
 | atac_r1_fastq | Array of read 1 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
 | atac_r2_fastq | Array of barcodes FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md
index 382804e447..67a8ea0f7b 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/README.md
+++ b/website/docs/Pipelines/Optimus_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/Optimus_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [optimus_v6.4.0](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | February, 2024 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [optimus_v6.4.2](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | March, 2024 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ![Optimus_diagram](Optimus_diagram.png)
 
@@ -85,6 +85,7 @@ The example configuration files also contain metadata for the reference files, d
 | Parameter name | Description | Optional attributes (when applicable) |
 | --- | --- | --- |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | N/A |
 | whitelist | List of known CBs; the workflow automatically selects the [10x Genomics](https://www.10xgenomics.com/) whitelist that corresponds to the v2 or v3 chemistry based on the input `tenx_chemistry_version`. A custom whitelist can also be provided if the input data was generated with a chemistry different from 10x Genomics v2 or v3. To use a custom whitelist, set the input `ignore_r1_read_length` to "true". | N/A |
 | read_struct | String describing the structure of reads; the workflow automatically selects the [10x Genomics](https://www.10xgenomics.com/) read structure that corresponds to the v2 or v3 chemistry based on the input `tenx_chemistry_version`. A custom read structure can also be provided if the input data was generated with a chemistry different from 10x Genomics v2 or v3. To use a custom read structure, set the input `force_no_check` to "true". | N/A |
 | tar_star_reference | TAR file containing a species-specific reference genome and GTF; it is generated using the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl). | N/A |
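As a minimal sketch of how the new `cloud_provider` parameter is supplied at runtime (the workflow path, gs:// paths, sample values, and the Cromwell jar location below are illustrative placeholders, not values taken from this patch):

```bash
# Hypothetical example: write a minimal Optimus inputs file and launch it with
# Cromwell. Every gs:// path and sample value here is a placeholder.
cat > optimus_inputs.json <<'EOF'
{
  "Optimus.cloud_provider": "gcp",
  "Optimus.input_id": "example_sample",
  "Optimus.counting_mode": "sc_rna",
  "Optimus.r1_fastq": ["gs://example-bucket/example_R1.fastq.gz"],
  "Optimus.r2_fastq": ["gs://example-bucket/example_R2.fastq.gz"],
  "Optimus.tenx_chemistry_version": 3,
  "Optimus.tar_star_reference": "gs://example-bucket/star_reference.tar",
  "Optimus.annotations_gtf": "gs://example-bucket/annotations.gtf"
}
EOF

# Supplying "azure" instead selects the dsppipelinedev.azurecr.io image prefix;
# any other value is rejected by the checkOptimusInput validation.
java -jar cromwell.jar run Optimus.wdl --inputs optimus_inputs.json
```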
diff --git a/website/docs/Pipelines/PairedTag_Pipeline/README.md b/website/docs/Pipelines/PairedTag_Pipeline/README.md
index cc0114a766..40d588fb58 100644
--- a/website/docs/Pipelines/PairedTag_Pipeline/README.md
+++ b/website/docs/Pipelines/PairedTag_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/PairedTag_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [PairedTag_v0.1.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
+| [PairedTag_v0.2.1](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ## Introduction to the Paired-Tag workflow
diff --git a/website/docs/Pipelines/SlideSeq_Pipeline/README.md b/website/docs/Pipelines/SlideSeq_Pipeline/README.md
index 0b59323acf..7cf8c08935 100644
--- a/website/docs/Pipelines/SlideSeq_Pipeline/README.md
+++ b/website/docs/Pipelines/SlideSeq_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/SlideSeq_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [SlideSeq v3.1.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Elizabeth Kiernan & Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
+| [SlideSeq v3.1.2](https://github.com/broadinstitute/warp/releases) | March, 2024 | Elizabeth Kiernan & Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ![SlideSeq_diagram](./slide-seq_diagram.png)
 
@@ -69,6 +69,7 @@
 | output_bam_basename | Optional string used for the output BAM file basename. | String |
 | count_exons | Optional boolean indicating if the workflow should calculate exon counts; default is set to “true” and produces an h5ad file containing both whole-gene counts and exon counts in an additional layer; when set to “false”, an h5ad file containing only whole-gene counts is produced. | Boolean |
 | bead_locations | Whitelist TSV file containing bead barcodes and XY coordinates on a single line for each bead; determined by sequencing prior to mRNA transfer and library preparation. | File |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
 
 #### Pseudogene handling
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
index 09acab0beb..1613d69876 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
+++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [MultiSampleSmartSeq2SingleNuclei_v1.3.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [MultiSampleSmartSeq2SingleNuclei_v1.3.2](https://github.com/broadinstitute/warp/releases) | March, 2024 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 
 ![](./snSS2.png)
 
@@ -82,6 +82,7 @@
 | species | Optional description of the species from which the cells were derived. | Array of strings |
 | input_name_metadata_field | Optional input describing, when applicable, the metadata field containing the `input_names`. | String |
 | input_id_metadata_field | Optional string describing, when applicable, the metadata field containing the `input_ids`. | String |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
 
 ## Multi-snSS2 tasks and tools
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
index 8ab56b15bd..a758e085cb 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
+++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
@@ -2,13 +2,13 @@
 sidebar_position: 2
 ---
 
-# Smart-seq2 Single Nucleus Multi-Sample v1.3.0 Publication Methods
+# Smart-seq2 Single Nucleus Multi-Sample v1.3.2 Publication Methods
 
 Below we provide an example methods section for a publication. For the complete pipeline documentation, see the [Smart-seq2 Single Nucleus Multi-Sample Overview](./README.md).
 
 ## Methods
 
-Data preprocessing and count matrix construction for a batch (or plate) were performed using the Smart-seq2 Single Nucleus Multi-Sample v1.3.0 Pipeline (RRID:SCR_021312) as well as Picard v.2.26.10 with default tool parameters unless otherwise specified. Genomic references are publicly available in the [Broad References](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/mm10/v0/single_nucleus?pageState=(%22StorageObjectListTable%22:(%22f%22:%22%255B%255D%22))&prefix=&forceOnObjectsSortingFiltering=false) Google Bucket and are also listed in the [example workflow configuration](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/smartseq2_single_nucleus_multisample/mouse_example.json) in GitHub.
+Data preprocessing and count matrix construction for a batch (or plate) were performed using the Smart-seq2 Single Nucleus Multi-Sample v1.3.2 Pipeline (RRID:SCR_021312) as well as Picard v.2.26.10 with default tool parameters unless otherwise specified. Genomic references are publicly available in the [Broad References](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/mm10/v0/single_nucleus?pageState=(%22StorageObjectListTable%22:(%22f%22:%22%255B%255D%22))&prefix=&forceOnObjectsSortingFiltering=false) Google Bucket and are also listed in the [example workflow configuration](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/smartseq2_single_nucleus_multisample/mouse_example.json) in GitHub.
 
 For each nucleus in the batch, paired-end FASTQ files were first trimmed to remove adapters using the fastq-mcf tool with a subsampling parameter of 200,000 reads. The trimmed FASTQ files were then aligned to the GENCODE GRCm38 mouse genome using STAR v.2.7.10a. To count the number of reads per gene, but not isoforms, the quantMode parameter was set to GeneCounts. Multi-mapped reads, and optical and PCR duplicates, were removed from the resulting aligned BAM using the Picard MarkDuplicates tool with REMOVE_DUPLICATES = true. Metrics were collected on the deduplicated BAM using Picard CollectMultipleMetrics with VALIDATION_STRINGENCY=SILENT.
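As a rough, non-authoritative sketch of the deduplication and metrics steps described above (file names are placeholders; the pipeline's Picard task definitions in `tasks/skylab/Picard.wdl` remain the source of truth for the exact arguments):

```bash
# Illustrative approximation of the Picard steps from the methods text above.
# Input/output names are placeholders, not the pipeline's actual file names.
java -jar picard.jar MarkDuplicates \
  INPUT=sample.Aligned.sortedByCoord.out.bam \
  OUTPUT=sample.dedup.bam \
  METRICS_FILE=sample.duplicate_metrics.txt \
  REMOVE_DUPLICATES=true

java -jar picard.jar CollectMultipleMetrics \
  INPUT=sample.dedup.bam \
  OUTPUT=sample.multiple_metrics \
  VALIDATION_STRINGENCY=SILENT
```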