diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index da8bc38753..6a82ca00da 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,3 +1,8 @@ +# 3.2.2 +2024-03-01 (Date of Last Commit) + +* Updated the Optimus.wdl to run on Azure. This change does not affect the Multiome pipeline. + # 3.2.1 2024-02-29 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 1e6bc2edae..73ae9d9670 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -4,12 +4,15 @@ import "../../../pipelines/skylab/multiome/atac.wdl" as atac import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender +import "../../../tasks/broad/Utilities.wdl" as utils workflow Multiome { - String pipeline_version = "3.2.1" + String pipeline_version = "3.2.2" input { + String cloud_provider String input_id # Optimus Inputs String counting_mode = "sn_rna" @@ -25,7 +28,6 @@ workflow Multiome { Boolean ignore_r1_read_length = false String star_strand_mode = "Forward" Boolean count_exons = false - File gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" String? soloMultiMappers # ATAC inputs @@ -33,7 +35,6 @@ workflow Multiome { Array[File] atac_r1_fastq Array[File] atac_r2_fastq Array[File] atac_r3_fastq - # BWA tar reference File tar_bwa_reference # Chromosone sizes @@ -41,17 +42,42 @@ workflow Multiome { # Trimadapters input String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG" String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" - # Whitelist - File atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt" # CellBender Boolean run_cellbender = false } + # Determine docker prefix based on cloud provider + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # Define docker images + String snap_atac_docker_image = "snapatac2:1.0.5-2.3.2-1709230223" + + # Define all whitelist files + File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" + File gcp_atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt" + File azure_gex_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_gex.txt" + File azure_atac_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_atac.txt" + + # Determine which whitelist files to use based on cloud provider + File gex_whitelist = if cloud_provider == "gcp" then gcp_gex_whitelist else azure_gex_whitelist + File atac_whitelist = if cloud_provider == "gcp" then gcp_atac_whitelist else azure_atac_whitelist + + # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+ } } # Call the Optimus workflow call optimus.Optimus as Optimus { input: counting_mode = counting_mode, r1_fastq = gex_r1_fastq, r2_fastq = gex_r2_fastq, @@ -68,12 +94,14 @@ workflow Multiome { ignore_r1_read_length = ignore_r1_read_length, star_strand_mode = star_strand_mode, count_exons = count_exons, - soloMultiMappers = soloMultiMappers + soloMultiMappers = soloMultiMappers, + cloud_provider = cloud_provider } # Call the ATAC workflow call atac.ATAC as Atac { input: + cloud_provider = cloud_provider, read1_fastq_gzipped = atac_r1_fastq, read2_fastq_gzipped = atac_r2_fastq, read3_fastq_gzipped = atac_r3_fastq, @@ -87,6 +115,7 @@ workflow Multiome { } call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes { input: + docker_path = docker_prefix + snap_atac_docker_image, atac_h5ad = Atac.snap_metrics, gex_h5ad = Optimus.h5ad_output_file, gex_whitelist = gex_whitelist, @@ -108,7 +137,6 @@ workflow Multiome { hardware_preemptible_tries = 2, hardware_zones = "us-central1-a us-central1-c", nvidia_driver_version = "470.82.01" - } } diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md index 170caa2aed..005a2fb782 100644 --- a/pipelines/skylab/multiome/atac.changelog.md +++ b/pipelines/skylab/multiome/atac.changelog.md @@ -1,4 +1,9 @@ -# 1.1.8 +# 1.1.9 +2024-03-01 (Date of Last Commit) + +* Updated the Optimus.wdl to run on Azure. This change does not affect the ATAC pipeline. + +# 1.1.8 2024-02-07 (Date of Last Commit) * Updated the Metrics tasks to exclude mitochondrial genes from reads_mapped_uniquely, reads_mapped_multiple and reads_mapped_exonic, reads_mapped_exonic_as and reads_mapped_intergenic diff --git a/pipelines/skylab/multiome/atac.json b/pipelines/skylab/multiome/atac.json index a8b9465fdc..1e898edd48 100644 --- a/pipelines/skylab/multiome/atac.json +++ b/pipelines/skylab/multiome/atac.json @@ -4,6 +4,7 @@ "ATAC.TrimAdapters.adapter_seq_read1": "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG", "ATAC.TrimAdapters.adapter_seq_read2": "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG", "ATAC.input_id": "scATAC", + "ATAC.cloud_provider": "gcp", "ATAC.tar_bwa_reference": "gs://fc-dd55e131-ef49-4d02-aa2a-20640daaae1e/submissions/8f0dd71a-b42f-4503-b839-3f146941758a/IndexRef/53a91851-1f6c-4ab9-af66-b338ffb28b5a/call-BwaMem2Index/GRCh38.primary_assembly.genome.bwamem2.fa.tar", "ATAC.preindex": "false" } diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl index 3dd81d7bf5..be597c1f62 100644 --- a/pipelines/skylab/multiome/atac.wdl +++ b/pipelines/skylab/multiome/atac.wdl @@ -3,6 +3,7 @@ version 1.0 import "../../../tasks/skylab/MergeSortBam.wdl" as Merge import "../../../tasks/skylab/FastqProcessing.wdl" as FastqProcessing import "../../../tasks/skylab/PairedTagUtils.wdl" as AddBB +import "../../../tasks/broad/Utilities.wdl" as utils workflow ATAC { meta { @@ -18,6 +19,7 @@ workflow ATAC { # Output prefix/base name for all intermediate files and pipeline outputs String input_id + String cloud_provider # Option for running files with preindex Boolean preindex = false @@ -41,7 +43,27 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "1.1.8" + String pipeline_version = "1.1.9" + + # Determine docker prefix based on cloud provider + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix
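(For reference: the provider-to-registry dispatch that Multiome, ATAC, Optimus, and SlideSeq each add reduces to the self-contained sketch below. The workflow name and the Resolve task are illustrative stand-ins; the prefix strings are the ones introduced in this diff.)

version 1.0

# Sketch of the cloud_provider -> docker prefix pattern used throughout this PR.
workflow DockerPrefixSketch {
  input {
    String cloud_provider           # expected to be "gcp" or "azure"
    String image = "warp-tools:2.0.1"
  }

  String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
  String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
  String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

  call Resolve { input: docker_path = docker_prefix + image }

  output {
    String resolved_image = Resolve.out
  }
}

task Resolve {
  input {
    String docker_path
  }
  command <<<
    # The task simply echoes the image it was asked to run in.
    echo "~{docker_path}"
  >>>
  runtime {
    docker: docker_path
  }
  output {
    String out = read_string(stdout())
  }
}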
+ + # Docker image names + String warp_tools_2_0_0 = "warp-tools:2.0.0" + String cutadapt_docker = "cutadapt:1.0.0-4.4-1709146458" + String samtools_docker = "samtools-dist-bwa:3.0.0" + String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" + String snap_atac_docker = "snapatac2:1.0.4-2.3.1" + + # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." + } + } parameter_meta { read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads" @@ -52,7 +74,6 @@ workflow ATAC { num_threads_bwa: "Number of threads for bwa-mem2 task (default: 128)" mem_size_bwa: "Memory size in GB for bwa-mem2 task (default: 512)" cpu_platform_bwa: "CPU platform for bwa-mem2 task (default: Intel Ice Lake)" - } call GetNumSplits { @@ -69,7 +90,8 @@ workflow ATAC { barcodes_fastq = read2_fastq_gzipped, output_base_name = input_id, num_output_files = GetNumSplits.ranks_per_node_out, - whitelist = whitelist + whitelist = whitelist, + docker_path = docker_prefix + warp_tools_2_0_0 } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { @@ -79,7 +101,8 @@ workflow ATAC { read3_fastq = SplitFastq.fastq_R3_output_array[idx], output_base_name = input_id + "_" + idx, adapter_seq_read1 = adapter_seq_read1, - adapter_seq_read3 = adapter_seq_read3 + adapter_seq_read3 = adapter_seq_read3, + docker_path = docker_prefix + cutadapt_docker } } @@ -91,21 +114,24 @@ workflow ATAC { output_base_name = input_id, nthreads = num_threads_bwa, mem_size = mem_size_bwa, - cpu_platform = cpu_platform_bwa + cpu_platform = cpu_platform_bwa, + docker_path = docker_prefix + samtools_docker } if (preindex) { call AddBB.AddBBTag as BBTag { input: bam = BWAPairedEndAlignment.bam_aligned_output, - input_id = input_id + input_id = input_id, + docker_path = docker_prefix + upstools_docker } call CreateFragmentFile as BB_fragment { input: bam = BBTag.bb_bam, chrom_sizes = chrom_sizes, annotations_gtf = annotations_gtf, - preindex = preindex + preindex = preindex, + docker_path = docker_prefix + snap_atac_docker } } if (!preindex) { @@ -114,7 +140,8 @@ workflow ATAC { bam = BWAPairedEndAlignment.bam_aligned_output, chrom_sizes = chrom_sizes, annotations_gtf = annotations_gtf, - preindex = preindex + preindex = preindex, + docker_path = docker_prefix + snap_atac_docker } } @@ -231,7 +258,7 @@ task TrimAdapters { # Runtime attributes/docker Int disk_size = ceil(2 * ( size(read1_fastq, "GiB") + size(read3_fastq, "GiB") )) + 200 Int mem_size = 4 - String docker_image = "us.gcr.io/broad-gotc-prod/cutadapt:1.0.0-4.4-1686752919" + String docker_path } parameter_meta { @@ -242,7 +269,7 @@ task TrimAdapters { adapter_seq_read1: "cutadapt option for the sequence adapter for read 1 fastq" adapter_seq_read3: "cutadapt option for the sequence adapter for read 3 fastq" output_base_name: "base name to be used for the output of the task" - docker_image: "the docker image using cutadapt to be used (default:us.gcr.io/broad-gotc-prod/cutadapt:1.0.0-4.4-1686752919)" + docker_path: "The docker image path containing the runtime environment for this task" mem_size: "the size of memory used during trimming adapters" disk_size : "disk size used in trimming adapters step" } @@ -269,7 +296,7 @@ task TrimAdapters { # use docker image for given tool cutadapat runtime { - docker: 
docker_image + docker: docker_path disks: "local-disk ${disk_size} HDD" memory: "${mem_size} GiB" } @@ -290,7 +317,7 @@ task BWAPairedEndAlignment { String read_group_sample_name = "RGSN1" String suffix = "trimmed_adapters.fastq.gz" String output_base_name - String docker_image = "us.gcr.io/broad-gotc-prod/samtools-dist-bwa:2.0.0" + String docker_path # Runtime attributes Int disk_size = 2000 @@ -309,7 +336,7 @@ task BWAPairedEndAlignment { mem_size: "the size of memory used during alignment" disk_size : "disk size used in bwa alignment step" output_base_name: "basename to be used for the output of the task" - docker_image: "the docker image using BWA to be used (default: us.gcr.io/broad-gotc-prod/samtools-bwa-mem-2:1.0.0-2.2.1_x64-linux-1685469504)" + docker_path: "The docker image path containing the runtime environment for this task" } String bam_aligned_output_name = output_base_name + ".bam" @@ -418,7 +445,7 @@ task BWAPairedEndAlignment { >>> runtime { - docker: docker_image + docker: docker_path disks: "local-disk ${disk_size} SSD" cpu: nthreads cpuPlatform: cpu_platform @@ -442,6 +469,7 @@ task CreateFragmentFile { Int mem_size = 16 Int nthreads = 1 String cpuPlatform = "Intel Cascade Lake" + String docker_path } String bam_base_name = basename(bam, ".bam") @@ -452,6 +480,7 @@ task CreateFragmentFile { chrom_sizes: "Text file containing chrom_sizes for genome build (i.e. hg38)." disk_size: "Disk size used in create fragment file step." mem_size: "The size of memory used in create fragment file." + docker_path: "The docker image path containing the runtime environment for this task" } command <<< @@ -492,7 +521,7 @@ task CreateFragmentFile { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1" + docker: docker_path disks: "local-disk ${disk_size} SSD" memory: "${mem_size} GiB" cpu: nthreads diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json index 7d15111f38..c4a7d6d5d7 100644 --- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json +++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json @@ -1,6 +1,7 @@ { "Multiome.annotations_gtf":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", "Multiome.input_id":"10k_PBMC_downsampled", + "Multiome.cloud_provider":"gcp", "Multiome.gex_r1_fastq":[ "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R1_gex.fastq.gz" ], diff --git a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json index a5ddf2c947..3ca7b1d546 100644 --- a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json +++ b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json @@ -5,6 +5,7 @@ "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_I1_001.fastq.gz" ], "Multiome.input_id":"10k_PBMC", + "Multiome.cloud_provider":"gcp", "Multiome.gex_r1_fastq":[ "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_R1_001.fastq.gz",
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_R1_001.fastq.gz" diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index 23098dd7a0..d76bedaed5 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,3 +1,8 @@ +# 6.4.2 +2024-03-01 (Date of Last Commit) +* Updated the Optimus.wdl to run on Azure. + + # 6.4.1 2024-02-29 (Date of Last Commit) * Added mem and disk to inputs of Join Barcodes task of Multiome workflow; does not impact the Optimus workflow diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 159490afbf..ccfa5e35e5 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -7,6 +7,7 @@ import "../../../tasks/skylab/RunEmptyDrops.wdl" as RunEmptyDrops import "../../../tasks/skylab/CheckInputs.wdl" as OptimusInputChecks import "../../../tasks/skylab/MergeSortBam.wdl" as Merge import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils +import "../../../tasks/broad/Utilities.wdl" as utils workflow Optimus { meta { @@ -14,6 +15,8 @@ workflow Optimus { } input { + String cloud_provider + # Mode for counting either "sc_rna" or "sn_rna" String counting_mode = "sc_rna" @@ -45,36 +48,71 @@ workflow Optimus { # Set to true to override input checks and allow pipeline to proceed with invalid input Boolean force_no_check = false - + # Check that tenx_chemistry_version matches the length of the read 1 fastq; # Set to true if you expect that r1_read_length does not match length of UMIs/barcodes for 10x chemistry v2 (26 bp) or v3 (28 bp). Boolean ignore_r1_read_length = false # Set to Forward, Reverse, or Unstranded to account for stranded library preparations (per STARsolo documentation) String star_strand_mode = "Forward" - + # Set to true to count reads aligned to exonic regions in sn_rna mode Boolean count_exons = false # this pipeline does not set any preemptible varibles and only relies on the task-level preemptible settings # you could override the tasklevel preemptible settings by passing it as one of the workflows inputs # for example: `"Optimus.StarAlign.preemptible": 3` will let the StarAlign task, which by default disables the - # usage of preemptible machines, attempt to request for preemptible instance up to 3 times. + # usage of preemptible machines, attempt to request for preemptible instance up to 3 times. 
} # version of this pipeline - String pipeline_version = "6.4.1" + String pipeline_version = "6.4.2" # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays Array[Int] indices = range(length(r1_fastq)) # 10x parameters - File whitelist_v2 = "gs://gcp-public-data--broad-references/RNA/resources/737k-august-2016.txt" - File whitelist_v3 = "gs://gcp-public-data--broad-references/RNA/resources/3M-febrary-2018.txt" + File gcp_whitelist_v2 = "gs://gcp-public-data--broad-references/RNA/resources/737k-august-2016.txt" + File gcp_whitelist_v3 = "gs://gcp-public-data--broad-references/RNA/resources/3M-febrary-2018.txt" + File azure_whitelist_v2 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/737k-august-2016.txt" + File azure_whitelist_v3 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/3M-febrary-2018.txt" + # Takes the first read1 FASTQ from the inputs to check for chemistry match File r1_single_fastq = r1_fastq[0] + # docker images + String picard_cloud_docker = "picard-cloud:2.26.10" + String pytools_docker = "pytools:1.0.0-1661263730" + String empty_drops_docker = "empty-drops:1.0.1-4.2" + String star_docker = "star:1.0.1-2.7.11a-1692706072" + String warp_tools_docker_2_0_1 = "warp-tools:2.0.1" + String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985" + #TODO how do we handle these? + String alpine_docker = "alpine-bash:latest" + String gcp_alpine_docker_prefix = "bashell/" + String acr_alpine_docker_prefix = "dsppipelinedev.azurecr.io/" + String alpine_docker_prefix = if cloud_provider == "gcp" then gcp_alpine_docker_prefix else acr_alpine_docker_prefix + + String ubuntu_docker = "ubuntu_16_0_4:latest" + String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/" + String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/" + String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix + + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + + # choose docker prefix based on cloud provider + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # make sure either gcp or azure is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
+ } + } + parameter_meta { r1_fastq: "forward read, contains cell barcodes and molecule barcodes" r2_fastq: "reverse read, contains cDNA fragment generated from captured mRNA" @@ -96,16 +134,21 @@ workflow Optimus { force_no_check = force_no_check, counting_mode = counting_mode, count_exons = count_exons, - whitelist_v2 = whitelist_v2, - whitelist_v3 = whitelist_v3, + gcp_whitelist_v2 = gcp_whitelist_v2, + gcp_whitelist_v3 = gcp_whitelist_v3, + azure_whitelist_v2 = azure_whitelist_v2, + azure_whitelist_v3 = azure_whitelist_v3, tenx_chemistry_version = tenx_chemistry_version, r1_fastq = r1_single_fastq, - ignore_r1_read_length = ignore_r1_read_length + ignore_r1_read_length = ignore_r1_read_length, + cloud_provider = cloud_provider, + alpine_docker_path = alpine_docker_prefix + alpine_docker } call StarAlign.STARGenomeRefVersion as ReferenceCheck { input: - tar_star_reference = tar_star_reference + tar_star_reference = tar_star_reference, + ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } call FastqProcessing.FastqProcessing as SplitFastq { @@ -116,7 +159,8 @@ workflow Optimus { whitelist = whitelist, chemistry = tenx_chemistry_version, sample_id = input_id, - read_struct = read_struct + read_struct = read_struct, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { @@ -131,21 +175,24 @@ workflow Optimus { counting_mode = counting_mode, count_exons = count_exons, output_bam_basename = output_bam_basename + "_" + idx, - soloMultiMappers = soloMultiMappers + soloMultiMappers = soloMultiMappers, + star_docker_path = docker_prefix + star_docker } } call Merge.MergeSortBamFiles as MergeBam { input: bam_inputs = STARsoloFastq.bam_output, output_bam_filename = output_bam_basename + ".bam", - sort_order = "coordinate" + sort_order = "coordinate", + picard_cloud_docker_path = docker_prefix + picard_cloud_docker } call Metrics.CalculateGeneMetrics as GeneMetrics { input: bam_input = MergeBam.output_bam, mt_genes = mt_genes, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } call Metrics.CalculateCellMetrics as CellMetrics { @@ -153,7 +200,8 @@ workflow Optimus { bam_input = MergeBam.output_bam, mt_genes = mt_genes, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } call StarAlign.MergeStarOutput as MergeStarOutputs { @@ -165,7 +213,9 @@ workflow Optimus { summary = STARsoloFastq.summary, align_features = STARsoloFastq.align_features, umipercell = STARsoloFastq.umipercell, - input_id = input_id + input_id = input_id, + counting_mode = counting_mode, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2 } if (counting_mode == "sc_rna"){ call RunEmptyDrops.RunEmptyDrops { @@ -173,7 +223,8 @@ workflow Optimus { sparse_count_matrix = MergeStarOutputs.sparse_counts, row_index = MergeStarOutputs.row_index, col_index = MergeStarOutputs.col_index, - emptydrops_lower = emptydrops_lower + emptydrops_lower = emptydrops_lower, + empty_drops_docker_path = docker_prefix + empty_drops_docker } } @@ -192,7 +243,8 @@ workflow Optimus { gene_id = MergeStarOutputs.col_index, empty_drops_result = RunEmptyDrops.empty_drops_result, counting_mode = counting_mode, - pipeline_version = "Optimus_v~{pipeline_version}" + pipeline_version = "Optimus_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } 
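(For reference: because WDL has no native assert statement, these workflows validate cloud_provider by conditionally calling a task that exits non-zero. utils.ErrorWithMessage in tasks/broad/Utilities.wdl fills that role; the FailWithMessage task below is a hedged stand-in for it, not the actual implementation.)

version 1.0

# Minimal sketch of the input-guard pattern added to each workflow in this PR.
workflow GuardSketch {
  input {
    String cloud_provider
  }

  # The conditional block only materializes when the input is invalid,
  # so a bad value fails the run before any expensive work starts.
  if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
    call FailWithMessage {
      input:
        message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
    }
  }
}

task FailWithMessage {
  input {
    String message
  }
  command <<<
    echo "~{message}" 1>&2
    exit 1
  >>>
  runtime {
    docker: "ubuntu:20.04"   # assumed generic image; any shell image works
  }
}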
} if (count_exons && counting_mode=="sn_rna") { @@ -202,7 +254,13 @@ workflow Optimus { features = STARsoloFastq.features_sn_rna, matrix = STARsoloFastq.matrix_sn_rna, cell_reads = STARsoloFastq.cell_reads_sn_rna, - input_id = input_id + input_id = input_id, + counting_mode = "sc_rna", + summary = STARsoloFastq.summary_sn_rna, + align_features = STARsoloFastq.align_features_sn_rna, + umipercell = STARsoloFastq.umipercell_sn_rna, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2 } call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{ input: @@ -219,7 +277,8 @@ workflow Optimus { sparse_count_matrix_exon = MergeStarOutputsExons.sparse_counts, cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = MergeStarOutputsExons.col_index, - pipeline_version = "Optimus_v~{pipeline_version}" + pipeline_version = "Optimus_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } } @@ -238,11 +297,13 @@ workflow Optimus { File gene_metrics = GeneMetrics.gene_metrics File? cell_calls = RunEmptyDrops.empty_drops_result File? aligner_metrics = MergeStarOutputs.cell_reads_out + File? library_metrics = MergeStarOutputs.library_metrics Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix Array[File?] multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix + # h5ad File h5ad_output_file = final_h5ad_output } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json index 612659d25c..667e632bbd 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json @@ -15,5 +15,6 @@ "Optimus.input_id": "pbmc_human_v3", "Optimus.tenx_chemistry_version": "3", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", - "Optimus.star_strand_mode": "Forward" + "Optimus.star_strand_mode": "Forward", + "Optimus.cloud_provider": "gcp" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json index 0dc26af9fd..33e7553cb4 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json @@ -27,5 +27,6 @@ "Optimus.input_id": "neurons2k_mouse", "Optimus.tenx_chemistry_version": "2", "Optimus.star_strand_mode": "Unstranded", - "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf" + "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf", + "Optimus.cloud_provider": "gcp" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json index 787a1a8347..fef0bd0f76 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json @@ -25,5 +25,6 @@ "Optimus.star_strand_mode": "Unstranded", "Optimus.annotations_gtf":
"gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf", "Optimus.counting_mode": "sn_rna", - "Optimus.count_exons": true + "Optimus.count_exons": true, + "Optimus.cloud_provider": "gcp" } diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index 17255ab77f..ca066704a4 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,3 +1,7 @@ +# 0.2.1 +2024-03-01 (Date of Last Commit) +* Updated the Optimus.wdl to run on Azure. This change does not affect the PairedTag pipeline. + # 0.2.0 2024-02-29 (Date of Last Commit) * Added mem and disk to inputs of Join Barcodes task of Multiome workflow; does not impact the Paired-tag workflow diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index eb11e9acc4..29d2594152 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -5,7 +5,7 @@ import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing workflow PairedTag { - String pipeline_version = "0.2.0" + String pipeline_version = "0.2.1" input { String input_id diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index e041750353..1817b2665b 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,3 +1,11 @@ +# 3.1.3 +2024-03-01 (Date of Last Commit) +* Updated the Optimus.wdl to run on Azure. This change does not affect the SlideSeq pipeline. + +# 3.1.2 +2024-02-28 (Date of Last Commit) +* Updated the Optimus workflow to produce a library-level metrics CSV; this does not impact the slide-seq pipeline + # 3.1.1 2024-02-29 (Date of Last Commit) * Added mem and disk to inputs of Join Barcodes task of Multiome workflow; does not impact the Slideseq workflow diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index 66f6001da8..bc8df16dde 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -6,6 +6,8 @@ import "../../../tasks/skylab/Metrics.wdl" as Metrics import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "../../../tasks/skylab/CheckInputs.wdl" as OptimusInputChecks import "../../../tasks/skylab/MergeSortBam.wdl" as Merge +import "../../../tasks/broad/Utilities.wdl" as utils + ## Copyright Broad Institute, 2022 ## @@ -23,7 +25,7 @@ import "../../../tasks/skylab/MergeSortBam.wdl" as Merge workflow SlideSeq { - String pipeline_version = "3.1.1" + String pipeline_version = "3.1.3" input { Array[File] r1_fastq @@ -39,6 +41,33 @@ workflow SlideSeq { Boolean count_exons = true File bead_locations + String cloud_provider + + } + + # docker images + String pytools_docker = "pytools:1.0.0-1661263730" + String picard_cloud_docker = "picard-cloud:2.26.10" + String warp_tools_docker_2_0_1 = "warp-tools:2.0.1" + String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985" + + String ubuntu_docker = "ubuntu_16_0_4:latest" + String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/" + String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/" + String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix + + String gcr_docker_prefix = 
"us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + + # choose docker prefix based on cloud provider + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # make sure either gcp or azr is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." + } } parameter_meta { @@ -51,7 +80,8 @@ workflow SlideSeq { call StarAlign.STARGenomeRefVersion as ReferenceCheck { input: - tar_star_reference = tar_star_reference + tar_star_reference = tar_star_reference, + ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } call Metrics.FastqMetricsSlideSeq as FastqMetrics { @@ -86,13 +116,15 @@ workflow SlideSeq { input: bam_inputs = STARsoloFastqSlideSeq.bam_output, output_bam_filename = output_bam_basename + ".bam", - sort_order = "coordinate" + sort_order = "coordinate", + picard_cloud_docker_path = docker_prefix + picard_cloud_docker } call Metrics.CalculateGeneMetrics as GeneMetrics { input: bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } call Metrics.CalculateUMIsMetrics as UMIsMetrics { input: @@ -105,7 +137,9 @@ workflow SlideSeq { input: bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 + } call StarAlign.MergeStarOutput as MergeStarOutputs { @@ -113,7 +147,8 @@ workflow SlideSeq { barcodes = STARsoloFastqSlideSeq.barcodes, features = STARsoloFastqSlideSeq.features, matrix = STARsoloFastqSlideSeq.matrix, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2 } if ( !count_exons ) { call H5adUtils.OptimusH5adGeneration as SlideseqH5adGeneration{ @@ -126,7 +161,9 @@ workflow SlideSeq { cell_id = MergeStarOutputs.row_index, gene_id = MergeStarOutputs.col_index, add_emptydrops_data = "no", - pipeline_version = "SlideSeq_v~{pipeline_version}" + pipeline_version = "SlideSeq_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 + } } if (count_exons) { @@ -135,7 +172,8 @@ workflow SlideSeq { barcodes = STARsoloFastqSlideSeq.barcodes_sn_rna, features = STARsoloFastqSlideSeq.features_sn_rna, matrix = STARsoloFastqSlideSeq.matrix_sn_rna, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_2 } call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{ input: @@ -149,7 +187,8 @@ workflow SlideSeq { sparse_count_matrix_exon = MergeStarOutputsExons.sparse_counts, cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = MergeStarOutputsExons.col_index, - pipeline_version = "SlideSeq_v~{pipeline_version}" + pipeline_version = "SlideSeq_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } } diff --git a/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json b/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json index d8998d1d9b..035b22c58e 100644 --- a/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json +++ b/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json @@ -13,5 +13,6 @@ 
"SlideSeq.tar_star_reference": "gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/star/modified_star_2.7.9a_primary_gencode_mouse_vM23.tar", "SlideSeq.annotations_gtf": "gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf", "SlideSeq.count_exons": true, - "SlideSeq.bead_locations": " gs://broad-gotc-test-storage/SlideSeq/inputs/plumbing/Puck_210817_11/Puck_210817_11.tsv" + "SlideSeq.bead_locations": " gs://broad-gotc-test-storage/SlideSeq/inputs/plumbing/Puck_210817_11/Puck_210817_11.tsv", + "SlideSeq.cloud_provider": "gcp" } \ No newline at end of file diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index 64b516e8b9..d3c50e9282 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,3 +1,13 @@ +# 1.3.2 +2024-03-01 (Date of Last Commit) + +* Updated the Optimus.wdl to run on Azure. This change does not affect the MultiSampleSmartSeq2SingleNucleus pipeline. + +# 1.3.1 +2024-02-28 (Date of Last Commit) + +* Updated the Optimus workflow to produce a library-level metrics CSV; this does not impact the Single-nucleus Multi Sample Smart-seq2 pipeline + # 1.3.0 2024-01-22 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index 7a4c1066f8..312e447204 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -6,6 +6,7 @@ import "../../../tasks/skylab/StarAlign.wdl" as StarAlign import "../../../tasks/skylab/Picard.wdl" as Picard import "../../../tasks/skylab/FeatureCounts.wdl" as CountAlignments import "../../../tasks/skylab/LoomUtils.wdl" as LoomUtils +import "../../../tasks/broad/Utilities.wdl" as utils workflow MultiSampleSmartSeq2SingleNucleus { meta { @@ -38,9 +39,25 @@ workflow MultiSampleSmartSeq2SingleNucleus { Array[String]? organ String? input_name_metadata_field String? input_id_metadata_field + + String cloud_provider + } + + String ubuntu_docker = "ubuntu_16_0_4:latest" + String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/" + String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/" + String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix + + # make sure either gcp or azr is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." + } } + # Version of this pipeline - String pipeline_version = "1.3.0" + String pipeline_version = "1.3.2" if (false) { String? 
none = "None" @@ -72,7 +89,8 @@ workflow MultiSampleSmartSeq2SingleNucleus { call StarAlign.STARGenomeRefVersion as ReferenceCheck { input: - tar_star_reference = tar_star_reference + tar_star_reference = tar_star_reference, + ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } call TrimAdapters.TrimAdapters as TrimAdapters { diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json b/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json index 8fafd92173..db8f68b114 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json @@ -18,5 +18,6 @@ "SM-GE644_S117_E1-50_GCGTAGTA-AAGGAGTA", "SM-GE644_S118_E1-50_GCGTAGTA-CTAAGCCT" ], - "MultiSampleSmartSeq2SingleNucleus.batch_id": "SM-GE644" + "MultiSampleSmartSeq2SingleNucleus.batch_id": "SM-GE644", + "MultiSampleSmartSeq2SingleNucleus.cloud_provider": "gcp" } diff --git a/pipelines/skylab/snM3C/snM3C.changelog.md b/pipelines/skylab/snM3C/snM3C.changelog.md index dc90a21239..f3fb853b6c 100644 --- a/pipelines/skylab/snM3C/snM3C.changelog.md +++ b/pipelines/skylab/snM3C/snM3C.changelog.md @@ -1,7 +1,7 @@ # 2.0.1 2024-2-15 (Date of Last Commit) -* Updated the snM3C task memory, disk, and CPUs +* Updated the snM3C task memory, disk, and CPUs # 2.0.0 2024-2-13 (Date of Last Commit) diff --git a/tasks/skylab/CheckInputs.wdl b/tasks/skylab/CheckInputs.wdl index b24c77c133..89b99c7798 100644 --- a/tasks/skylab/CheckInputs.wdl +++ b/tasks/skylab/CheckInputs.wdl @@ -55,6 +55,8 @@ task checkInputArrays { task checkOptimusInput { input { + String cloud_provider + #String SAS_TOKEN File r1_fastq String counting_mode Boolean force_no_check @@ -63,9 +65,12 @@ task checkOptimusInput { Int machine_mem_mb = 1000 Int cpu = 1 Int tenx_chemistry_version - String whitelist_v2 - String whitelist_v3 + String gcp_whitelist_v2 + String gcp_whitelist_v3 + String azure_whitelist_v2 + String azure_whitelist_v3 Boolean ignore_r1_read_length + String alpine_docker_path } meta { @@ -108,15 +113,36 @@ task checkOptimusInput { echo "ERROR: Invalid value count_exons should not be used with \"${counting_mode}\" input." 
fi fi + # Check for chemistry version to produce read structure and whitelist if [[ ~{tenx_chemistry_version} == 2 ]] then - WHITELIST=~{whitelist_v2} + if [[ "~{cloud_provider}" == "gcp" ]] + then + WHITELIST=~{gcp_whitelist_v2} + elif [[ "~{cloud_provider}" == "azure" ]] + then + WHITELIST=~{azure_whitelist_v2} + else + pass="false" + echo "ERROR: Cloud provider must be either gcp or azure" + fi + echo "WHITELIST:" $WHITELIST echo $WHITELIST > whitelist.txt echo 16C10M > read_struct.txt elif [[ ~{tenx_chemistry_version} == 3 ]] then - WHITELIST=~{whitelist_v3} + if [[ "~{cloud_provider}" == "gcp" ]] + then + WHITELIST=~{gcp_whitelist_v3} + elif [[ "~{cloud_provider}" == "azure" ]] + then + WHITELIST=~{azure_whitelist_v3} + else + pass="false" + echo "ERROR: Cloud provider must be either gcp or azure" + fi + echo "WHITELIST:" $WHITELIST echo $WHITELIST > whitelist.txt echo 16C12M > read_struct.txt else @@ -153,7 +179,7 @@ task checkOptimusInput { String read_struct_out = read_string("read_struct.txt") } runtime { - docker: "bashell/alpine-bash:latest" + docker: alpine_docker_path cpu: cpu memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index a4d7a8e615..bd6f9b06b7 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -11,7 +11,8 @@ task FastqProcessing { String read_struct #using the latest build of warp-tools in GCR - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String warp_tools_docker_path + #runtime values Int machine_mem_mb = 40000 Int cpu = 16 @@ -34,7 +35,7 @@ task FastqProcessing { whitelist: "10x genomics cell barcode whitelist" chemistry: "chemistry employed, currently can be tenX_v2 or tenX_v3, the latter implies NO feature barcodes" sample_id: "name of sample matching this file, inserted into read group header" - docker: "(optional) the docker image containing the runtime environment for this task" + warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -111,7 +112,7 @@ task FastqProcessing { } runtime { - docker: docker + docker: warp_tools_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES @@ -243,10 +244,7 @@ task FastqProcessATAC { String output_base_name File whitelist String barcode_index1 = basename(barcodes_fastq[0]) - - # [?] copied from corresponding optimus wdl for fastqprocessing - # using the latest build of warp-tools in GCR - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String docker_path # Runtime attributes [?] Int mem_size = 5 @@ -272,7 +270,7 @@ task FastqProcessATAC { read_structure: "A string that specifies the barcode (C) positions in the Read 2 fastq" barcode_orientation: "A string that specifies the orientation of barcode needed for scATAC data. The default is FIRST_BP. Other options include LAST_BP, FIRST_BP_RC or LAST_BP_RC." 
whitelist: "10x genomics cell barcode whitelist for scATAC" - docker: "(optional) the docker image containing the runtime environment for this task" + docker_path: "The docker image path containing the runtime environment for this task" mem_size: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk_size: "(optional) the amount of disk space (GiB) to provision for this task" @@ -361,7 +359,7 @@ task FastqProcessATAC { >>> runtime { - docker: docker + docker: docker_path cpu: cpu memory: "${mem_size} MiB" disks: "local-disk ${disk_size} HDD" diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index 18fed45fc1..54a27de18f 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -6,7 +6,7 @@ task OptimusH5adGeneration { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String warp_tools_docker_path # name of the sample String input_id # user provided id @@ -88,7 +88,7 @@ task OptimusH5adGeneration { >>> runtime { - docker: docker + docker: warp_tools_docker_path cpu: cpu # note that only 1 thread is supported by pseudobam memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" @@ -105,7 +105,7 @@ task SingleNucleusOptimusH5adOutput { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String warp_tools_docker_path # name of the sample String input_id # user provided id @@ -170,7 +170,7 @@ task SingleNucleusOptimusH5adOutput { } runtime { - docker: docker + docker: warp_tools_docker_path cpu: cpu # note that only 1 thread is supported by pseudobam memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" @@ -184,7 +184,7 @@ task SingleNucleusOptimusH5adOutput { } task JoinMultiomeBarcodes { - input { + input { File atac_h5ad File atac_fragment File gex_h5ad @@ -196,9 +196,9 @@ task JoinMultiomeBarcodes { Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(gex_h5ad, "MiB") + size(atac_fragment, "MiB")) * 3) + 10000 Int disk = ceil((size(atac_h5ad, "GiB") + size(gex_h5ad, "GiB") + size(atac_fragment, "GiB")) * 5) + 10 } - String gex_base_name = basename(gex_h5ad, ".h5ad") - String atac_base_name = basename(atac_h5ad, ".h5ad") - String atac_fragment_base = basename(atac_fragment, ".tsv") + String gex_base_name = basename(gex_h5ad, ".h5ad") + String atac_base_name = basename(atac_h5ad, ".h5ad") + String atac_fragment_base = basename(atac_fragment, ".tsv") parameter_meta { atac_h5ad: "The resulting h5ad from the ATAC workflow." 
@@ -277,7 +277,7 @@ task JoinMultiomeBarcodes { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1-1700590229" + docker: docker_path disks: "local-disk ~{disk} HDD" memory: "${machine_mem_mb} MiB" cpu: nthreads diff --git a/tasks/skylab/MergeSortBam.wdl b/tasks/skylab/MergeSortBam.wdl index 229ed18f8a..23ea466708 100644 --- a/tasks/skylab/MergeSortBam.wdl +++ b/tasks/skylab/MergeSortBam.wdl @@ -9,7 +9,7 @@ task MergeSortBamFiles { Int compression_level = 5 # runtime values - String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10" + String picard_cloud_docker_path Int machine_mem_mb = 18150 Int cpu = 1 # default to 500GiB of space @@ -28,7 +28,7 @@ task MergeSortBamFiles { parameter_meta { bam_inputs: "Merges Sam/Bam files" sort_order: "sort order of output bam" - docker: "(optional) the docker image containing the runtime environment for this task" + picard_cloud_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -47,7 +47,7 @@ task MergeSortBamFiles { } runtime { - docker: docker + docker: picard_cloud_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES diff --git a/tasks/skylab/Metrics.wdl b/tasks/skylab/Metrics.wdl index fb91283d71..76b85d1012 100644 --- a/tasks/skylab/Metrics.wdl +++ b/tasks/skylab/Metrics.wdl @@ -8,7 +8,8 @@ task CalculateCellMetrics { String input_id # runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + + String warp_tools_docker_path Int machine_mem_mb = 8000 Int cpu = 4 Int disk = ceil(size(bam_input, "Gi") * 4) + ceil((size(original_gtf, "Gi") * 3)) @@ -21,7 +22,7 @@ task CalculateCellMetrics { parameter_meta { bam_input: "Input bam file containing reads marked with tags for cell barcodes (CB), molecule barcodes (UB) and gene ids (GX)" - docker: "(optional) the docker image containing the runtime environment for this task" + warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -64,7 +65,7 @@ task CalculateCellMetrics { } runtime { - docker: docker + docker: warp_tools_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES @@ -85,7 +86,7 @@ task CalculateGeneMetrics { String input_id # runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String warp_tools_docker_path Int machine_mem_mb = 32000 Int cpu = 4 Int disk = ceil(size(bam_input, "Gi") * 4) + ceil((size(original_gtf, "Gi") * 3)) @@ -99,7 +100,7 @@ task CalculateGeneMetrics { parameter_meta { bam_input: "Input bam file containing reads marked with tags for cell barcodes (CB), molecule barcodes (UB) and gene ids (GE)" - docker: "(optional) the docker image containing the runtime environment for this task" + warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space 
(GiB) to provision for this task" @@ -144,7 +145,7 @@ task CalculateGeneMetrics { } runtime { - docker: docker + docker: warp_tools_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl index 779ac4fe57..3abc7df45a 100644 --- a/tasks/skylab/PairedTagUtils.wdl +++ b/tasks/skylab/PairedTagUtils.wdl @@ -130,9 +130,7 @@ task AddBBTag { input { File bam String input_id - - # using the latest build of upstools docker in GCR - String docker = "us.gcr.io/broad-gotc-prod/upstools:1.0.0-2023.03.03-1704300311" + String docker_path # Runtime attributes Int mem_size = 8 @@ -150,7 +148,7 @@ task AddBBTag { parameter_meta { bam: "BAM with aligned reads and barcode in the CB tag" input_id: "input ID" - docker: "(optional) the docker image containing the runtime environment for this task" + docker_path: "The docker image path containing the runtime environment for this task" mem_size: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk_size: "(optional) the amount of disk space (GiB) to provision for this task" @@ -169,7 +167,7 @@ task AddBBTag { >>> runtime { - docker: docker + docker: docker_path cpu: cpu memory: "${mem_size} GiB" disks: "local-disk ${disk_size} HDD" diff --git a/tasks/skylab/RunEmptyDrops.wdl b/tasks/skylab/RunEmptyDrops.wdl index a0f60b1c99..0921393862 100644 --- a/tasks/skylab/RunEmptyDrops.wdl +++ b/tasks/skylab/RunEmptyDrops.wdl @@ -16,7 +16,7 @@ task RunEmptyDrops { Int emptydrops_lower = 100 # runtime values - String docker = "us.gcr.io/broad-gotc-prod/empty-drops:1.0.1-4.2" + String empty_drops_docker_path Int machine_mem_mb = 32000 Int cpu = 1 Int disk = 20 @@ -48,7 +48,7 @@ task RunEmptyDrops { } runtime { - docker: docker + docker: empty_drops_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk_size + " GB" # TES diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 81f6668c42..e6ddc818f5 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -226,7 +226,7 @@ task STARsoloFastq { String? soloMultiMappers # runtime values - String docker = "us.gcr.io/broad-gotc-prod/star:1.0.1-2.7.11a-1692706072" + String star_docker_path Int machine_mem_mb = 64000 Int cpu = 8 # multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference. @@ -244,7 +244,7 @@ task STARsoloFastq { r2_fastq: "array of forward read FASTQ files" tar_star_reference: "star reference tarball built against the species that the bam_input is derived from" star_strand_mode: "STAR mode for handling stranded reads. Options are 'Forward', 'Reverse, or 'Unstranded'" - docker: "(optional) the docker image containing the runtime environment for this task" + star_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -432,7 +432,7 @@ task STARsoloFastq { >>> runtime { - docker: docker + docker: star_docker_path memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" disk: disk + " GB" # TES @@ -475,11 +475,12 @@ task MergeStarOutput { Array[File]? summary Array[File]? align_features Array[File]? umipercell - + String? 
counting_mode + String input_id #runtime values - String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" + String warp_tools_docker_path Int machine_mem_gb = 20 Int cpu = 1 Int disk = ceil(size(matrix, "Gi") * 2) + 10 @@ -490,7 +491,7 @@ task MergeStarOutput { } parameter_meta { - docker: "(optional) the docker image containing the runtime environment for this task" + warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_gb: "(optional) the amount of memory (GiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -564,15 +565,18 @@ task MergeStarOutput { fi done - # If text files are present, create a tar archive with them + # If text files are present, create a tar archive with them and run python script to combine shard metrics if ls *.txt 1> /dev/null 2>&1; then + echo "listing files" + ls + python3 /warptools/scripts/combine_shard_metrics.py ~{input_id}_summary.txt ~{input_id}_align_features.txt ~{input_id}_cell_reads.txt ~{counting_mode} ~{input_id} tar -zcvf ~{input_id}.star_metrics.tar *.txt else echo "No text files found in the folder." fi # create the compressed raw count matrix with the counts, gene names and the barcodes - python3 /usr/gitc/create-merged-npz-output.py \ + python3 /warptools/scripts/create-merged-npz-output.py \ --barcodes ${barcodes_files[@]} \ --features ${features_files[@]} \ --matrix ${matrix_files[@]} \ @@ -580,7 +584,7 @@ task MergeStarOutput { >>> runtime { - docker: docker + docker: warp_tools_docker_path memory: "${machine_mem_gb} GiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES @@ -593,6 +597,7 @@ task MergeStarOutput { File col_index = "~{input_id}_sparse_counts_col_index.npy" File sparse_counts = "~{input_id}_sparse_counts.npz" File? cell_reads_out = "~{input_id}.star_metrics.tar" + File? 
library_metrics="~{input_id}_library_metrics.csv" } } @@ -717,6 +722,7 @@ task STARGenomeRefVersion { input { String tar_star_reference Int disk = 10 + String ubuntu_docker_path } meta { @@ -749,7 +755,7 @@ task STARGenomeRefVersion { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: ubuntu_docker_path memory: "2 GiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES diff --git a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl index a09838c3a4..228b6b1f41 100644 --- a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl +++ b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl @@ -33,6 +33,8 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { Boolean update_truth String vault_token_path String google_account_vault_path + + String cloud_provider } meta { @@ -57,7 +59,8 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { species = species, organ = organ, input_name_metadata_field = input_name_metadata_field, - input_id_metadata_field = input_id_metadata_field + input_id_metadata_field = input_id_metadata_field, + cloud_provider = cloud_provider } diff --git a/verification/test-wdls/TestMultiome.wdl b/verification/test-wdls/TestMultiome.wdl index 9a4a0ec83a..6da047efcc 100644 --- a/verification/test-wdls/TestMultiome.wdl +++ b/verification/test-wdls/TestMultiome.wdl @@ -10,6 +10,7 @@ workflow TestMultiome { input { String input_id + String cloud_provider # Optimus Inputs String counting_mode = "sn_rna" @@ -85,7 +86,8 @@ workflow TestMultiome { chrom_sizes = chrom_sizes, atac_whitelist = atac_whitelist, run_cellbender = run_cellbender, - soloMultiMappers = soloMultiMappers + soloMultiMappers = soloMultiMappers, + cloud_provider = cloud_provider } diff --git a/verification/test-wdls/TestOptimus.wdl b/verification/test-wdls/TestOptimus.wdl index 82bdf03adc..51e34e04e9 100644 --- a/verification/test-wdls/TestOptimus.wdl +++ b/verification/test-wdls/TestOptimus.wdl @@ -59,6 +59,8 @@ workflow TestOptimus { String vault_token_path String google_account_vault_path + String cloud_provider + } meta { @@ -84,7 +86,8 @@ workflow TestOptimus { star_strand_mode = star_strand_mode, count_exons = count_exons, ignore_r1_read_length = ignore_r1_read_length, - soloMultiMappers = soloMultiMappers + soloMultiMappers = soloMultiMappers, + cloud_provider = cloud_provider } # Collect all of the pipeling output into single Array diff --git a/verification/test-wdls/TestSlideSeq.wdl b/verification/test-wdls/TestSlideSeq.wdl index b63cd87099..b0523fee21 100644 --- a/verification/test-wdls/TestSlideSeq.wdl +++ b/verification/test-wdls/TestSlideSeq.wdl @@ -26,6 +26,7 @@ workflow TestSlideSeq { Boolean update_truth String vault_token_path String google_account_vault_path + String cloud_provider } meta { @@ -43,7 +44,8 @@ workflow TestSlideSeq { annotations_gtf = annotations_gtf, output_bam_basename = output_bam_basename, count_exons = count_exons, - bead_locations = bead_locations + bead_locations = bead_locations, + cloud_provider = cloud_provider } diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md index 4f0750f35d..547bbeb5ac 100644 --- a/website/docs/Pipelines/ATAC/README.md +++ b/website/docs/Pipelines/ATAC/README.md @@ -8,7 +8,7 @@ slug: /Pipelines/ATAC/README | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| 
diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md
index 4f0750f35d..547bbeb5ac 100644
--- a/website/docs/Pipelines/ATAC/README.md
+++ b/website/docs/Pipelines/ATAC/README.md
@@ -8,7 +8,7 @@ slug: /Pipelines/ATAC/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [1.1.8](https://github.com/broadinstitute/warp/releases) | January, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [1.1.9](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 ## Introduction to the ATAC workflow
diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md
index 3409347d3f..511f27c285 100644
--- a/website/docs/Pipelines/Multiome_Pipeline/README.md
+++ b/website/docs/Pipelines/Multiome_Pipeline/README.md
@@ -8,7 +8,7 @@ slug: /Pipelines/Multiome_Pipeline/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [Multiome v3.2.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact the [WARP Pipeline Development team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [Multiome v3.2.2](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact the [WARP Pipeline Development team](mailto:warp-pipelines-help@broadinstitute.org) |
 ![Multiome_diagram](./multiome_diagram.png)
@@ -56,6 +56,7 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta
 | Input name | Description | Type |
 | --- | --- | --- |
 | input_id | Unique identifier describing the biological sample or replicate that corresponds with the FASTQ files; can be a human-readable name or UUID. | String |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
 | annotations_gtf | GTF file containing gene annotations used for GEX cell metric calculation and ATAC fragment metrics; must match the GTF used to build the STAR aligner. | File |
 | gex_r1_fastq | Array of read 1 FASTQ files representing a single GEX 10x library. | Array[File] |
 | gex_r2_fastq | Array of read 2 FASTQ files representing a single GEX 10x library. | Array[File] |
@@ -69,7 +70,7 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta
 | ignore_r1_read_length | Optional boolean for the Optimus (GEX) pipeline indicating if the pipeline should ignore barcode chemistry check; if "true", the workflow will not ensure the `10x_chemistry_version` input matches the chemistry in the read 1 FASTQ; default is "false". | Boolean |
 | star_strand_mode | Optional string for the Optimus (GEX) pipeline for performing STARsolo alignment on forward stranded, reverse stranded, or unstranded data; default is "Forward". | String |
 | count_exons | Optional boolean for the Optimus (GEX) pipeline indicating if the workflow should calculate exon counts **when in single-nucleus (sn_rna) mode**; if "true" in sc_rna mode, the workflow will return an error; default is "false". | Boolean |
-| gex_whitelist | Optional file containing the list of valid barcodes for 10x multiome GEX data; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt". | File |
+| gex_whitelist | Optional file containing the list of valid barcodes for 10x multiome GEX data; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" when run on GCP; an Azure-hosted copy of the same list is used when `cloud_provider` is "azure". | File |
 | soloMultiMappers | Optional string describing whether or not the Optimus (GEX) pipeline should run STARsolo with the `--soloMultiMappers` flag. | String |
 | atac_r1_fastq | Array of read 1 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
 | atac_r2_fastq | Array of barcodes FASTQ files representing a single 10x multiome ATAC library. | Array[File] |
diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md
index 382804e447..67a8ea0f7b 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/README.md
+++ b/website/docs/Pipelines/Optimus_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/Optimus_Pipeline/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [optimus_v6.4.0](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | February, 2024 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [optimus_v6.4.2](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | March, 2024 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 ![Optimus_diagram](Optimus_diagram.png)
@@ -85,6 +85,7 @@ The example configuration files also contain metadata for the reference files, d
 | Parameter name | Description | Optional attributes (when applicable) |
 | --- | --- | --- |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | N/A |
 | whitelist | List of known CBs; the workflow automatically selects the [10x Genomics](https://www.10xgenomics.com/) whitelist that corresponds to the v2 or v3 chemistry based on the input `tenx_chemistry_version`. A custom whitelist can also be provided if the input data was generated with a chemistry different from 10x Genomics v2 or v3. To use a custom whitelist, set the input `ignore_r1_read_length` to "true". | N/A |
 | read_struct | String describing the structure of reads; the workflow automatically selects the [10x Genomics](https://www.10xgenomics.com/) read structure that corresponds to the v2 or v3 chemistry based on the input `tenx_chemistry_version`. A custom read structure can also be provided if the input data was generated with a chemistry different from 10x Genomics v2 or v3. To use a custom read structure, set the input `force_no_check` to "true". | N/A |
 | tar_star_reference | TAR file containing a species-specific reference genome and GTF; it is generated using the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl). | N/A |
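The cloud_provider rows added to these input tables describe a two-value contract ("gcp" or "azure"). One way a workflow can act on that contract is to select provider-specific resources with a WDL conditional expression; in the sketch below, both URIs are placeholders rather than the pipelines' published reference paths:

    version 1.0

    # Sketch: choose a provider-specific file with an if/then/else expression.
    # Real workflows would point at their own GCS bucket or Azure blob container.
    workflow SelectByProvider {
      input {
        String cloud_provider  # expected: "gcp" or "azure"
      }
      File gcp_resource = "gs://example-bucket/resource.txt"
      File azure_resource = "https://example.blob.core.windows.net/container/resource.txt"
      File resource = if cloud_provider == "gcp" then gcp_resource else azure_resource
      output {
        File selected_resource = resource
      }
    }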
diff --git a/website/docs/Pipelines/PairedTag_Pipeline/README.md b/website/docs/Pipelines/PairedTag_Pipeline/README.md
index cc0114a766..40d588fb58 100644
--- a/website/docs/Pipelines/PairedTag_Pipeline/README.md
+++ b/website/docs/Pipelines/PairedTag_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/PairedTag_Pipeline/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [PairedTag_v0.1.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
+| [PairedTag_v0.2.1](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
 ## Introduction to the Paired-Tag workflow
diff --git a/website/docs/Pipelines/SlideSeq_Pipeline/README.md b/website/docs/Pipelines/SlideSeq_Pipeline/README.md
index 0b59323acf..7cf8c08935 100644
--- a/website/docs/Pipelines/SlideSeq_Pipeline/README.md
+++ b/website/docs/Pipelines/SlideSeq_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/SlideSeq_Pipeline/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [SlideSeq v3.1.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | Elizabeth Kiernan & Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
+| [SlideSeq v3.1.2](https://github.com/broadinstitute/warp/releases) | March, 2024 | Elizabeth Kiernan & Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) |
 ![SlideSeq_diagram](./slide-seq_diagram.png)
@@ -69,6 +69,7 @@ The Slide-seq workflow inputs are specified in JSON configuration files. Example
 | output_bam_basename | Optional string used for the output BAM file basename. | String |
 | count_exons | Optional boolean indicating if the workflow should calculate exon counts; default is set to “true” and produces an h5ad file containing both whole-gene counts and exon counts in an additional layer; when set to “false”, an h5ad file containing only whole-gene counts is produced. | Boolean |
 | bead_locations | Whitelist TSV file containing bead barcodes and XY coordinates on a single line for each bead; determined by sequencing prior to mRNA transfer and library preparation. | File |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
 #### Pseudogene handling
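Because cloud_provider accepts only "gcp" or "azure", a workflow can also fail fast on any other value before provisioning expensive tasks. A minimal sketch, assuming a hypothetical ValidateProvider task rather than an existing WARP utility:

    version 1.0

    workflow GuardedPipeline {
      input {
        String cloud_provider
      }
      # Sketch of a fail-fast guard: if the value is neither "gcp" nor "azure",
      # run a task whose only job is to print an error and exit non-zero.
      if (cloud_provider != "gcp" && cloud_provider != "azure") {
        call ValidateProvider {
          input:
            provider = cloud_provider
        }
      }
    }

    task ValidateProvider {
      input {
        String provider
      }
      command <<<
        >&2 echo "cloud_provider must be 'gcp' or 'azure'; got '~{provider}'"
        exit 1
      >>>
      runtime {
        docker: "ubuntu:20.04"
      }
    }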
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
index 09acab0beb..1613d69876 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
+++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [MultiSampleSmartSeq2SingleNuclei_v1.3.0](https://github.com/broadinstitute/warp/releases) | February, 2024 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
+| [MultiSampleSmartSeq2SingleNuclei_v1.3.1](https://github.com/broadinstitute/warp/releases) | March, 2024 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
 ![](./snSS2.png)
@@ -82,6 +82,7 @@ The table below details the Multi-snSS2 inputs. The pipeline is designed to take
 | species | Optional description of the species from which the cells were derived. | Array of strings |
 | input_name_metadata_field | Optional input describing, when applicable, the metadata field containing the `input_names`. | String |
 | input_id_metadata_field | Optional string describing, when applicable, the metadata field containing the `input_ids`. | String |
+| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String |
 ## Multi-snSS2 tasks and tools
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
index 8ab56b15bd..a758e085cb 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
+++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/multi_snss2.methods.md
@@ -2,13 +2,13 @@ sidebar_position: 2
 ---
-# Smart-seq2 Single Nucleus Multi-Sample v1.3.0 Publication Methods
+# Smart-seq2 Single Nucleus Multi-Sample v1.3.1 Publication Methods
 Below we provide an example methods section for a publication. For the complete pipeline documentation, see the [Smart-seq2 Single Nucleus Multi-Sample Overview](./README.md).
 ## Methods
-Data preprocessing and count matrix construction for a batch (or plate) were performed using the Smart-seq2 Single Nucleus Multi-Sample v1.3.0 Pipeline (RRID:SCR_021312) as well as Picard v.2.26.10 with default tool parameters unless otherwise specified. Genomic references are publicly available in the [Broad References](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/mm10/v0/single_nucleus?pageState=(%22StorageObjectListTable%22:(%22f%22:%22%255B%255D%22))&prefix=&forceOnObjectsSortingFiltering=false) Google Bucket and are also listed in the [example workflow configuration](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/smartseq2_single_nucleus_multisample/mouse_example.json) in GitHub.
+Data preprocessing and count matrix construction for a batch (or plate) were performed using the Smart-seq2 Single Nucleus Multi-Sample v1.3.1 Pipeline (RRID:SCR_021312) as well as Picard v.2.26.10 with default tool parameters unless otherwise specified. Genomic references are publicly available in the [Broad References](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/mm10/v0/single_nucleus?pageState=(%22StorageObjectListTable%22:(%22f%22:%22%255B%255D%22))&prefix=&forceOnObjectsSortingFiltering=false) Google Bucket and are also listed in the [example workflow configuration](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/smartseq2_single_nucleus_multisample/mouse_example.json) in GitHub. For each nucleus in the batch, paired-end FASTQ files were first trimmed to remove adapters using the fastq-mcf tool with a subsampling parameter of 200,000 reads. The trimmed FASTQ files were then aligned to the GENCODE GRCm38 mouse genome using STAR v.2.7.10a. To count the number of reads per gene, but not isoforms, the quantMode parameter was set to GeneCounts. Multi-mapped reads, optical duplicates, and PCR duplicates were removed from the resulting aligned BAM using the Picard MarkDuplicates tool with REMOVE_DUPLICATES = true. Metrics were collected on the deduplicated BAM using Picard CollectMultipleMetrics with VALIDATION_STRINGENCY = SILENT.
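To reproduce the deduplication step the methods paragraph describes outside the pipeline, a hedged WDL sketch of a Picard MarkDuplicates invocation with REMOVE_DUPLICATES=true follows; the jar path, container image, and resource values are placeholders, not the pipeline's actual configuration:

    version 1.0

    # Sketch only: mirrors the methods text (Picard MarkDuplicates with
    # REMOVE_DUPLICATES=true), not the exact task the pipeline runs.
    task RemoveDuplicates {
      input {
        File aligned_bam
        String output_basename
        String docker_path  # an image that bundles Picard, e.g. a broadinstitute/picard build
      }
      command <<<
        java -jar /usr/picard/picard.jar MarkDuplicates \
          INPUT=~{aligned_bam} \
          OUTPUT=~{output_basename}.dedup.bam \
          METRICS_FILE=~{output_basename}.duplicate_metrics.txt \
          REMOVE_DUPLICATES=true
      >>>
      runtime {
        docker: docker_path
        memory: "8 GiB"
      }
      output {
        File dedup_bam = "~{output_basename}.dedup.bam"
        File duplicate_metrics = "~{output_basename}.duplicate_metrics.txt"
      }
    }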