From 2510aae8e7ef29d14951990a9cd824b114e615d2 Mon Sep 17 00:00:00 2001 From: npetrill Date: Thu, 1 Aug 2024 13:14:52 -0400 Subject: [PATCH 01/24] pin all latest docker version --- .../arrays/multi_sample/MultiSampleArrays.wdl | 2 +- .../cram_to_unmapped_bams/CramToUnmappedBams.wdl | 2 +- pipelines/skylab/atac/atac.wdl | 4 ++-- projects/tasks/AdapterTasks.wdl | 2 +- tasks/broad/IlluminaGenotypingArrayTasks.wdl | 2 +- tasks/broad/InternalArraysTasks.wdl | 6 +++--- tasks/broad/InternalTasks.wdl | 2 +- tasks/skylab/CheckInputs.wdl | 2 +- .../src/main/resources/dummy/dummyWorkflow.wdl | 2 +- verification/VerifyExternalReprocessing.wdl | 2 +- verification/VerifyGermlineSingleSample.wdl | 2 +- verification/VerifyIlluminaGenotypingArray.wdl | 4 ++-- verification/VerifyJointGenotyping.wdl | 2 +- verification/VerifyMetrics.wdl | 2 +- verification/VerifyTasks.wdl | 14 +++++++------- .../VerifyUltimaGenomicsJointGenotyping.wdl | 2 +- .../VerifyUltimaGenomicsWholeGenomeGermline.wdl | 2 +- 17 files changed, 27 insertions(+), 27 deletions(-) diff --git a/pipelines/broad/arrays/multi_sample/MultiSampleArrays.wdl b/pipelines/broad/arrays/multi_sample/MultiSampleArrays.wdl index 1633c8cd31..6e5757c101 100644 --- a/pipelines/broad/arrays/multi_sample/MultiSampleArrays.wdl +++ b/pipelines/broad/arrays/multi_sample/MultiSampleArrays.wdl @@ -83,7 +83,7 @@ task SplitFoFnToListFoFn { >>> runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" memory: "1 GiB" } diff --git a/pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl b/pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl index d54d9a4cf7..bef7a7893b 100644 --- a/pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl +++ b/pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl @@ -222,7 +222,7 @@ task SplitUpOutputMapFile { >>> runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk " + disk_size + " HDD" memory: "~{memory_in_MiB} MiB" } diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 8526d0cbc1..8b2fd36af7 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -173,12 +173,12 @@ task GetNumSplits { Int nthreads Int mem_size String cpu_platform - String docker_image = "ubuntu:latest" + String docker_image = "ubuntu@sha256:2e863c44b718727c860746568e1d54afd13b2fa71b160f5cd9058fc436217b30" String vm_size } parameter_meta { - docker_image: "the ubuntu docker image (default: ubuntu:latest)" + docker_image: "the ubuntu docker image (default: ubuntu@sha256:2e863c44b718727c860746568e1d54afd13b2fa71b160f5cd9058fc436217b30)" nthreads: "Number of threads per node (default: 128)" mem_size: "the size of memory used during alignment" vm_size: "the virtual machine used for the task" diff --git a/projects/tasks/AdapterTasks.wdl b/projects/tasks/AdapterTasks.wdl index 8f57dd2e80..d8916e38bd 100644 --- a/projects/tasks/AdapterTasks.wdl +++ b/projects/tasks/AdapterTasks.wdl @@ -452,7 +452,7 @@ task GetCloudFileCreationDate { input { String file_path - String docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:latest" + String docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:486.0.0" Int cpu = 1 Int memory_mb = 1000 Int disk_size_gb = 1 diff --git 
a/tasks/broad/IlluminaGenotypingArrayTasks.wdl b/tasks/broad/IlluminaGenotypingArrayTasks.wdl index 2598bed60b..4ca4074fce 100644 --- a/tasks/broad/IlluminaGenotypingArrayTasks.wdl +++ b/tasks/broad/IlluminaGenotypingArrayTasks.wdl @@ -14,7 +14,7 @@ task Md5Sum { >>> runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "3.5 GiB" preemptible: 3 diff --git a/tasks/broad/InternalArraysTasks.wdl b/tasks/broad/InternalArraysTasks.wdl index 76d56fb23d..e9ec3afe15 100644 --- a/tasks/broad/InternalArraysTasks.wdl +++ b/tasks/broad/InternalArraysTasks.wdl @@ -335,7 +335,7 @@ task CreateChipWellBarcodeParamsFile { >>> runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "2 GiB" preemptible: preemptible_tries @@ -445,7 +445,7 @@ task ResolveExtendedIlluminaManifestFile { >>> runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "2 GiB" preemptible: preemptible_tries @@ -480,7 +480,7 @@ task ResolveMinorAlleleFrequencyFile { >>> runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "2 GiB" preemptible: preemptible_tries diff --git a/tasks/broad/InternalTasks.wdl b/tasks/broad/InternalTasks.wdl index 75dc79adb5..b3bef735a6 100644 --- a/tasks/broad/InternalTasks.wdl +++ b/tasks/broad/InternalTasks.wdl @@ -17,7 +17,7 @@ task MakeSafeFilename { >>> runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "1 GiB" preemptible: 3 diff --git a/tasks/skylab/CheckInputs.wdl b/tasks/skylab/CheckInputs.wdl index 57fbcaad1a..526586fedd 100644 --- a/tasks/skylab/CheckInputs.wdl +++ b/tasks/skylab/CheckInputs.wdl @@ -43,7 +43,7 @@ task checkInputArrays { } runtime { - docker: "bashell/alpine-bash:latest" + docker: "bashell/alpine-bash@sha256:965a718a07c700a5204c77e391961edee37477634ce2f9cf652a8e4c2db858ff" cpu: 1 memory: "1 GiB" disks: "local-disk ~{disk} HDD" diff --git a/tests/broad/scala_test/src/main/resources/dummy/dummyWorkflow.wdl b/tests/broad/scala_test/src/main/resources/dummy/dummyWorkflow.wdl index 38a9735689..aed9ae2a9e 100644 --- a/tests/broad/scala_test/src/main/resources/dummy/dummyWorkflow.wdl +++ b/tests/broad/scala_test/src/main/resources/dummy/dummyWorkflow.wdl @@ -33,6 +33,6 @@ task PrintMessageToStdout { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" } } \ No newline at end of file diff --git a/verification/VerifyExternalReprocessing.wdl b/verification/VerifyExternalReprocessing.wdl index 7e6a697d5d..a6b6005b5b 100644 --- a/verification/VerifyExternalReprocessing.wdl +++ b/verification/VerifyExternalReprocessing.wdl @@ -32,7 +32,7 @@ task AssertTrue { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: 
"gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "3.5 GiB" preemptible: 3 diff --git a/verification/VerifyGermlineSingleSample.wdl b/verification/VerifyGermlineSingleSample.wdl index 314f3f51a3..9bea981c37 100755 --- a/verification/VerifyGermlineSingleSample.wdl +++ b/verification/VerifyGermlineSingleSample.wdl @@ -90,7 +90,7 @@ task CompareGvcfs { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 300 HDD" memory: "${memory_mb} MiB" preemptible: 3 diff --git a/verification/VerifyIlluminaGenotypingArray.wdl b/verification/VerifyIlluminaGenotypingArray.wdl index a5422ee5cf..6b3bc77ef7 100644 --- a/verification/VerifyIlluminaGenotypingArray.wdl +++ b/verification/VerifyIlluminaGenotypingArray.wdl @@ -155,7 +155,7 @@ task CompareFiles { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "2 GiB" preemptible: 3 @@ -175,7 +175,7 @@ task CompareMetricFilesAsText { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "2 GiB" preemptible: 3 diff --git a/verification/VerifyJointGenotyping.wdl b/verification/VerifyJointGenotyping.wdl index 8f5c54458b..64311f55ea 100644 --- a/verification/VerifyJointGenotyping.wdl +++ b/verification/VerifyJointGenotyping.wdl @@ -93,7 +93,7 @@ task CompareFingerprints { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "2 GiB" preemptible: 3 diff --git a/verification/VerifyMetrics.wdl b/verification/VerifyMetrics.wdl index 53a638259a..3baa400998 100644 --- a/verification/VerifyMetrics.wdl +++ b/verification/VerifyMetrics.wdl @@ -58,7 +58,7 @@ task CompareTwoNumbers { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "3.5 GiB" preemptible: 3 diff --git a/verification/VerifyTasks.wdl b/verification/VerifyTasks.wdl index 547fac62ee..e6be841384 100644 --- a/verification/VerifyTasks.wdl +++ b/verification/VerifyTasks.wdl @@ -19,7 +19,7 @@ task CompareVcfs { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 70 HDD" memory: "32 GiB" preemptible: 3 @@ -49,7 +49,7 @@ task CompareVcfsAllowingQualityDifferences { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 50 HDD" memory: "3 GiB" preemptible: 3 @@ -182,7 +182,7 @@ task CompareTextFiles { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 100 HDD" memory: "50 GiB" preemptible: 3 @@ 
-210,7 +210,7 @@ task CompareCrams { cmp -i "$test_offset:$truth_offset" ~{test_cram} ~{truth_cram} } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk " + disk_size_gb + " HDD" memory: "2 GiB" preemptible: 3 @@ -230,7 +230,7 @@ task CompareCrais { cmp <(zcat ~{test_crai} | cut -f1,2,3,5,6) <(zcat ~{truth_crai} | cut -f1,2,3,5,6) } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "2 GiB" preemptible: 3 @@ -320,7 +320,7 @@ task CompareCompressedTextFiles { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk " + disk_size + " HDD" memory: "20 GiB" preemptible: 3 @@ -589,7 +589,7 @@ task CompareSnapTextFiles { >>> runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 50 HDD" memory: "25 GiB" preemptible: 3 diff --git a/verification/VerifyUltimaGenomicsJointGenotyping.wdl b/verification/VerifyUltimaGenomicsJointGenotyping.wdl index 4d2c19a5cc..aa0c6ded02 100644 --- a/verification/VerifyUltimaGenomicsJointGenotyping.wdl +++ b/verification/VerifyUltimaGenomicsJointGenotyping.wdl @@ -79,7 +79,7 @@ task CompareFingerprints { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 10 HDD" memory: "2 GiB" preemptible: 3 diff --git a/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl b/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl index c206f04926..bf7cfdfb85 100755 --- a/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl +++ b/verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl @@ -135,7 +135,7 @@ task CompareGvcfs { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" disks: "local-disk 70 HDD" memory: "2 GiB" preemptible: 3 From 8590e2ee38cbe79ddbb43c9062aa7e1b1d5a76df Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 23 Aug 2024 12:03:09 -0400 Subject: [PATCH 02/24] run with h5ad isntead o floom --- tasks/skylab/LoomUtils.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index 145b2ebc77..f504f5b6fc 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -300,7 +300,7 @@ task SingleNucleusOptimusLoomOutput { task SingleNucleusSmartSeq2LoomOutput { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:np-add-multisamplesnss2-h5ad-script" Array[File] alignment_summary_metrics Array[File] dedup_metrics @@ -361,17 +361,17 @@ task SingleNucleusSmartSeq2LoomOutput { echo "Success GroupQCs" # create the loom file - echo "Running create_loom_snss2." 
- python3 /usr/gitc/create_loom_snss2.py \ + echo "Running create_h5ad_snss2.py" + python3 /usr/gitc/create_h5ad_snss2.py \ --qc_files "${output_prefix[$i]}.Picard_group.csv" \ --count_results "${output_prefix[$i]}.exon_intron_counts.tsv" \ - --output_loom_path "${output_prefix[$i]}.loom" \ + --output_h5ad_path "${output_prefix[$i]}" \ --input_id ${output_prefix[$i]} \ ~{"--input_id_metadata_field " + input_id_metadata_field} \ ~{"--input_name_metadata_field " + input_name_metadata_field} \ --pipeline_version ~{pipeline_version} - echo "Success create_loom_snss2" + echo "Success create_h5ad_snss2" done; >>> From da11f5b1b73e81715293ea1b00bdc71c85d69a61 Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 23 Aug 2024 12:50:39 -0400 Subject: [PATCH 03/24] run with h5ad isntead o floom --- .../MultiSampleSmartSeq2SingleNucleus.wdl | 8 ++++---- tasks/skylab/LoomUtils.wdl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index 51d6b6c212..1123868f10 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -129,7 +129,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { annotation_gtf = annotations_gtf } - call LoomUtils.SingleNucleusSmartSeq2LoomOutput as LoomOutput { + call LoomUtils.SingleNucleusSmartSeq2H5adOutput as H5adOutput { input: input_ids = input_ids, input_names = input_names, @@ -147,7 +147,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { ### Aggregate the Loom Files Directly ### call LoomUtils.AggregateSmartSeq2Loom as AggregateLoom { input: - loom_input = LoomOutput.loom_output, + loom_input = H5adOutput.h5ad_output, batch_id = batch_id, batch_name = batch_name, project_id = if defined(project_id) then select_first([project_id])[0] else none, @@ -163,9 +163,9 @@ workflow MultiSampleSmartSeq2SingleNucleus { ### Pipeline output ### output { # loom output, exon/intron count tsv files and the aligned bam files - File loom_output = AggregateLoom.loom_output_file + File h5ad_output = AggregateLoom.h5ad_output_file File genomic_reference_version = ReferenceCheck.genomic_ref_version - Array[File] exon_intron_count_files = LoomOutput.exon_intron_counts + Array[File] exon_intron_count_files = H5adOutput.exon_intron_counts Array[File] bam_files = RemoveDuplicatesFromBam.output_bam String pipeline_version_out = pipeline_version } diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index f504f5b6fc..9fac1c5bee 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -385,7 +385,7 @@ task SingleNucleusSmartSeq2LoomOutput { } output { - Array[File] loom_output = glob("*.loom") + Array[File] h5ad_output = glob("*.h5ad") Array[File] exon_intron_counts = glob("*exon_intron_counts.tsv") } } From 53e914d2a88f92b29e35c70ed59953cac8a187b5 Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 23 Aug 2024 12:55:08 -0400 Subject: [PATCH 04/24] run with h5ad isntead o floom --- tasks/skylab/LoomUtils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index 9fac1c5bee..e06384f88f 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -297,7 +297,7 @@ task SingleNucleusOptimusLoomOutput { } -task SingleNucleusSmartSeq2LoomOutput { +task 
SingleNucleusSmartSeq2H5adOutput { input { #runtime values String docker = "us.gcr.io/broad-gotc-prod/warp-tools:np-add-multisamplesnss2-h5ad-script" From 8bc0bb232e0c93c46277aa2540693c3cc184ede8 Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 23 Aug 2024 12:58:32 -0400 Subject: [PATCH 05/24] run with h5ad isntead o floom --- .../MultiSampleSmartSeq2SingleNucleus.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index 1123868f10..7f8df110d1 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -163,7 +163,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { ### Pipeline output ### output { # loom output, exon/intron count tsv files and the aligned bam files - File h5ad_output = AggregateLoom.h5ad_output_file + File loom_output = AggregateLoom.loom_output_file File genomic_reference_version = ReferenceCheck.genomic_ref_version Array[File] exon_intron_count_files = H5adOutput.exon_intron_counts Array[File] bam_files = RemoveDuplicatesFromBam.output_bam From 852fa7e1f815bd634d4de00bedb42955d7e2dec1 Mon Sep 17 00:00:00 2001 From: npetrill Date: Mon, 26 Aug 2024 15:42:16 -0400 Subject: [PATCH 06/24] new docker --- tasks/skylab/LoomUtils.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index e06384f88f..f51a9dad8a 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -300,7 +300,7 @@ task SingleNucleusOptimusLoomOutput { task SingleNucleusSmartSeq2H5adOutput { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:np-add-multisamplesnss2-h5ad-script" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:np_trying_to_add_Sctools_again_1" Array[File] alignment_summary_metrics Array[File] dedup_metrics @@ -347,7 +347,7 @@ task SingleNucleusSmartSeq2H5adOutput { do # creates a table with gene_id, gene_name, intron and exon counts echo "Running create_snss2_counts_csv." - python /usr/gitc/create_snss2_counts_csv.py \ + python /warptools/scripts/create_snss2_counts_csv.py \ --in-gtf ~{annotation_introns_added_gtf} \ --intron-counts ${introns_counts_files[$i]} \ --exon-counts ${exons_counts_files[$i]} \ @@ -361,8 +361,8 @@ task SingleNucleusSmartSeq2H5adOutput { echo "Success GroupQCs" # create the loom file - echo "Running create_h5ad_snss2.py" - python3 /usr/gitc/create_h5ad_snss2.py \ + echo "Running create_h5ad_snss2." 
+ python3 /warptools/scripts/create_h5ad_snss2.py \ --qc_files "${output_prefix[$i]}.Picard_group.csv" \ --count_results "${output_prefix[$i]}.exon_intron_counts.tsv" \ --output_h5ad_path "${output_prefix[$i]}" \ From 8dcd8443f4b1d8d86e2736e15755b02847532098 Mon Sep 17 00:00:00 2001 From: npetrill Date: Tue, 27 Aug 2024 12:32:57 -0400 Subject: [PATCH 07/24] merge h5ad files --- .../MultiSampleSmartSeq2SingleNucleus.wdl | 15 ++----- tasks/skylab/LoomUtils.wdl | 39 +++++++++++++++++++ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index 7f8df110d1..2f67e9a3e4 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -145,17 +145,10 @@ workflow MultiSampleSmartSeq2SingleNucleus { } ### Aggregate the Loom Files Directly ### - call LoomUtils.AggregateSmartSeq2Loom as AggregateLoom { + call LoomUtils.AggregateSmartSeq2H5ad as AggregateH5ad { input: - loom_input = H5adOutput.h5ad_output, - batch_id = batch_id, - batch_name = batch_name, - project_id = if defined(project_id) then select_first([project_id])[0] else none, - project_name = if defined(project_name) then select_first([project_name])[0] else none, - library = if defined(library) then select_first([library])[0] else none, - species = if defined(species) then select_first([species])[0] else none, - organ = if defined(organ) then select_first([organ])[0] else none, - pipeline_version = "MultiSampleSmartSeq2SingleNucleus_v~{pipeline_version}" + h5ad_input = H5adOutput.h5ad_output, + batch_id = batch_id } @@ -163,7 +156,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { ### Pipeline output ### output { # loom output, exon/intron count tsv files and the aligned bam files - File loom_output = AggregateLoom.loom_output_file + File loom_output = AggregateH5ad.h5ad_output_file File genomic_reference_version = ReferenceCheck.genomic_ref_version Array[File] exon_intron_count_files = H5adOutput.exon_intron_counts Array[File] bam_files = RemoveDuplicatesFromBam.output_bam diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index f51a9dad8a..725e9c8eaf 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -214,6 +214,45 @@ task AggregateSmartSeq2Loom { } +task AggregateSmartSeq2H5ad { + input { + Array[File] h5ad_input + String batch_id + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:np_trying_to_add_Sctools_again_1" + Int disk = 200 + Int machine_mem_mb = 4000 + Int cpu = 1 + } + + meta { + description: "aggregate the H5AD output" + } + + command { + set -e + + # Merge the h5ad files + python3 /usr/gitc/ss2_h5ad_merge.py \ + --input-h5ad-files ~{sep=' ' h5ad_input} \ + --output-h5ad-file "~{batch_id}.h5ad" + + } + + output { + File h5ad_output_file = "~{batch_id}.h5ad" + } + + runtime { + docker: docker + cpu: cpu + memory: "~{machine_mem_mb} MiB" + disks: "local-disk ~{disk} HDD" + disk: disk + " GB" # TES + preemptible: 3 + maxRetries: 1 + } +} + task SingleNucleusOptimusLoomOutput { input { From 27d938e26a64288ae3b57cf981db05d80141d992 Mon Sep 17 00:00:00 2001 From: npetrill Date: Tue, 27 Aug 2024 13:16:59 -0400 Subject: [PATCH 08/24] new path --- tasks/skylab/LoomUtils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index 725e9c8eaf..1806eec185 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -232,7 +232,7 @@ task AggregateSmartSeq2H5ad { set -e # Merge the h5ad files - python3 /usr/gitc/ss2_h5ad_merge.py \ + python3 /warptools/scripts/ss2_h5ad_merge.py \ --input-h5ad-files ~{sep=' ' h5ad_input} \ --output-h5ad-file "~{batch_id}.h5ad" From 56fcdf5930b8193ae9119e324f118f352139a475 Mon Sep 17 00:00:00 2001 From: npetrill Date: Tue, 27 Aug 2024 14:29:26 -0400 Subject: [PATCH 09/24] redo h5ad merge --- .../MultiSampleSmartSeq2SingleNucleus.wdl | 1 + tasks/skylab/LoomUtils.wdl | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index 2f67e9a3e4..e9b944f9bc 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -148,6 +148,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { call LoomUtils.AggregateSmartSeq2H5ad as AggregateH5ad { input: h5ad_input = H5adOutput.h5ad_output, + pipeline_version = pipeline_version, batch_id = batch_id } diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index 1806eec185..ab04db814e 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -218,6 +218,7 @@ task AggregateSmartSeq2H5ad { input { Array[File] h5ad_input String batch_id + String pipeline_version String docker = "us.gcr.io/broad-gotc-prod/warp-tools:np_trying_to_add_Sctools_again_1" Int disk = 200 Int machine_mem_mb = 4000 @@ -234,8 +235,9 @@ task AggregateSmartSeq2H5ad { # Merge the h5ad files python3 /warptools/scripts/ss2_h5ad_merge.py \ --input-h5ad-files ~{sep=' ' h5ad_input} \ - --output-h5ad-file "~{batch_id}.h5ad" - + --output-h5ad-file "~{batch_id}.h5ad" \ + --batch_id ~{batch_id} \ + --pipeline_version ~{pipeline_version} } output { From 21b5840fc02c8756d8b284e5ff06e448769aa485 Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 30 Aug 2024 11:23:42 -0400 Subject: [PATCH 10/24] add verify task --- .../VerifyMultiSampleSmartSeq2SingleNucleus.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl b/verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl index a5d9b8618e..8d7917f60b 100644 --- a/verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl +++ b/verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl @@ -6,8 +6,8 @@ workflow VerifyMultiSampleSmartSeq2SingleNucleus { input { Array[File] truth_bams Array[File] test_bams - File truth_loom - File test_loom + File truth_h5ad + File test_h5ad Boolean? 
done } @@ -21,10 +21,10 @@ workflow VerifyMultiSampleSmartSeq2SingleNucleus { } } - call VerifyTasks.CompareLooms as CompareLooms { + call VerifyTasks.CompareH5adFilesGEX as CompareH5adFiles { input: - test_loom = test_loom, - truth_loom = truth_loom + test_h5ad = test_h5ad, + truth_h5ad = truth_h5ad } output{} From 5ae5eb5e6b3eff0497676f370ffcaf78cbdf0c29 Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 30 Aug 2024 11:33:11 -0400 Subject: [PATCH 11/24] add verify task --- .../MultiSampleSmartSeq2SingleNucleus.wdl | 2 +- .../test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index e9b944f9bc..bceae67e83 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -157,7 +157,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { ### Pipeline output ### output { # loom output, exon/intron count tsv files and the aligned bam files - File loom_output = AggregateH5ad.h5ad_output_file + File h5ad_output = AggregateH5ad.h5ad_output_file File genomic_reference_version = ReferenceCheck.genomic_ref_version Array[File] exon_intron_count_files = H5adOutput.exon_intron_counts Array[File] bam_files = RemoveDuplicatesFromBam.output_bam diff --git a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl index 228b6b1f41..1bc460701a 100644 --- a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl +++ b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl @@ -106,9 +106,9 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { results_path = results_path, truth_path = truth_path } - call Utilities.GetValidationInputs as GetLoom { + call Utilities.GetValidationInputs as GetH5ad { input: - input_file = MultiSampleSmartSeq2SingleNucleus.loom_output, + input_file = MultiSampleSmartSeq2SingleNucleus.h5ad_output, results_path = results_path, truth_path = truth_path } @@ -117,8 +117,8 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { input: truth_bams = GetBams.truth_files, test_bams = GetBams.results_files, - truth_loom = GetLoom.truth_file, - test_loom = GetLoom.results_file, + truth_h5ad = GetH5ad.truth_file, + test_h5ad = GetH5ad.results_file, done = CopyToTestResults.done } } From 202a0fcb01aca841037ba96493f229d12c01ae01 Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 30 Aug 2024 11:34:52 -0400 Subject: [PATCH 12/24] add verify task --- .../test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl index 1bc460701a..3c98269a4b 100644 --- a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl +++ b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl @@ -68,7 +68,7 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { # Collect all of the pipeline outputs into single Array[String] Array[String] pipeline_outputs = flatten([ [ # File outputs - MultiSampleSmartSeq2SingleNucleus.loom_output, + MultiSampleSmartSeq2SingleNucleus.h5ad_output, ], # Array[File] outputs 
MultiSampleSmartSeq2SingleNucleus.bam_files, From 67379c62b17799f32029a358728c1e23117b260c Mon Sep 17 00:00:00 2001 From: npetrill Date: Tue, 3 Sep 2024 13:57:37 -0400 Subject: [PATCH 13/24] no more loom --- .../MultiSampleSmartSeq2SingleNucleus.wdl | 10 +- tasks/skylab/H5adUtils.wdl | 134 ++++++++++++++++++ tasks/skylab/LoomUtils.wdl | 131 ----------------- 3 files changed, 139 insertions(+), 136 deletions(-) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index bceae67e83..4cfc9c97dc 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -5,7 +5,7 @@ import "../../../tasks/skylab/TrimAdapters.wdl" as TrimAdapters import "../../../tasks/skylab/StarAlign.wdl" as StarAlign import "../../../tasks/skylab/Picard.wdl" as Picard import "../../../tasks/skylab/FeatureCounts.wdl" as CountAlignments -import "../../../tasks/skylab/LoomUtils.wdl" as LoomUtils +import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "../../../tasks/broad/Utilities.wdl" as utils workflow MultiSampleSmartSeq2SingleNucleus { @@ -129,7 +129,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { annotation_gtf = annotations_gtf } - call LoomUtils.SingleNucleusSmartSeq2H5adOutput as H5adOutput { + call H5adUtils.SingleNucleusSmartSeq2H5adOutput as H5adOutput { input: input_ids = input_ids, input_names = input_names, @@ -144,8 +144,8 @@ workflow MultiSampleSmartSeq2SingleNucleus { annotation_introns_added_gtf = annotations_gtf } - ### Aggregate the Loom Files Directly ### - call LoomUtils.AggregateSmartSeq2H5ad as AggregateH5ad { + ### Aggregate the H5ad Files Directly ### + call H5adUtils.AggregateSmartSeq2H5ad as AggregateH5ad { input: h5ad_input = H5adOutput.h5ad_output, pipeline_version = pipeline_version, @@ -156,7 +156,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { ### Pipeline output ### output { - # loom output, exon/intron count tsv files and the aligned bam files + # h5ad output, exon/intron count tsv files and the aligned bam files File h5ad_output = AggregateH5ad.h5ad_output_file File genomic_reference_version = ReferenceCheck.genomic_ref_version Array[File] exon_intron_count_files = H5adOutput.exon_intron_counts diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index f7cb7338b3..cbdd7e475c 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -552,4 +552,138 @@ task SingleNucleusSlideseqH5adOutput { output { File h5ad_output = "~{input_id}.h5ad" } +} + +task SingleNucleusSmartSeq2H5adOutput { + input { + #runtime values + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:np_trying_to_add_Sctools_again_1" + + Array[File] alignment_summary_metrics + Array[File] dedup_metrics + Array[File] gc_bias_summary_metrics + + # introns counts + Array[File] introns_counts + # exons counts + Array[File] exons_counts + # annotation file + File annotation_introns_added_gtf + # name of the sample + Array[String] input_ids + Array[String]? input_names + String? input_id_metadata_field + String? input_name_metadata_field + + String pipeline_version + Int preemptible = 3 + Int disk = 200 + Int machine_mem_mb = 8000 + Int cpu = 4 + } + + meta { + description: "This task will convert output from the SmartSeq2SingleNucleus pipeline into a loom file. 
Contrary to the SmartSeq2 single cell where there is only RSEM counts, here we have intronic and exonic counts per gene name" + } + + parameter_meta { + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command <<< + set -euo pipefail + + declare -a introns_counts_files=(~{sep=' ' introns_counts}) + declare -a exons_counts_files=(~{sep=' ' exons_counts}) + declare -a output_prefix=(~{sep=' ' input_ids}) + declare -a alignment_summary_metrics_list=(~{sep=' 'alignment_summary_metrics}) + declare -a dedup_metrics_list=(~{sep=' 'dedup_metrics}) + declare -a gc_bias_summary_metrics_list=(~{sep=' 'gc_bias_summary_metrics}) + + for (( i=0; i<${#introns_counts_files[@]}; ++i)); + do + # creates a table with gene_id, gene_name, intron and exon counts + echo "Running create_snss2_counts_csv." + python /warptools/scripts/create_snss2_counts_csv.py \ + --in-gtf ~{annotation_introns_added_gtf} \ + --intron-counts ${introns_counts_files[$i]} \ + --exon-counts ${exons_counts_files[$i]} \ + -o "${output_prefix[$i]}.exon_intron_counts.tsv" + echo "Success create_snss2_counts_csv." + + # groups the QC file into one file + echo "Running GroupQCs" + GroupQCs -f "${alignment_summary_metrics_list[$i]}" "${dedup_metrics_list[$i]}" "${gc_bias_summary_metrics_list[$i]}" \ + -t Picard -o "${output_prefix[$i]}.Picard_group" + echo "Success GroupQCs" + + # create the loom file + echo "Running create_h5ad_snss2." + python3 /warptools/scripts/create_h5ad_snss2.py \ + --qc_files "${output_prefix[$i]}.Picard_group.csv" \ + --count_results "${output_prefix[$i]}.exon_intron_counts.tsv" \ + --output_h5ad_path "${output_prefix[$i]}" \ + --input_id ${output_prefix[$i]} \ + ~{"--input_id_metadata_field " + input_id_metadata_field} \ + ~{"--input_name_metadata_field " + input_name_metadata_field} \ + --pipeline_version ~{pipeline_version} + + echo "Success create_h5ad_snss2" + done; + >>> + + runtime { + docker: docker + cpu: cpu + memory: "~{machine_mem_mb} MiB" + disks: "local-disk ~{disk} HDD" + disk: disk + " GB" # TES + preemptible: preemptible + } + + output { + Array[File] h5ad_output = glob("*.h5ad") + Array[File] exon_intron_counts = glob("*exon_intron_counts.tsv") + } +} + +task AggregateSmartSeq2H5ad { + input { + Array[File] h5ad_input + String batch_id + String pipeline_version + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:np_trying_to_add_Sctools_again_1" + Int disk = 200 + Int machine_mem_mb = 4000 + Int cpu = 1 + } + + meta { + description: "aggregate the H5AD output" + } + + command { + set -e + + # Merge the h5ad files + python3 /warptools/scripts/ss2_h5ad_merge.py \ + --input-h5ad-files ~{sep=' ' h5ad_input} \ + --output-h5ad-file "~{batch_id}.h5ad" \ + --batch_id ~{batch_id} \ + --pipeline_version ~{pipeline_version} + } + + output { + File h5ad_output_file = "~{batch_id}.h5ad" + } + + runtime { + docker: docker + cpu: cpu + memory: "~{machine_mem_mb} MiB" + disks: "local-disk ~{disk} HDD" + disk: disk + " GB" # TES + preemptible: 3 + maxRetries: 1 + } } \ No newline at end of file diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index ab04db814e..977fef69f4 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -214,46 +214,6 @@ task AggregateSmartSeq2Loom { } -task AggregateSmartSeq2H5ad { - input { - Array[File] h5ad_input - String batch_id - String pipeline_version - String docker = 
"us.gcr.io/broad-gotc-prod/warp-tools:np_trying_to_add_Sctools_again_1" - Int disk = 200 - Int machine_mem_mb = 4000 - Int cpu = 1 - } - - meta { - description: "aggregate the H5AD output" - } - - command { - set -e - - # Merge the h5ad files - python3 /warptools/scripts/ss2_h5ad_merge.py \ - --input-h5ad-files ~{sep=' ' h5ad_input} \ - --output-h5ad-file "~{batch_id}.h5ad" \ - --batch_id ~{batch_id} \ - --pipeline_version ~{pipeline_version} - } - - output { - File h5ad_output_file = "~{batch_id}.h5ad" - } - - runtime { - docker: docker - cpu: cpu - memory: "~{machine_mem_mb} MiB" - disks: "local-disk ~{disk} HDD" - disk: disk + " GB" # TES - preemptible: 3 - maxRetries: 1 - } -} task SingleNucleusOptimusLoomOutput { @@ -338,98 +298,7 @@ task SingleNucleusOptimusLoomOutput { } -task SingleNucleusSmartSeq2H5adOutput { - input { - #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:np_trying_to_add_Sctools_again_1" - - Array[File] alignment_summary_metrics - Array[File] dedup_metrics - Array[File] gc_bias_summary_metrics - - # introns counts - Array[File] introns_counts - # exons counts - Array[File] exons_counts - # annotation file - File annotation_introns_added_gtf - # name of the sample - Array[String] input_ids - Array[String]? input_names - String? input_id_metadata_field - String? input_name_metadata_field - - String pipeline_version - Int preemptible = 3 - Int disk = 200 - Int machine_mem_mb = 8000 - Int cpu = 4 - } - - meta { - description: "This task will convert output from the SmartSeq2SingleNucleus pipeline into a loom file. Contrary to the SmartSeq2 single cell where there is only RSEM counts, here we have intronic and exonic counts per gene name" - } - - parameter_meta { - preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" - } - - command <<< - set -euo pipefail - declare -a introns_counts_files=(~{sep=' ' introns_counts}) - declare -a exons_counts_files=(~{sep=' ' exons_counts}) - declare -a output_prefix=(~{sep=' ' input_ids}) - declare -a alignment_summary_metrics_list=(~{sep=' 'alignment_summary_metrics}) - declare -a dedup_metrics_list=(~{sep=' 'dedup_metrics}) - declare -a gc_bias_summary_metrics_list=(~{sep=' 'gc_bias_summary_metrics}) - - for (( i=0; i<${#introns_counts_files[@]}; ++i)); - do - # creates a table with gene_id, gene_name, intron and exon counts - echo "Running create_snss2_counts_csv." - python /warptools/scripts/create_snss2_counts_csv.py \ - --in-gtf ~{annotation_introns_added_gtf} \ - --intron-counts ${introns_counts_files[$i]} \ - --exon-counts ${exons_counts_files[$i]} \ - -o "${output_prefix[$i]}.exon_intron_counts.tsv" - echo "Success create_snss2_counts_csv." - - # groups the QC file into one file - echo "Running GroupQCs" - GroupQCs -f "${alignment_summary_metrics_list[$i]}" "${dedup_metrics_list[$i]}" "${gc_bias_summary_metrics_list[$i]}" \ - -t Picard -o "${output_prefix[$i]}.Picard_group" - echo "Success GroupQCs" - - # create the loom file - echo "Running create_h5ad_snss2." 
- python3 /warptools/scripts/create_h5ad_snss2.py \ - --qc_files "${output_prefix[$i]}.Picard_group.csv" \ - --count_results "${output_prefix[$i]}.exon_intron_counts.tsv" \ - --output_h5ad_path "${output_prefix[$i]}" \ - --input_id ${output_prefix[$i]} \ - ~{"--input_id_metadata_field " + input_id_metadata_field} \ - ~{"--input_name_metadata_field " + input_name_metadata_field} \ - --pipeline_version ~{pipeline_version} - - echo "Success create_h5ad_snss2" - done; - >>> - - runtime { - docker: docker - cpu: cpu - memory: "~{machine_mem_mb} MiB" - disks: "local-disk ~{disk} HDD" - disk: disk + " GB" # TES - preemptible: preemptible - } - - output { - Array[File] h5ad_output = glob("*.h5ad") - Array[File] exon_intron_counts = glob("*exon_intron_counts.tsv") - } -} task SlideSeqLoomOutput { input { File bead_locations From 77366c33828431c237e7ae20817f8adda74e33b8 Mon Sep 17 00:00:00 2001 From: npetrill Date: Wed, 4 Sep 2024 10:54:17 -0400 Subject: [PATCH 14/24] changing loom to h5ad --- .../README.md | 89 +++++++++---------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md index ea1f81efae..4de0be84af 100644 --- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md @@ -15,7 +15,7 @@ slug: /Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README The Smart-seq2 Single Nucleus Multi-Sample (Multi-snSS2) pipeline was developed in collaboration with the [BRAIN Initiative Cell Census Network](https://biccn.org/) (BICCN) to process single-nucleus RNAseq (snRNAseq) data generated by [Smart-seq2 assays](https://www.nature.com/articles/nmeth.2639). The pipeline's workflow is written in WDL, is freely available in the [WARP repository](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl) on GitHub, and can be run by any compliant WDL runner (e.g. [Crowmell](https://github.com/broadinstitute/cromwell)). -The pipeline is designed to process snRNA-seq data from multiple cells. Overall, the workflow trims paired-end FASTQ files, aligns reads to the genome using a modified GTF, [counts intronic and exonic reads](#6-creating-the-loom-cell-by-gene-matrix), and calculates quality control metrics. +The pipeline is designed to process snRNA-seq data from multiple cells. Overall, the workflow trims paired-end FASTQ files, aligns reads to the genome using a modified GTF, [counts intronic and exonic reads](#6-creating-the-h5ad-cell-by-gene-matrix), and calculates quality control metrics. The pipeline has been scientifically validated by the BRAIN Institute. Read more in the [validation section](#validation). @@ -26,18 +26,18 @@ You can run the [Smart-seq2 Single Nucleus Multi-Sample workflow](https://github ## Quick start table -| Pipeline features | Description | Source | -|-------------------|---------------------------------------------------------------|-----------------------| -| Assay type | Smart-seq2 Single Nucleus | [Smart-seq2](https://www.nature.com/articles/nprot.2014.006) -| Overall workflow | Quality control and transcriptome quantification. 
| Code available from the [WARP repository](https://github.com/broadinstitute/warp/tree/develop/pipelines/skylab/smartseq2_single_nucleus/SmartSeq2SingleNucleus.wdl) in GitHub | -| Workflow language | WDL | [openWDL](https://github.com/openwdl/wdl) | -| Genomic reference sequence (for validation)| GRCm38 mouse genome primary sequence. | GENCODE GRCm38 [mouse reference files](http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/GRCm38.primary_assembly.genome.fa.gz) | -| Transcriptomic reference annotation (for validation) | Modified M23 GTF built with the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl).| GENCODE [M23 GTF](https://www.gencodegenes.org/mouse/release_M23.html); modified version available [Broad references](gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf)| -| Aligner | STAR | [STAR](https://github.com/alexdobin/STAR) | -| QC metrics | Picard | [Broad Institute](https://broadinstitute.github.io/picard/) | -| Transcript quantification | featureCounts (utilities for counting reads to genomic features). | [featureCounts](http://subread.sourceforge.net/) -| Data input file format | File format in which sequencing data is provided. | [FASTQ](https://academic.oup.com/nar/article/38/6/1767/3112533) | -| Data output file formats | File formats in which Smart-seq2 output is provided. | [BAM](http://samtools.github.io/hts-specs/), Loom (counts and metrics; generated with [Loompy v.3.0.6)](http://loompy.org/), TSV (counts) | +| Pipeline features | Description | Source | +|-------------------|---------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Assay type | Smart-seq2 Single Nucleus | [Smart-seq2](https://www.nature.com/articles/nprot.2014.006) +| Overall workflow | Quality control and transcriptome quantification. | Code available from the [WARP repository](https://github.com/broadinstitute/warp/tree/develop/pipelines/skylab/smartseq2_single_nucleus/SmartSeq2SingleNucleus.wdl) in GitHub | +| Workflow language | WDL | [openWDL](https://github.com/openwdl/wdl) | +| Genomic reference sequence (for validation)| GRCm38 mouse genome primary sequence. | GENCODE GRCm38 [mouse reference files](http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/GRCm38.primary_assembly.genome.fa.gz) | +| Transcriptomic reference annotation (for validation) | Modified M23 GTF built with the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl).| GENCODE [M23 GTF](https://www.gencodegenes.org/mouse/release_M23.html); modified version available [Broad references](gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf) | +| Aligner | STAR | [STAR](https://github.com/alexdobin/STAR) | +| QC metrics | Picard | [Broad Institute](https://broadinstitute.github.io/picard/) | +| Transcript quantification | featureCounts (utilities for counting reads to genomic features). | [featureCounts](http://subread.sourceforge.net/) +| Data input file format | File format in which sequencing data is provided. 
| [FASTQ](https://academic.oup.com/nar/article/38/6/1767/3112533) | +| Data output file formats | File formats in which Smart-seq2 output is provided. | [BAM](http://samtools.github.io/hts-specs/), h5ad (counts and metrics; generated with [anndata v.0.7.8)](https://anndata.readthedocs.io/en/latest/), TSV (counts) | ## Set-Up @@ -94,23 +94,23 @@ Overall, the Multi-snSS2 workflow: 1. Removes duplicate reads. 1. Calculates metrics. 1. Quantifies gene counts. -1. Merges exon counts, intron counts, and metrics into a Loom-formatted matrix. +1. Merges exon counts, intron counts, and metrics into a h5ad-formatted matrix. The tools each task employs in the Multi-snSS2 workflow are detailed in the table below. To see specific tool parameters, select the task WDL link in the table; then view the `command {}` section of the task WDL script. To view or use the exact tool software, see the task's Docker image which is specified in the task WDL `# runtime values` section as `String docker =`. -| Task name and WDL link | Tool | Software | Description | -| --- | --- | --- | --- | -| [CheckInputs.checkInputArrays](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/CheckInputs.wdl) | --- | Bash | Checks the inputs and initiates the per cell processing. | -| [StarAlign.STARGenomeRefVersion](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/StarAlign.wdl) | --- | Bash | Reads the `tar_star_reference` file to obtain the genomic reference source, build version, and annotation version. | -| [TrimAdapters.TrimAdapters](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/TrimAdapters.wdl) | [fastq-mcf](https://github.com/ExpressionAnalysis/ea-utils/tree/master/clipper) | [ea-utils](https://github.com/ExpressionAnalysis/ea-utils) | Trims adapter sequences from the FASTQ inputs | -| [StarAlign.StarAlignFastqMultisample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/StarAlign.wdl) | STAR | [STAR](https://github.com/alexdobin/STAR) | Aligns reads to the genome. | -| [Picard.RemoveDuplicatesFromBam](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | MarkDuplicates, AddOrReplaceReadGroups | [Picard](https://broadinstitute.github.io/picard/) | Removes duplicate reads, producing a new BAM output; adds regroups to deduplicated BAM. | -| [Picard.CollectMultipleMetricsMultiSample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | CollectMultipleMetrics | [Picard](https://broadinstitute.github.io/picard/) | Collects QC metrics on the deduplicated BAM files. | -| [CountAlignments.CountAlignments](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/FeatureCounts.wdl) | FeatureCounts | [Subread](http://subread.sourceforge.net/), Python 3 | Uses a custom GTF with featureCounts and Python to mark introns, create a BAM that has alignments spanning intron-exon junctions removed, and counts exons using the custom BAM and by excluding intron tags. | -| [LoomUtils.SingleNucleusSmartSeq2LoomOutput](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/LoomUtils.wdl) | Custom script: [ss2_loom_merge.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/ss2_loom_merge.py) | Python 3 | Creates the matrix files (Loom format) for each sample. 
| -| [LoomUtils.AggregateSmartSeq2Loom](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/LoomUtils.wdl) | Custom script: [ss2_loom_merge.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/ss2_loom_merge.py) | Python 3 | Aggregates the matrix files (Loom format) for each sample to produce one final Loom output. | +| Task name and WDL link | Tool | Software | Description | +|-----------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------| --- |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [CheckInputs.checkInputArrays](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/CheckInputs.wdl) | --- | Bash | Checks the inputs and initiates the per cell processing. | +| [StarAlign.STARGenomeRefVersion](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/StarAlign.wdl) | --- | Bash | Reads the `tar_star_reference` file to obtain the genomic reference source, build version, and annotation version. | +| [TrimAdapters.TrimAdapters](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/TrimAdapters.wdl) | [fastq-mcf](https://github.com/ExpressionAnalysis/ea-utils/tree/master/clipper) | [ea-utils](https://github.com/ExpressionAnalysis/ea-utils) | Trims adapter sequences from the FASTQ inputs | +| [StarAlign.StarAlignFastqMultisample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/StarAlign.wdl) | STAR | [STAR](https://github.com/alexdobin/STAR) | Aligns reads to the genome. | +| [Picard.RemoveDuplicatesFromBam](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | MarkDuplicates, AddOrReplaceReadGroups | [Picard](https://broadinstitute.github.io/picard/) | Removes duplicate reads, producing a new BAM output; adds regroups to deduplicated BAM. | +| [Picard.CollectMultipleMetricsMultiSample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | CollectMultipleMetrics | [Picard](https://broadinstitute.github.io/picard/) | Collects QC metrics on the deduplicated BAM files. | +| [CountAlignments.CountAlignments](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/FeatureCounts.wdl) | FeatureCounts | [Subread](http://subread.sourceforge.net/), Python 3 | Uses a custom GTF with featureCounts and Python to mark introns, create a BAM that has alignments spanning intron-exon junctions removed, and counts exons using the custom BAM and by excluding intron tags. | +| [H5adUtils.SingleNucleusSmartSeq2H5adOutput](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/H5adUtils.wdl) | Custom script: [create_h5ad_snss2.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/create_h5ad_snss2.py) | Python 3 | Creates the matrix files (h5ad format) for each sample. | +| [H5adUtils.AggregateSmartSeq2H5ad](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/H5adUtils.wdl) | Custom script: [ss2_h5ad_merge.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/ss2_h5ad_merge.py) | Python 3 | Aggregates the matrix files (h5ad format) for each sample to produce one final h5ad output. | #### 1. 
Trimming adapters The TrimAdapters task uses the adapter list reference file to run the [fastq-mcf](https://github.com/ExpressionAnalysis/ea-utils/tree/master/clipper) tool. This tool identifies the adapters in the input FASTQ files and performs clipping by using a subsampling parameter of 200,000 reads. The task outputs the trimmed FASTQ files which are then used for alignment. @@ -122,7 +122,7 @@ The StarAlignFastq task runs the STAR aligner on the trimmed FASTQ files. The ST The RemoveDuplicatesFromBam task removes multi-mapped reads, optical duplicates, and PCR duplicates from the aligned BAM. It then adds readgroup information and creates a new, coordinate-sorted aligned BAM output. #### 4. Collecting metrics -The CollectMultipleMetrics task uses the Picard tool CollectMultipleMetrics to perform QC on the deduplicated BAM file. These metrics are copied to the final cell-by-gene matrix output (Loom file). A detailed list of these metrics can be found in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). +The CollectMultipleMetrics task uses the Picard tool CollectMultipleMetrics to perform QC on the deduplicated BAM file. These metrics are copied to the final cell-by-gene matrix output (h5ad file). A detailed list of these metrics can be found in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). #### 5. Counting genes The CountAlignments task uses the featureCounts package to count introns and exons. First, the featureCounts tool counts intronic alignments in the deduplicated BAM using a custom GTF with annotated introns. The tool flags intronic alignments if they overlap an annotated intron by a minimum of 3 bp. @@ -131,45 +131,44 @@ Next, following pipeline processes established by the BICCN Whole Mouse Brain Wo Lastly, featureCounts uses the intermediate BAM with junctions removed to count exons. The final outputs of this step include a cell-by-gene matrix of intronic counts, a cell-by-gene matrix of exonic counts, and summary metrics for the different count types. -#### 6. Creating the Loom cell by gene matrix -The LoomUtils task combines the Picard metrics (alignment_summary_metrics, deduplication metrics, and the G/C bias summary metrics) with the featureCount exon and intron counts to create a Loom formatted cell-by-gene count matrix. +#### 6. Creating the h5ad cell by gene matrix +The H5adUtils task combines the Picard metrics (alignment_summary_metrics, deduplication metrics, and the G/C bias summary metrics) with the featureCount exon and intron counts to create an h5ad formatted cell-by-gene count matrix. Read full details for all the metrics in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). -The cell-by-gene matrix can be examined using [Loompy software](https://linnarssonlab.org/loompy/index.html). Exonic counts are stored in the main Loom matrix which is unnamed by default. They are the default return value of the `loompy.connect()` command. Intronic counts are stored in the Loom as an additional layer which is named `intron_counts`. +The cell-by-gene matrix can be examined using [anndata software](https://anndata.readthedocs.io/en/latest/). Exonic counts are stored in the main h5ad matrix which is unnamed by default. They are the default return value of the `anndata.read_h5ad()` command. Intronic counts are stored in the h5ad as an additional layer which is named `intron_counts`. -You can also access exonic and intronic counts using Loompy's `layers()` method. 
For example, `loompy.connect.layers[“”]` will return the exonic counts from the output Loom file. Similarly, `loompy.connect.layers[“intron_counts”]` will return the intronic counts from the output Loom. +You can also access exonic and intronic counts directly from the AnnData object: the `X` attribute holds the exonic counts from the output h5ad file, and `layers["intron_counts"]` holds the intronic counts. Whole gene counts (which include both intronic and exonic counts) can be accessed by adding the intronic and exonic counts together. -Below is example Loompy code for accessing the Loom's exonic, intronic, and whole gene counts. +Below is example anndata code for accessing the h5ad's exonic, intronic, and whole gene counts. ```python -import loompy -ds = loompy.connect("/PATH/TO/File.loom") +import anndata +ds = anndata.read_h5ad("/PATH/TO/File.h5ad") count_exons = ds[:,:] #geneXcell table for the exonic read counts -count_introns = ds.layer["intron_counts"] #geneXcell table for the intronic read counts +count_introns = ds.layers["intron_counts"] #geneXcell table for the intronic read counts gene_counts = count_exons + count_introns ``` +TODO This is not right ^ -To read more about the Loom file format and use of layers, see the [Loompy documentation](https://linnarssonlab.org/loompy/index.html). +To read more about the h5ad file format and use of layers, see the [h5ad documentation](https://anndata.readthedocs.io/en/latest/). #### 7. Outputs The table below details the final outputs of the Multi-snSS2 workflow. -| Output variable name | Description | Type | -| --- | --- | --- | -| loom_output | Cell-by-gene count matrix that includes the raw exon counts (in matrix), intron counts (in matrix layer), cell metrics (column attributes) and gene IDs (row attributes). | Loom | -| exon_intron_count_files | Array of TXT files (one per cell) that contain intronic and exonic counts. | Array [TXT]| -| bam_files | Array of genome-aligned BAM files (one for each cell) generated with STAR. | Array [BAM]| -| pipeline_version_out | Version of the processing pipeline run on this data. | String | - -The Loom matrix is the default output. See the [create_loom_snss2.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/create_loom_snss2.py) script for the detailed code. This matrix contains the count matrices, as well as the gene and cell metrics detailed in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). +| Output variable name | Description | Type | +|-------------------------| --- |-------------| +| h5ad_output | Cell-by-gene count matrix that includes the raw exon counts (in matrix), intron counts (in matrix layer), cell metrics (column attributes) and gene IDs (row attributes). | h5ad | +| exon_intron_count_files | Array of TXT files (one per cell) that contain intronic and exonic counts. | Array [TXT] | +| bam_files | Array of genome-aligned BAM files (one for each cell) generated with STAR. | Array [BAM] | +| pipeline_version_out | Version of the processing pipeline run on this data. | String | -To facilitate downstream analysis, the output Loom file contains both gene names and gene IDs. +The h5ad matrix is the default output. See the [create_h5ad_snss2.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/create_h5ad_snss2.py) script for the detailed code. 
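As a quick orientation for downstream users, here is a minimal sketch of loading the aggregated matrix and combining the two count layers; the file name is hypothetical, and it assumes the per-cell metrics land in `obs` and the per-gene annotations in `var`, following the column and row attribute description above.

```python
import anndata as ad

# Load the aggregated Multi-snSS2 output (hypothetical file name).
adata = ad.read_h5ad("multi_snss2_batch.h5ad")

exon_counts = adata.X                            # exonic counts (main matrix)
intron_counts = adata.layers["intron_counts"]    # intronic counts (layer)
whole_gene_counts = exon_counts + intron_counts  # exon + intron counts per gene

# Assumed locations for per-cell metrics and per-gene annotations.
print(adata.obs.head())  # cell metrics
print(adata.var.head())  # gene IDs and gene names
```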
This matrix contains the count matrices, as well as the gene and cell metrics detailed in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). -The output Loom matrix can be converted to an H5AD file using a [custom script](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/loom_to_h5ad.py) available in the [warp-tools GitHub repository](https://github.com/broadinstitute/warp-tools). +To facilitate downstream analysis, the output h5ad file contains both gene names and gene IDs. ## Validation The Multi-snSS2 pipeline was scientifically validated by the BRAIN Initiatives Cell Census Network (BICCN) 2.0 Whole Mouse Brain Working Group. From f26d277b615835a50399e6d0e967ceb0413543e6 Mon Sep 17 00:00:00 2001 From: npetrill Date: Wed, 4 Sep 2024 13:26:48 -0400 Subject: [PATCH 15/24] try updating all instances of warp tools --- pipelines/skylab/optimus/Optimus.wdl | 2 +- pipelines/skylab/slideseq/SlideSeq.wdl | 2 +- tasks/skylab/FastqProcessing.wdl | 2 +- tasks/skylab/H5adUtils.wdl | 4 ++-- tasks/skylab/LoomUtils.wdl | 6 +++--- tasks/skylab/Metrics.wdl | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 975439c9f3..5f25ffa49d 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -91,7 +91,7 @@ workflow Optimus { String pytools_docker = "pytools:1.0.0-1661263730" String empty_drops_docker = "empty-drops:1.0.1-4.2" String star_docker = "star:1.0.1-2.7.11a-1692706072" - String warp_tools_docker_2_2_0 = "warp-tools:2.2.0" + String warp_tools_docker_2_2_0 = "warp-tools:2.3.0" String star_merge_docker = "star-merge-npz:1.2" #TODO how do we handle these? diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index c449818881..c779682f06 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -48,7 +48,7 @@ workflow SlideSeq { # docker images String pytools_docker = "pytools:1.0.0-1661263730" String picard_cloud_docker = "picard-cloud:2.26.10" - String warp_tools_docker_2_2_0 = "warp-tools:2.2.0" + String warp_tools_docker_2_2_0 = "warp-tools:2.3.0" String star_merge_docker = "star-merge-npz:1.2" String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index 20a7169d29..ea7363b738 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -138,7 +138,7 @@ task FastqProcessingSlidSeq { # Runtime attributes - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.0" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" Int cpu = 16 Int machine_mb = 40000 Int disk = ceil(size(r1_fastq, "GiB")*3 + size(r2_fastq, "GiB")*3) + 50 diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index cbdd7e475c..890b044680 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -557,7 +557,7 @@ task SingleNucleusSlideseqH5adOutput { task SingleNucleusSmartSeq2H5adOutput { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:np_trying_to_add_Sctools_again_1" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" Array[File] alignment_summary_metrics Array[File] dedup_metrics @@ -652,7 +652,7 @@ task AggregateSmartSeq2H5ad { Array[File] h5ad_input String batch_id String pipeline_version - String docker = 
"us.gcr.io/broad-gotc-prod/warp-tools:np_trying_to_add_Sctools_again_1" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" Int disk = 200 Int machine_mem_mb = 4000 Int cpu = 1 diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index 977fef69f4..960fccf350 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -62,7 +62,7 @@ task OptimusLoomGeneration { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:1.0.1-1681406657" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" # name of the sample String input_id # user provided id @@ -219,7 +219,7 @@ task SingleNucleusOptimusLoomOutput { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:1.0.1-1681406657" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" # name of the sample String input_id # user provided id @@ -311,7 +311,7 @@ task SlideSeqLoomOutput { String input_id String pipeline_version - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:1.0.1-1681406657" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" Int disk_size_gb = 200 Int memory_mb = 18000 Int cpu = 4 diff --git a/tasks/skylab/Metrics.wdl b/tasks/skylab/Metrics.wdl index 1523712912..2f759071dc 100644 --- a/tasks/skylab/Metrics.wdl +++ b/tasks/skylab/Metrics.wdl @@ -165,7 +165,7 @@ task CalculateUMIsMetrics { # runtime values # Did not update docker image as this task uses loom which does not play nice with the changes - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" Int machine_mem_mb = 16000 Int cpu = 8 Int disk = ceil(size(bam_input, "Gi") * 4) @@ -240,7 +240,7 @@ task FastqMetricsSlideSeq { # Runtime attributes - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" Int cpu = 16 Int machine_mb = 40000 Int disk = ceil(size(r1_fastq, "GiB")*3) + 50 From 32389fdb55cc661839a8219ca440c3520e5ad99c Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 4 Sep 2024 17:27:11 +0000 Subject: [PATCH 16/24] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 46 +++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index f8a8c4a1a9..4b73e7e844 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,42 +1,42 @@ Pipeline Name Version Date of Last Commit -Optimus 7.6.0 2024-08-06 -Multiome 5.5.0 2024-08-06 +MultiSampleSmartSeq2SingleNucleus 1.4.2 2024-08-25-02 +MultiSampleSmartSeq2 2.2.21 2023-04-19 PairedTag 1.5.0 2024-08-06 +Optimus 7.6.0 2024-08-06 atac 2.2.3 2024-08-02 -SlideSeq 3.4.0 2024-08-06 snm3C 4.0.4 2024-08-06 -MultiSampleSmartSeq2SingleNucleus 1.4.2 2024-08-25-02 -scATAC 1.3.2 2023-08-03 SmartSeq2SingleSample 5.1.20 2023-04-19 +Multiome 5.5.0 2024-08-06 +scATAC 1.3.2 2023-08-03 BuildIndices 3.0.0 2023-12-06 -MultiSampleSmartSeq2 2.2.21 2023-04-19 -CEMBA 1.1.6 2023-12-18 +SlideSeq 3.4.0 2024-08-06 BuildCembaReferences 1.0.0 2020-11-15 -UltimaGenomicsWholeGenomeCramOnly 1.0.20 2024-08-02 +CEMBA 1.1.6 2023-12-18 GDCWholeGenomeSomaticSingleSample 1.3.2 2024-08-02 -ExomeGermlineSingleSample 3.1.22 2024-06-12 -UltimaGenomicsWholeGenomeGermline 1.0.20 2024-08-02 -WholeGenomeGermlineSingleSample 3.2.1 2024-06-12 -VariantCalling 2.2.1 2024-06-12 +UltimaGenomicsWholeGenomeCramOnly 1.0.20 2024-08-02 +JointGenotypingByChromosomePartOne 1.4.12 
2023-12-18 +JointGenotypingByChromosomePartTwo 1.4.11 2023-12-18 UltimaGenomicsJointGenotyping 1.1.7 2023-12-18 JointGenotyping 1.6.10 2023-12-18 ReblockGVCF 2.2.1 2024-06-12 -JointGenotypingByChromosomePartTwo 1.4.11 2023-12-18 -JointGenotypingByChromosomePartOne 1.4.12 2023-12-18 -ExternalExomeReprocessing 3.2.2 2024-08-02 -ExternalWholeGenomeReprocessing 2.2.2 2024-08-02 -ExomeReprocessing 3.2.2 2024-08-02 -CramToUnmappedBams 1.1.3 2024-08-02 -WholeGenomeReprocessing 3.2.2 2024-08-02 -IlluminaGenotypingArray 1.12.21 2024-08-02 -Arrays 2.6.27 2024-08-02 -MultiSampleArrays 1.6.2 2024-08-02 +VariantCalling 2.2.1 2024-06-12 +WholeGenomeGermlineSingleSample 3.2.1 2024-06-12 +UltimaGenomicsWholeGenomeGermline 1.0.20 2024-08-02 +ExomeGermlineSingleSample 3.1.22 2024-06-12 ValidateChip 1.16.5 2024-08-02 +Arrays 2.6.27 2024-08-02 Imputation 1.1.13 2024-05-21 -RNAWithUMIsPipeline 1.0.16 2023-12-18 +MultiSampleArrays 1.6.2 2024-08-02 BroadInternalUltimaGenomics 1.0.21 2024-08-02 BroadInternalArrays 1.1.11 2024-08-02 BroadInternalImputation 1.1.12 2024-08-02 BroadInternalRNAWithUMIs 1.0.33 2024-08-02 +CramToUnmappedBams 1.1.3 2024-08-02 +ExternalWholeGenomeReprocessing 2.2.2 2024-08-02 +ExternalExomeReprocessing 3.2.2 2024-08-02 +WholeGenomeReprocessing 3.2.2 2024-08-02 +ExomeReprocessing 3.2.2 2024-08-02 +IlluminaGenotypingArray 1.12.21 2024-08-02 CheckFingerprint 1.0.20 2024-08-02 AnnotationFiltration 1.2.5 2023-12-18 +RNAWithUMIsPipeline 1.0.16 2023-12-18 From c5533606557bd3d8fbd183b2d906e2f88f212cbb Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 6 Sep 2024 10:29:22 -0400 Subject: [PATCH 17/24] test out markdown --- .../README.md | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md index 4de0be84af..4dd9fc99d0 100644 --- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md @@ -26,18 +26,18 @@ You can run the [Smart-seq2 Single Nucleus Multi-Sample workflow](https://github ## Quick start table -| Pipeline features | Description | Source | -|-------------------|---------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Assay type | Smart-seq2 Single Nucleus | [Smart-seq2](https://www.nature.com/articles/nprot.2014.006) -| Overall workflow | Quality control and transcriptome quantification. | Code available from the [WARP repository](https://github.com/broadinstitute/warp/tree/develop/pipelines/skylab/smartseq2_single_nucleus/SmartSeq2SingleNucleus.wdl) in GitHub | -| Workflow language | WDL | [openWDL](https://github.com/openwdl/wdl) | -| Genomic reference sequence (for validation)| GRCm38 mouse genome primary sequence. 
| GENCODE GRCm38 [mouse reference files](http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/GRCm38.primary_assembly.genome.fa.gz) | -| Transcriptomic reference annotation (for validation) | Modified M23 GTF built with the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl).| GENCODE [M23 GTF](https://www.gencodegenes.org/mouse/release_M23.html); modified version available [Broad references](gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf) | -| Aligner | STAR | [STAR](https://github.com/alexdobin/STAR) | -| QC metrics | Picard | [Broad Institute](https://broadinstitute.github.io/picard/) | -| Transcript quantification | featureCounts (utilities for counting reads to genomic features). | [featureCounts](http://subread.sourceforge.net/) -| Data input file format | File format in which sequencing data is provided. | [FASTQ](https://academic.oup.com/nar/article/38/6/1767/3112533) | -| Data output file formats | File formats in which Smart-seq2 output is provided. | [BAM](http://samtools.github.io/hts-specs/), h5ad (counts and metrics; generated with [anndata v.0.7.8)](https://anndata.readthedocs.io/en/latest/), TSV (counts) | +| Pipeline features | Description | Source | +|---|---|---| +| Assay type | Smart-seq2 Single Nucleus | [Smart-seq2](https://www.nature.com/articles/nprot.2014.006) +| Overall workflow | Quality control and transcriptome quantification. | Code available from the [WARP repository](https://github.com/broadinstitute/warp/tree/develop/pipelines/skylab/smartseq2_single_nucleus/SmartSeq2SingleNucleus.wdl) in GitHub | +| Workflow language | WDL | [openWDL](https://github.com/openwdl/wdl) | +| Genomic reference sequence (for validation) | GRCm38 mouse genome primary sequence. | GENCODE GRCm38 [mouse reference files](http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/GRCm38.primary_assembly.genome.fa.gz) | +| Transcriptomic reference annotation (for validation) | Modified M23 GTF built with the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl). | GENCODE [M23 GTF](https://www.gencodegenes.org/mouse/release_M23.html); modified version available [Broad references](gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf) | +| Aligner | STAR | [STAR](https://github.com/alexdobin/STAR) | +| QC metrics | Picard | [Broad Institute](https://broadinstitute.github.io/picard/) | +| Transcript quantification | featureCounts (utilities for counting reads to genomic features). | [featureCounts](http://subread.sourceforge.net/) +| Data input file format | File format in which sequencing data is provided. | [FASTQ](https://academic.oup.com/nar/article/38/6/1767/3112533) | +| Data output file formats | File formats in which Smart-seq2 output is provided. 
| [BAM](http://samtools.github.io/hts-specs/), h5ad (counts and metrics; generated with [anndata v.0.7.8)](https://anndata.readthedocs.io/en/latest/), TSV (counts) | ## Set-Up From 588f578df91beb65beee9fd1e2893f58571c5ed2 Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 6 Sep 2024 11:24:07 -0400 Subject: [PATCH 18/24] formatting --- .../README.md | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md index 4dd9fc99d0..24c7222d19 100644 --- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md @@ -26,18 +26,18 @@ You can run the [Smart-seq2 Single Nucleus Multi-Sample workflow](https://github ## Quick start table -| Pipeline features | Description | Source | -|---|---|---| -| Assay type | Smart-seq2 Single Nucleus | [Smart-seq2](https://www.nature.com/articles/nprot.2014.006) -| Overall workflow | Quality control and transcriptome quantification. | Code available from the [WARP repository](https://github.com/broadinstitute/warp/tree/develop/pipelines/skylab/smartseq2_single_nucleus/SmartSeq2SingleNucleus.wdl) in GitHub | -| Workflow language | WDL | [openWDL](https://github.com/openwdl/wdl) | -| Genomic reference sequence (for validation) | GRCm38 mouse genome primary sequence. | GENCODE GRCm38 [mouse reference files](http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/GRCm38.primary_assembly.genome.fa.gz) | -| Transcriptomic reference annotation (for validation) | Modified M23 GTF built with the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl). | GENCODE [M23 GTF](https://www.gencodegenes.org/mouse/release_M23.html); modified version available [Broad references](gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf) | -| Aligner | STAR | [STAR](https://github.com/alexdobin/STAR) | -| QC metrics | Picard | [Broad Institute](https://broadinstitute.github.io/picard/) | -| Transcript quantification | featureCounts (utilities for counting reads to genomic features). | [featureCounts](http://subread.sourceforge.net/) -| Data input file format | File format in which sequencing data is provided. | [FASTQ](https://academic.oup.com/nar/article/38/6/1767/3112533) | -| Data output file formats | File formats in which Smart-seq2 output is provided. | [BAM](http://samtools.github.io/hts-specs/), h5ad (counts and metrics; generated with [anndata v.0.7.8)](https://anndata.readthedocs.io/en/latest/), TSV (counts) | +| Pipeline features | Description | Source | +|-------------------|---------------------------------------------------------------|-----------------------| +| Assay type | Smart-seq2 Single Nucleus | [Smart-seq2](https://www.nature.com/articles/nprot.2014.006) +| Overall workflow | Quality control and transcriptome quantification. | Code available from the [WARP repository](https://github.com/broadinstitute/warp/tree/develop/pipelines/skylab/smartseq2_single_nucleus/SmartSeq2SingleNucleus.wdl) in GitHub | +| Workflow language | WDL | [openWDL](https://github.com/openwdl/wdl) | +| Genomic reference sequence (for validation)| GRCm38 mouse genome primary sequence. 
| GENCODE GRCm38 [mouse reference files](http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/GRCm38.primary_assembly.genome.fa.gz) | +| Transcriptomic reference annotation (for validation) | Modified M23 GTF built with the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl).| GENCODE [M23 GTF](https://www.gencodegenes.org/mouse/release_M23.html); modified version available [Broad references](gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf)| +| Aligner | STAR | [STAR](https://github.com/alexdobin/STAR) | +| QC metrics | Picard | [Broad Institute](https://broadinstitute.github.io/picard/) | +| Transcript quantification | featureCounts (utilities for counting reads to genomic features). | [featureCounts](http://subread.sourceforge.net/) +| Data input file format | File format in which sequencing data is provided. | [FASTQ](https://academic.oup.com/nar/article/38/6/1767/3112533) | +| Data output file formats | File formats in which Smart-seq2 output is provided. | [BAM](http://samtools.github.io/hts-specs/), Loom (counts and metrics; generated with [Loompy v.3.0.6)](http://loompy.org/), TSV (counts) | ## Set-Up @@ -100,15 +100,15 @@ The tools each task employs in the Multi-snSS2 workflow are detailed in the tabl To see specific tool parameters, select the task WDL link in the table; then view the `command {}` section of the task WDL script. To view or use the exact tool software, see the task's Docker image which is specified in the task WDL `# runtime values` section as `String docker =`. -| Task name and WDL link | Tool | Software | Description | -|-----------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------| --- |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [CheckInputs.checkInputArrays](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/CheckInputs.wdl) | --- | Bash | Checks the inputs and initiates the per cell processing. | -| [StarAlign.STARGenomeRefVersion](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/StarAlign.wdl) | --- | Bash | Reads the `tar_star_reference` file to obtain the genomic reference source, build version, and annotation version. | -| [TrimAdapters.TrimAdapters](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/TrimAdapters.wdl) | [fastq-mcf](https://github.com/ExpressionAnalysis/ea-utils/tree/master/clipper) | [ea-utils](https://github.com/ExpressionAnalysis/ea-utils) | Trims adapter sequences from the FASTQ inputs | -| [StarAlign.StarAlignFastqMultisample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/StarAlign.wdl) | STAR | [STAR](https://github.com/alexdobin/STAR) | Aligns reads to the genome. | -| [Picard.RemoveDuplicatesFromBam](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | MarkDuplicates, AddOrReplaceReadGroups | [Picard](https://broadinstitute.github.io/picard/) | Removes duplicate reads, producing a new BAM output; adds regroups to deduplicated BAM. 
| -| [Picard.CollectMultipleMetricsMultiSample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | CollectMultipleMetrics | [Picard](https://broadinstitute.github.io/picard/) | Collects QC metrics on the deduplicated BAM files. | -| [CountAlignments.CountAlignments](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/FeatureCounts.wdl) | FeatureCounts | [Subread](http://subread.sourceforge.net/), Python 3 | Uses a custom GTF with featureCounts and Python to mark introns, create a BAM that has alignments spanning intron-exon junctions removed, and counts exons using the custom BAM and by excluding intron tags. | +| Task name and WDL link | Tool | Software | Description | +| --- | --- | --- | --- | +| [CheckInputs.checkInputArrays](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/CheckInputs.wdl) | --- | Bash | Checks the inputs and initiates the per cell processing. | +| [StarAlign.STARGenomeRefVersion](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/StarAlign.wdl) | --- | Bash | Reads the `tar_star_reference` file to obtain the genomic reference source, build version, and annotation version. | +| [TrimAdapters.TrimAdapters](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/TrimAdapters.wdl) | [fastq-mcf](https://github.com/ExpressionAnalysis/ea-utils/tree/master/clipper) | [ea-utils](https://github.com/ExpressionAnalysis/ea-utils) | Trims adapter sequences from the FASTQ inputs | +| [StarAlign.StarAlignFastqMultisample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/StarAlign.wdl) | STAR | [STAR](https://github.com/alexdobin/STAR) | Aligns reads to the genome. | +| [Picard.RemoveDuplicatesFromBam](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | MarkDuplicates, AddOrReplaceReadGroups | [Picard](https://broadinstitute.github.io/picard/) | Removes duplicate reads, producing a new BAM output; adds regroups to deduplicated BAM. | +| [Picard.CollectMultipleMetricsMultiSample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | CollectMultipleMetrics | [Picard](https://broadinstitute.github.io/picard/) | Collects QC metrics on the deduplicated BAM files. | +| [CountAlignments.CountAlignments](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/FeatureCounts.wdl) | FeatureCounts | [Subread](http://subread.sourceforge.net/), Python 3 | Uses a custom GTF with featureCounts and Python to mark introns, create a BAM that has alignments spanning intron-exon junctions removed, and counts exons using the custom BAM and by excluding intron tags. | | [H5adUtils.SingleNucleusSmartSeq2H5adOutput](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/H5adUtils.wdl) | Custom script: [create_h5ad_snss2.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/create_h5ad_snss2.py) | Python 3 | Creates the matrix files (h5ad format) for each sample. | | [H5adUtils.AggregateSmartSeq2H5ad](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/H5adUtils.wdl) | Custom script: [ss2_h5ad_merge.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/ss2_h5ad_merge.py) | Python 3 | Aggregates the matrix files (h5ad format) for each sample to produce one final h5ad output. 
| @@ -146,12 +146,11 @@ Below is example anndata code for accessing the h5ad's exonic, intronic, and who ```python import anndata -ds = anndata.read_h5ad("/PATH/TO/File.h5ad") -count_exons = ds[:,:] #geneXcell table for the exonic read counts -count_introns = ds.layers["intron_counts"] #geneXcell table for the intronic read counts -gene_counts = count_exons + count_introns +adata = anndata.read_h5ad("/PATH/TO/File.h5ad") +count_exons = adata.X #geneXcell table for the exonic read counts +count_introns = adata.layers["intron_counts"] #geneXcell table for the intronic read counts ``` -TODO This is not right ^ +If you would like to get the counts for both introns and exons, you can sum the counts together. To read more about the h5ad file format and use of layers, see the [h5ad documentation](https://anndata.readthedocs.io/en/latest/). @@ -159,12 +158,12 @@ To read more about the h5ad file format and use of layers, see the [h5ad documen The table below details the final outputs of the Multi-snSS2 workflow. -| Output variable name | Description | Type | -|-------------------------| --- |-------------| +| Output variable name | Description | Type | +| --- | --- | --- | | h5ad_output | Cell-by-gene count matrix that includes the raw exon counts (in matrix), intron counts (in matrix layer), cell metrics (column attributes) and gene IDs (row attributes). | h5ad | -| exon_intron_count_files | Array of TXT files (one per cell) that contain intronic and exonic counts. | Array [TXT] | -| bam_files | Array of genome-aligned BAM files (one for each cell) generated with STAR. | Array [BAM] | -| pipeline_version_out | Version of the processing pipeline run on this data. | String | +| exon_intron_count_files | Array of TXT files (one per cell) that contain intronic and exonic counts. | Array [TXT]| +| bam_files | Array of genome-aligned BAM files (one for each cell) generated with STAR. | Array [BAM]| +| pipeline_version_out | Version of the processing pipeline run on this data. | String | The h5ad matrix is the default output. See the [create_h5ad_snss2.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/create_h5ad_snss2.py) script for the detailed code. This matrix contains the count matrices, as well as the gene and cell metrics detailed in the [Multi-snSS2 Count Matrix Overview](./count-matrix-overview.md). From 065a5345651b736aa49468f4ed5616758e3d76cf Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 6 Sep 2024 11:25:56 -0400 Subject: [PATCH 19/24] formatting again --- .../README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md index 24c7222d19..b4fb1445bb 100644 --- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md @@ -109,8 +109,8 @@ To see specific tool parameters, select the task WDL link in the table; then vie | [Picard.RemoveDuplicatesFromBam](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | MarkDuplicates, AddOrReplaceReadGroups | [Picard](https://broadinstitute.github.io/picard/) | Removes duplicate reads, producing a new BAM output; adds regroups to deduplicated BAM. 
| | [Picard.CollectMultipleMetricsMultiSample](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/Picard.wdl) | CollectMultipleMetrics | [Picard](https://broadinstitute.github.io/picard/) | Collects QC metrics on the deduplicated BAM files. | | [CountAlignments.CountAlignments](https://github.com/broadinstitute/warp/tree/master/tasks/skylab/FeatureCounts.wdl) | FeatureCounts | [Subread](http://subread.sourceforge.net/), Python 3 | Uses a custom GTF with featureCounts and Python to mark introns, create a BAM that has alignments spanning intron-exon junctions removed, and counts exons using the custom BAM and by excluding intron tags. | -| [H5adUtils.SingleNucleusSmartSeq2H5adOutput](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/H5adUtils.wdl) | Custom script: [create_h5ad_snss2.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/create_h5ad_snss2.py) | Python 3 | Creates the matrix files (h5ad format) for each sample. | -| [H5adUtils.AggregateSmartSeq2H5ad](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/H5adUtils.wdl) | Custom script: [ss2_h5ad_merge.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/ss2_h5ad_merge.py) | Python 3 | Aggregates the matrix files (h5ad format) for each sample to produce one final h5ad output. | +| [H5adUtils.SingleNucleusSmartSeq2H5adOutput](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/H5adUtils.wdl) | Custom script: [create_h5ad_snss2.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/create_h5ad_snss2.py) | Python 3 | Creates the matrix files (h5ad format) for each sample. | +| [H5adUtils.AggregateSmartSeq2H5ad](https://github.com/broadinstitute/warp/blob/master/tasks/skylab/H5adUtils.wdl) | Custom script: [ss2_h5ad_merge.py](https://github.com/broadinstitute/warp-tools/blob/develop/tools/scripts/ss2_h5ad_merge.py) | Python 3 | Aggregates the matrix files (h5ad format) for each sample to produce one final h5ad output. | #### 1. Trimming adapters The TrimAdapters task uses the adapter list reference file to run the [fastq-mcf](https://github.com/ExpressionAnalysis/ea-utils/tree/master/clipper) tool. This tool identifies the adapters in the input FASTQ files and performs clipping by using a subsampling parameter of 200,000 reads. The task outputs the trimmed FASTQ files which are then used for alignment. @@ -160,7 +160,7 @@ The table below details the final outputs of the Multi-snSS2 workflow. | Output variable name | Description | Type | | --- | --- | --- | -| h5ad_output | Cell-by-gene count matrix that includes the raw exon counts (in matrix), intron counts (in matrix layer), cell metrics (column attributes) and gene IDs (row attributes). | h5ad | +| h5ad_output | Cell-by-gene count matrix that includes the raw exon counts (in matrix), intron counts (in matrix layer), cell metrics (column attributes) and gene IDs (row attributes). | h5ad | | exon_intron_count_files | Array of TXT files (one per cell) that contain intronic and exonic counts. | Array [TXT]| | bam_files | Array of genome-aligned BAM files (one for each cell) generated with STAR. | Array [BAM]| | pipeline_version_out | Version of the processing pipeline run on this data. 
| String | From 8848522ab002b73460040b8edf25b1d2af80996a Mon Sep 17 00:00:00 2001 From: npetrill Date: Fri, 6 Sep 2024 11:27:05 -0400 Subject: [PATCH 20/24] formatting again --- .../Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md index b4fb1445bb..0838117702 100644 --- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md @@ -27,7 +27,7 @@ You can run the [Smart-seq2 Single Nucleus Multi-Sample workflow](https://github ## Quick start table | Pipeline features | Description | Source | -|-------------------|---------------------------------------------------------------|-----------------------| +|---|---|---| | Assay type | Smart-seq2 Single Nucleus | [Smart-seq2](https://www.nature.com/articles/nprot.2014.006) | Overall workflow | Quality control and transcriptome quantification. | Code available from the [WARP repository](https://github.com/broadinstitute/warp/tree/develop/pipelines/skylab/smartseq2_single_nucleus/SmartSeq2SingleNucleus.wdl) in GitHub | | Workflow language | WDL | [openWDL](https://github.com/openwdl/wdl) | From 11364d90bdc90e51236bf3801e20b2acb7044c4b Mon Sep 17 00:00:00 2001 From: npetrill Date: Wed, 11 Sep 2024 11:38:17 -0400 Subject: [PATCH 21/24] changelogs and add SingleNucleusSmartSeq2LoomOutput back in --- ...iSampleSmartSeq2SingleNucleus.changelog.md | 5 + .../MultiSampleSmartSeq2SingleNucleus.wdl | 2 +- tasks/skylab/LoomUtils.wdl | 91 +++++++++++++++++++ 3 files changed, 97 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index 67d38b59b2..f6556b3bbb 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,3 +1,8 @@ +# 2.0.0 +2024-09-11 (Dat of Last Commit) + +* Added h5ad as a format option for the cell by gene matrix output. The h5ad has the same layers and global attributes (unstructured data in h5ad) as the previous Loom output + # 1.4.2 2024-08-25-02 (Dat of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index 4cfc9c97dc..38ae12ff23 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -57,7 +57,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { } # Version of this pipeline - String pipeline_version = "1.4.2" + String pipeline_version = "2.0.0" if (false) { String? 
none = "None" diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index 960fccf350..1c13ad7994 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -298,7 +298,98 @@ task SingleNucleusOptimusLoomOutput { } +task SingleNucleusSmartSeq2LoomOutput { + input { + #runtime values + String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" + + Array[File] alignment_summary_metrics + Array[File] dedup_metrics + Array[File] gc_bias_summary_metrics + + # introns counts + Array[File] introns_counts + # exons counts + Array[File] exons_counts + # annotation file + File annotation_introns_added_gtf + # name of the sample + Array[String] input_ids + Array[String]? input_names + String? input_id_metadata_field + String? input_name_metadata_field + + String pipeline_version + Int preemptible = 3 + Int disk = 200 + Int machine_mem_mb = 8000 + Int cpu = 4 + } + + meta { + description: "This task will convert output from the SmartSeq2SingleNucleus pipeline into a loom file. Contrary to the SmartSeq2 single cell where there is only RSEM counts, here we have intronic and exonic counts per gene name" + } + parameter_meta { + preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" + } + + command <<< + set -euo pipefail + + declare -a introns_counts_files=(~{sep=' ' introns_counts}) + declare -a exons_counts_files=(~{sep=' ' exons_counts}) + declare -a output_prefix=(~{sep=' ' input_ids}) + declare -a alignment_summary_metrics_list=(~{sep=' 'alignment_summary_metrics}) + declare -a dedup_metrics_list=(~{sep=' 'dedup_metrics}) + declare -a gc_bias_summary_metrics_list=(~{sep=' 'gc_bias_summary_metrics}) + + for (( i=0; i<${#introns_counts_files[@]}; ++i)); + do + # creates a table with gene_id, gene_name, intron and exon counts + echo "Running create_snss2_counts_csv." + python /usr/gitc/create_snss2_counts_csv.py \ + --in-gtf ~{annotation_introns_added_gtf} \ + --intron-counts ${introns_counts_files[$i]} \ + --exon-counts ${exons_counts_files[$i]} \ + -o "${output_prefix[$i]}.exon_intron_counts.tsv" + echo "Success create_snss2_counts_csv." + + # groups the QC file into one file + echo "Running GroupQCs" + GroupQCs -f "${alignment_summary_metrics_list[$i]}" "${dedup_metrics_list[$i]}" "${gc_bias_summary_metrics_list[$i]}" \ + -t Picard -o "${output_prefix[$i]}.Picard_group" + echo "Success GroupQCs" + + # create the loom file + echo "Running create_loom_snss2." 
+ python3 /usr/gitc/create_loom_snss2.py \ + --qc_files "${output_prefix[$i]}.Picard_group.csv" \ + --count_results "${output_prefix[$i]}.exon_intron_counts.tsv" \ + --output_loom_path "${output_prefix[$i]}.loom" \ + --input_id ${output_prefix[$i]} \ + ~{"--input_id_metadata_field " + input_id_metadata_field} \ + ~{"--input_name_metadata_field " + input_name_metadata_field} \ + --pipeline_version ~{pipeline_version} + + echo "Success create_loom_snss2" + done; + >>> + + runtime { + docker: docker + cpu: cpu + memory: "~{machine_mem_mb} MiB" + disks: "local-disk ~{disk} HDD" + disk: disk + " GB" # TES + preemptible: preemptible + } + + output { + Array[File] loom_output = glob("*.loom") + Array[File] exon_intron_counts = glob("*exon_intron_counts.tsv") + } +} task SlideSeqLoomOutput { input { File bead_locations From f30f2c5d44f2dff9efcfc2229dc75375df55eaea Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 11 Sep 2024 15:38:51 +0000 Subject: [PATCH 22/24] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index c916cdbb41..ceda6bec79 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,5 +1,5 @@ Pipeline Name Version Date of Last Commit -MultiSampleSmartSeq2SingleNucleus 1.4.2 2024-08-25-02 +MultiSampleSmartSeq2SingleNucleus 2.0.0 2024-09-11 MultiSampleSmartSeq2 2.2.21 2023-04-19 PairedTag 1.6.0 2024-08-02 Optimus 7.6.0 2024-08-06 From e2a7217bc176efd02b166d2e36f73f53f7b72bb0 Mon Sep 17 00:00:00 2001 From: npetrill Date: Wed, 11 Sep 2024 13:36:44 -0400 Subject: [PATCH 23/24] changelogs --- pipelines/skylab/atac/atac.changelog.md | 5 +++++ pipelines/skylab/atac/atac.wdl | 2 +- pipelines/skylab/multiome/Multiome.changelog.md | 4 ++++ pipelines/skylab/multiome/Multiome.wdl | 2 +- pipelines/skylab/optimus/Optimus.changelog.md | 4 ++++ pipelines/skylab/optimus/Optimus.wdl | 2 +- pipelines/skylab/paired_tag/PairedTag.changelog.md | 4 ++++ pipelines/skylab/paired_tag/PairedTag.wdl | 2 +- pipelines/skylab/slideseq/SlideSeq.changelog.md | 5 +++++ pipelines/skylab/slideseq/SlideSeq.wdl | 2 +- .../smartseq2_multisample/MultiSampleSmartSeq2.changelog.md | 5 +++++ .../skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl | 2 +- .../SmartSeq2SingleSample.changelog.md | 5 +++++ .../skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl | 2 +- 14 files changed, 39 insertions(+), 7 deletions(-) diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index 544fb8ea50..d64620354a 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -1,3 +1,8 @@ +# 2.3.1 +2024-09-11 (Date of Last Commit) + +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. 
This change does not affect the atac pipeline + # 2.3.0 2024-08-29 (Date of Last Commit) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index b207e393fb..8918a8d8ad 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -46,7 +46,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "2.3.0" + String pipeline_version = "2.3.1" # Determine docker prefix based on cloud provider String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index 98904837e8..843e4baced 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,3 +1,7 @@ +# 5.6.1 +2024-09-11 (Date of Last Commit) +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the Multiome pipeline + # 5.6.0 2024-08-02 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index d647e82944..3979a4fa7d 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -9,7 +9,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow Multiome { - String pipeline_version = "5.6.0" + String pipeline_version = "5.6.1" input { diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index f64f9eb4bc..9a51ef009d 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,3 +1,7 @@ +# 7.6.1 +2024-09-11 (Date of Last Commit) +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the Optimus pipeline + # 7.6.0 2024-08-06 (Date of Last Commit) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 5f25ffa49d..eb03dfb30a 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -71,7 +71,7 @@ workflow Optimus { # version of this pipeline - String pipeline_version = "7.6.0" + String pipeline_version = "7.6.1" # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index ba4a05376c..7ad1571702 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,3 +1,7 @@ +# 1.6.1 +2024-09-11 (Date of Last Commit) +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. 
This change does not affect the PairedTag pipeline + # 1.6.0 2024-08-02 (Date of Last Commit) diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index 4206f4fabb..c401d25928 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow PairedTag { - String pipeline_version = "1.6.0" + String pipeline_version = "1.6.1" input { diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index 4c49b67467..dbbe866338 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,3 +1,8 @@ +# 3.4.1 +2024-09-11 (Date of Last Commit) + +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the SlideSeq pipeline + # 3.4.0 2024-08-06 (Date of Last Commit) diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index c779682f06..0ca5d4edc7 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow SlideSeq { - String pipeline_version = "3.4.0" + String pipeline_version = "3.4.1" input { Array[File] r1_fastq diff --git a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md index 5bc4212b9d..e5960f2ddf 100644 --- a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md +++ b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md @@ -1,3 +1,8 @@ +# 2.2.22 +2024-09-11 (Date of Last Commit) + +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the MultiSmartSeq2 pipeline + # 2.2.21 2023-04-19 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl index 91c9d4f882..0717f23b78 100644 --- a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl +++ b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl @@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2 { Boolean paired_end } # Version of this pipeline - String pipeline_version = "2.2.21" + String pipeline_version = "2.2.22" if (false) { String? none = "None" diff --git a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md index 421964d455..b706b6a96e 100644 --- a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md +++ b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md @@ -1,3 +1,8 @@ +# 5.1.21 +2024-09-11 (Date of Last Commit) + +* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. 
This change does not affect the SmartSeq2SingleSample pipeline + # 5.1.20 2023-04-19 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl index efec1c4163..b9df384859 100644 --- a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl +++ b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl @@ -36,7 +36,7 @@ workflow SmartSeq2SingleSample { } # version of this pipeline - String pipeline_version = "5.1.20" + String pipeline_version = "5.1.21" parameter_meta { genome_ref_fasta: "Genome reference in fasta format" From e12201095df272fe5cd832f1cbf136f8f3739ea4 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 11 Sep 2024 17:37:31 +0000 Subject: [PATCH 24/24] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index ceda6bec79..e675c060e4 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,15 +1,15 @@ Pipeline Name Version Date of Last Commit MultiSampleSmartSeq2SingleNucleus 2.0.0 2024-09-11 -MultiSampleSmartSeq2 2.2.21 2023-04-19 -PairedTag 1.6.0 2024-08-02 -Optimus 7.6.0 2024-08-06 -atac 2.3.0 2024-08-29 +MultiSampleSmartSeq2 2.2.22 2024-09-11 +PairedTag 1.6.1 2024-09-11 +Optimus 7.6.1 2024-09-11 +atac 2.3.1 2024-09-11 snm3C 4.0.4 2024-08-06 -SmartSeq2SingleSample 5.1.20 2023-04-19 -Multiome 5.6.0 2024-08-02 +SmartSeq2SingleSample 5.1.21 2024-09-11 +Multiome 5.6.1 2024-09-11 scATAC 1.3.2 2023-08-03 BuildIndices 3.0.0 2023-12-06 -SlideSeq 3.4.0 2024-08-06 +SlideSeq 3.4.1 2024-09-11 BuildCembaReferences 1.0.0 2020-11-15 CEMBA 1.1.7 2024-09-06 GDCWholeGenomeSomaticSingleSample 1.3.3 2024-09-06