diff --git a/pipelines/broad/arrays/imputation/Imputation.changelog.md b/pipelines/broad/arrays/imputation/Imputation.changelog.md index e96dabb6a6..e798bc7671 100644 --- a/pipelines/broad/arrays/imputation/Imputation.changelog.md +++ b/pipelines/broad/arrays/imputation/Imputation.changelog.md @@ -1,3 +1,9 @@ +# 1.1.13 +2024-05-21 (Date of Last Commit) + +* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. This change does not affect this pipeline. + + # 1.1.12 2023-12-18 (Date of Last Commit) diff --git a/pipelines/broad/arrays/imputation/Imputation.wdl b/pipelines/broad/arrays/imputation/Imputation.wdl index 44d5a93cd0..2780b64e62 100644 --- a/pipelines/broad/arrays/imputation/Imputation.wdl +++ b/pipelines/broad/arrays/imputation/Imputation.wdl @@ -6,7 +6,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils workflow Imputation { - String pipeline_version = "1.1.12" + String pipeline_version = "1.1.13" input { Int chunkLength = 25000000 diff --git a/pipelines/broad/arrays/single_sample/Arrays.changelog.md b/pipelines/broad/arrays/single_sample/Arrays.changelog.md index ac127a1d41..0f76b576bd 100644 --- a/pipelines/broad/arrays/single_sample/Arrays.changelog.md +++ b/pipelines/broad/arrays/single_sample/Arrays.changelog.md @@ -1,3 +1,7 @@ +# 2.6.25 +2024-07-09 (Date of Last Commit) +* Updated tasks GermlineVariantDiscovery.wdl and Qc.wdl to allow multi-cloud dockers; this does not affect this pipeline. 
+ # 2.6.24 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/arrays/single_sample/Arrays.wdl b/pipelines/broad/arrays/single_sample/Arrays.wdl index 75e52e5c90..2455e4ab20 100644 --- a/pipelines/broad/arrays/single_sample/Arrays.wdl +++ b/pipelines/broad/arrays/single_sample/Arrays.wdl @@ -23,7 +23,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils workflow Arrays { - String pipeline_version = "2.6.24" + String pipeline_version = "2.6.25" input { String chip_well_barcode diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md index e81dd81b4f..a7e79abe57 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.changelog.md @@ -1,3 +1,8 @@ +# 2.2.0 +2024-07-09 (Date of Last Commit) + +* Updated ReblockGVCF.wdl to run in Azure. cloud_provider is a new, required input. 
+ # 2.1.13 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.exome.inputs.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.exome.inputs.json index 58f7ac8dcd..b4e84a89a2 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.exome.inputs.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.exome.inputs.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/ExomeGermlineSingleSample/truth/plumbing/master/RP-929.NA12878/NA12878_PLUMBING.rb.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider":"gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl index 6510f1c28c..e64854bf30 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl @@ -2,10 +2,11 @@ version 1.0 import "../../../../../../tasks/broad/GermlineVariantDiscovery.wdl" as Calling import "../../../../../../tasks/broad/Qc.wdl" as QC +import "../../../../../../tasks/broad/Utilities.wdl" as utils workflow ReblockGVCF { - String pipeline_version = "2.1.13" + String pipeline_version = "2.2.0" input { @@ -20,9 +21,22 @@ workflow ReblockGVCF { String? annotations_to_remove_command Boolean? 
move_filters_to_genotypes String gvcf_file_extension = ".g.vcf.gz" + String cloud_provider } String gvcf_basename = basename(gvcf, gvcf_file_extension) + # docker images + String gatk_docker_gcp = "us.gcr.io/broad-gatk/gatk:4.5.0.0" + String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:1.0.0" + String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure + + # make sure either gcp or azure is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." + } + } call Calling.Reblock as Reblock { input: @@ -35,7 +49,8 @@ workflow ReblockGVCF { annotations_to_keep_command = annotations_to_keep_command, annotations_to_remove_command = annotations_to_remove_command, move_filters_to_genotypes = move_filters_to_genotypes, - output_vcf_filename = gvcf_basename + ".rb.g.vcf.gz" + output_vcf_filename = gvcf_basename + ".rb.g.vcf.gz", + docker_path = gatk_docker } # Validate the (g)VCF output of HaplotypeCaller @@ -51,7 +66,7 @@ workflow ReblockGVCF { calling_intervals_defined = defined(calling_interval_list), is_gvcf = true, extra_args = "--no-overlaps", - gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0" + docker_path = gatk_docker } output { diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.index.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.index.json index aa862a064f..2ea7652b7e 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.index.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.index.json @@ -3,6 +3,7 @@ "ReblockGVCF.gvcf_index": 
"gs://broad-gotc-test-storage/reblock_gvcf/wgs/plumbing/input/G96830.NA12878/index_in_different_location/NA12878_PLUMBING.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } \ No newline at end of file diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.json index 76086ae169..81d7cc66ee 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/G96830.NA12878.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/NA12878.ultima.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/NA12878.ultima.json index 33b71d9875..1e903059bf 100644 --- 
a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/NA12878.ultima.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/NA12878.ultima.json @@ -6,5 +6,6 @@ "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", "ReblockGVCF.tree_score_cutoff": 0.2, - "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS" + "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/RP-929.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/RP-929.NA12878.json index 5bd0ce00af..b1717905be 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/RP-929.NA12878.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Plumbing/RP-929.NA12878.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/plumbing/input/RP-929.NA12878/NA12878_PLUMBING.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git 
a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json index b7dea8da45..757f468933 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/C1963.CHMI_CHMI3_Nex1/CHMI_CHMI3_Nex1.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C862.NA19238.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C862.NA19238.json index c2a496da55..3198fdf70d 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C862.NA19238.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/C862.NA19238.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/C862.NA19238/NA19238.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": 
"gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12878.json index e5791f69bb..626f8fb268 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12878.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12878.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/D5327.NA12878/NA12878.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12891.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12891.json index 28fe2ca47f..35b71a1271 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12891.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/D5327.NA12891.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/D5327.NA12891/NA12891.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": 
"gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json index 33eabdc0c5..53554e2d84 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12878.json index 5518401aee..8e1d594362 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12878.json +++ 
b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12878.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12891.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12891.json index 67cd0891c3..561e7dfea4 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12891.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12891.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12892.json 
b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12892.json index 84acd3b6eb..c8ae0e0e8f 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12892.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G94982.NA12892.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G96830.NA12878.json index 2ff9d8a64a..881ce23794 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G96830.NA12878.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/G96830.NA12878.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": 
"gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.bge.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.bge.json index 5e99cbce58..459a89bc2f 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.bge.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.bge.json @@ -7,5 +7,6 @@ "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", "ReblockGVCF.annotations_to_remove_command": "--format-annotations-to-remove PRI", "ReblockGVCF.move_filters_to_genotypes": true, - "ReblockGVCF.gvcf_file_extension": ".gvcf.gz" + "ReblockGVCF.gvcf_file_extension": ".gvcf.gz", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.ultima.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.ultima.json index 4dd0f918da..ac12ce5429 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.ultima.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/NA12878.ultima.json @@ -6,5 +6,6 @@ "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", "ReblockGVCF.tree_score_cutoff": 0.2, - "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS" + "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS 
--annotations-to-keep FILTERED_HAPS", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-1535.NA17-308.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-1535.NA17-308.json index 9a2ad60cf6..5bed19c39a 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-1535.NA17-308.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-1535.NA17-308.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/RP-1535.NA17-308/NA17-308.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-518.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-518.NA12878.json index b3fbe04a0d..8136913847 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-518.NA12878.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/Scientific/RP-518.NA12878.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": 
"gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Plumbing/RP-929.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Plumbing/RP-929.NA12878.json index 5bd0ce00af..b1717905be 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Plumbing/RP-929.NA12878.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Plumbing/RP-929.NA12878.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/plumbing/input/RP-929.NA12878/NA12878_PLUMBING.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C1963.CHMI_CHMI3_Nex1.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C1963.CHMI_CHMI3_Nex1.json index b7dea8da45..757f468933 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C1963.CHMI_CHMI3_Nex1.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C1963.CHMI_CHMI3_Nex1.json @@ -3,5 +3,6 @@ 
"ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/C1963.CHMI_CHMI3_Nex1/CHMI_CHMI3_Nex1.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C862.NA19238.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C862.NA19238.json index c2a496da55..3198fdf70d 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C862.NA19238.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/C862.NA19238.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/C862.NA19238/NA19238.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12878.json index e5791f69bb..626f8fb268 100644 --- 
a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12878.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12878.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/D5327.NA12878/NA12878.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12891.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12891.json index 28fe2ca47f..35b71a1271 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12891.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12891.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/D5327.NA12891/NA12891.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git 
a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12892.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12892.json index 9235c26a47..f5e1898ba6 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12892.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/D5327.NA12892.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/D5327.NA12892/NA12892.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/RP-1535.NA17-308.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/RP-1535.NA17-308.json index 9a2ad60cf6..5bed19c39a 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/RP-1535.NA17-308.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/exome/Scientific/RP-1535.NA17-308.json @@ -3,5 +3,6 @@ "ReblockGVCF.gvcf_index": "gs://broad-gotc-test-storage/reblock_gvcf/exome/scientific/input/RP-1535.NA17-308/NA17-308.g.vcf.gz.tbi", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - 
"ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/G96830.NA12878.json index 76086ae169..81d7cc66ee 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/G96830.NA12878.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/G96830.NA12878.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/NA12878.ultima.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/NA12878.ultima.json index 33b71d9875..1e903059bf 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/NA12878.ultima.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Plumbing/NA12878.ultima.json @@ -6,5 +6,6 @@ "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", "ReblockGVCF.ref_dict": 
"gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", "ReblockGVCF.tree_score_cutoff": 0.2, - "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS" + "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94794.CHMI_CHMI3_WGS2.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94794.CHMI_CHMI3_WGS2.json index 33eabdc0c5..53554e2d84 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94794.CHMI_CHMI3_WGS2.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94794.CHMI_CHMI3_WGS2.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12878.json index 5518401aee..8e1d594362 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12878.json +++ 
b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12878.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12891.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12891.json index 67cd0891c3..561e7dfea4 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12891.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12891.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12892.json 
b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12892.json index 84acd3b6eb..c8ae0e0e8f 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12892.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G94982.NA12892.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G96830.NA12878.json index 2ff9d8a64a..881ce23794 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G96830.NA12878.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/G96830.NA12878.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": 
"gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/NA12878.ultima.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/NA12878.ultima.json index 4dd0f918da..ac12ce5429 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/NA12878.ultima.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/NA12878.ultima.json @@ -6,5 +6,6 @@ "ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", "ReblockGVCF.tree_score_cutoff": 0.2, - "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS" + "ReblockGVCF.annotations_to_keep_command": "--annotations-to-keep TREE_SCORE --annotations-to-keep ASSEMBLED_HAPS --annotations-to-keep FILTERED_HAPS", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/RP-518.NA12878.json b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/RP-518.NA12878.json index b3fbe04a0d..8136913847 100644 --- a/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/RP-518.NA12878.json +++ b/pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/test_inputs/wgs/Scientific/RP-518.NA12878.json @@ -4,5 +4,6 @@ "ReblockGVCF.calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", "ReblockGVCF.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", 
"ReblockGVCF.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict" + "ReblockGVCF.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ReblockGVCF.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md index 2e8fa9a792..45a45b81eb 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md @@ -1,3 +1,8 @@ +# 3.1.21 +2024-07-09 (Date of Last Commit) + +* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers + # 3.1.20 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl index 76d47d5515..1197dcbdd2 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl @@ -40,11 +40,12 @@ import "../../../../../../tasks/broad/BamProcessing.wdl" as Processing import "../../../../../../tasks/broad/BamToCram.wdl" as ToCram import "../../../../../../pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl" as ToGvcf import "../../../../../../structs/dna_seq/DNASeqStructs.wdl" +import "../../../../../../tasks/broad/Utilities.wdl" as utils # WORKFLOW DEFINITION workflow ExomeGermlineSingleSample { - String pipeline_version = "3.1.20" + String pipeline_version = "3.1.21" input { @@ -62,6 +63,21 @@ workflow ExomeGermlineSingleSample { 
Boolean skip_reblocking = false Boolean provide_bam_output = false + + String cloud_provider + } + + # docker images + String gatk_docker_gcp = "us.gcr.io/broad-gatk/gatk:4.5.0.0" + String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:1.0.0" + String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure + + # make sure either gcp or azure is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." + } } # Not overridable: @@ -141,7 +157,8 @@ workflow ExomeGermlineSingleSample { base_file_name = sample_and_unmapped_bams.base_file_name, final_vcf_base_name = final_gvcf_base_name, agg_preemptible_tries = papi_settings.agg_preemptible_tries, - skip_reblocking = skip_reblocking + skip_reblocking = skip_reblocking, + cloud_provider = cloud_provider } call QC.CollectHsMetrics as CollectHsMetrics { diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Plumbing/RP-929.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Plumbing/RP-929.NA12878.json index a2f7bbfb29..17c06f79b6 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Plumbing/RP-929.NA12878.json +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Plumbing/RP-929.NA12878.json @@ -57,5 +57,6 @@ "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false, "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false, - "ExomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true + "ExomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true, + "ExomeGermlineSingleSample.cloud_provider": "gcp" } diff --git 
a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json index 1c4ba00d72..163e2f8265 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C1963.CHMI_CHMI3_Nex1.json @@ -63,5 +63,6 @@ }, "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false, - "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false + "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false, + "ExomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C862.NA19238.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C862.NA19238.json index f884c22730..c90ddcf59f 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C862.NA19238.json +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/C862.NA19238.json @@ -71,5 +71,6 @@ }, "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false, - "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false + "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false, + "ExomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12878.json index 79b98889b0..a302f38a4f 100644 --- 
a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12878.json +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12878.json @@ -56,5 +56,6 @@ }, "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false, - "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false + "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false, + "ExomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12891.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12891.json index 72722de383..945d7bb79c 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12891.json +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12891.json @@ -56,5 +56,6 @@ }, "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false, - "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false + "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false, + "ExomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12892.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12892.json index 028be345a3..67ee0a8bd0 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12892.json +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/D5327.NA12892.json @@ -57,5 +57,6 @@ }, "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false, 
- "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false + "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false, + "ExomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/RP-1535.NA17-308.json b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/RP-1535.NA17-308.json index ab6c472216..1d8834a98e 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/RP-1535.NA17-308.json +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/test_inputs/Scientific/RP-1535.NA17-308.json @@ -73,5 +73,6 @@ }, "ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false, - "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false + "ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false, + "ExomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md index 8ac03e295e..88c0a43cbf 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md +++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md @@ -1,3 +1,8 @@ +# 1.0.18 +2024-07-09 (Date of Last Commit) + +* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers. 
This change does not affect this pipeline. + # 1.0.17 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl index ca0c578b74..b9270c3b3c 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl +++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl @@ -50,7 +50,7 @@ workflow UltimaGenomicsWholeGenomeGermline { filtering_model_no_gt_name: "String describing the optional filtering model; default set to rf_model_ignore_gt_incl_hpol_runs" } - String pipeline_version = "1.0.17" + String pipeline_version = "1.0.18" References references = alignment_references.references @@ -196,7 +196,8 @@ workflow UltimaGenomicsWholeGenomeGermline { ref_fasta = alignment_references.references.ref_fasta, ref_fasta_index = alignment_references.references.ref_fasta_index, tree_score_cutoff = vcf_post_processing.remove_low_tree_score_sites_cutoff, - annotations_to_keep_command = vcf_post_processing.annotations_to_keep_command_for_reblocking + annotations_to_keep_command = vcf_post_processing.annotations_to_keep_command_for_reblocking, + cloud_provider = "gcp" } # Outputs that will be retained when execution is complete diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md index f5eac9286d..f8be38c09f 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md @@ -1,3 +1,9 @@ +# 3.2.0 +2024-07-09 (Date of Last Commit) + +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. 
cloud_provider is a new, required input. +* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers; this change does not affect this pipeline + # 3.1.21 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl index a8bd8bfd9f..bc87cfd0cb 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl @@ -40,7 +40,7 @@ import "../../../../../../structs/dna_seq/DNASeqStructs.wdl" workflow WholeGenomeGermlineSingleSample { - String pipeline_version = "3.1.21" + String pipeline_version = "3.2.0" input { @@ -68,6 +68,8 @@ workflow WholeGenomeGermlineSingleSample { Boolean use_bwa_mem = true Boolean allow_empty_ref_alt = false Boolean use_dragen_hard_filtering = false + + String cloud_provider } if (dragen_functional_equivalence_mode && dragen_maximum_quality_mode) { @@ -192,7 +194,8 @@ workflow WholeGenomeGermlineSingleSample { final_vcf_base_name = final_gvcf_base_name, agg_preemptible_tries = papi_settings.agg_preemptible_tries, use_gatk3_haplotype_caller = use_gatk3_haplotype_caller_, - use_dragen_hard_filtering = use_dragen_hard_filtering_ + use_dragen_hard_filtering = use_dragen_hard_filtering_, + cloud_provider = cloud_provider } if (provide_bam_output) { diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json index a2f8532cf7..871c5589de 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json +++ 
b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json @@ -50,7 +50,7 @@ "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz", "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz.tbi", "WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list", - + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp", "WholeGenomeGermlineSingleSample.papi_settings": { "preemptible_tries": 3, "agg_preemptible_tries": 3 diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json index 772ee521b8..321ecbcc02 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json @@ -58,5 +58,6 @@ "WholeGenomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true, "WholeGenomeGermlineSingleSample.CollectWgsMetrics.read_length": 250, - "WholeGenomeGermlineSingleSample.CollectRawWgsMetrics.read_length": 250 + "WholeGenomeGermlineSingleSample.CollectRawWgsMetrics.read_length": 250, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_best_results.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_best_results.json index 96f903e80d..a06a620b6c 100644 --- 
a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_best_results.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_best_results.json @@ -62,5 +62,6 @@ }, "WholeGenomeGermlineSingleSample.dragen_maximum_quality_mode": true, - "WholeGenomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true + "WholeGenomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } \ No newline at end of file diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_functional_equivalence.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_functional_equivalence.json index 50b81f310b..928deacdb5 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_functional_equivalence.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/dragen_mode_functional_equivalence.json @@ -63,5 +63,6 @@ }, "WholeGenomeGermlineSingleSample.dragen_functional_equivalence_mode": true, - "WholeGenomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true + "WholeGenomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } \ No newline at end of file diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json index 8371849045..33374a597f 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94794.CHMI_CHMI3_WGS2.json @@ -73,5 +73,6 
@@ "WholeGenomeGermlineSingleSample.papi_settings": { "preemptible_tries": 3, "agg_preemptible_tries": 3 - } + }, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_best_results.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_best_results.json index 94f90073c8..c625c8b4c3 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_best_results.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_best_results.json @@ -83,5 +83,6 @@ }, "WholeGenomeGermlineSingleSample.dragen_maximum_quality_mode": true, - "WholeGenomeGermlineSingleSample.BamToGvcf.HaplotypeCallerGATK4.memory_multiplier":2 + "WholeGenomeGermlineSingleSample.BamToGvcf.HaplotypeCallerGATK4.memory_multiplier":2, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_functional_equivalence.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_functional_equivalence.json index c4b9608f29..271675b702 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_functional_equivalence.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.dragen_mode_functional_equivalence.json @@ -82,5 +82,6 @@ }, "WholeGenomeGermlineSingleSample.dragen_functional_equivalence_mode": true, - "WholeGenomeGermlineSingleSample.BamToGvcf.HaplotypeCallerGATK4.memory_multiplier":2 + "WholeGenomeGermlineSingleSample.BamToGvcf.HaplotypeCallerGATK4.memory_multiplier":2, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } diff --git 
a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.json index 344e66dd9a..96cac538de 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12878.json @@ -73,5 +73,6 @@ "WholeGenomeGermlineSingleSample.papi_settings": { "preemptible_tries": 3, "agg_preemptible_tries": 3 - } + }, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12891.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12891.json index 650c41990f..eeccd9275b 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12891.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12891.json @@ -76,5 +76,6 @@ "WholeGenomeGermlineSingleSample.papi_settings": { "preemptible_tries": 3, "agg_preemptible_tries": 3 - } + }, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12892.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12892.json index 9372e66905..5558036b60 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12892.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G94982.NA12892.json @@ -74,5 +74,6 @@ "WholeGenomeGermlineSingleSample.papi_settings": { "preemptible_tries": 3, "agg_preemptible_tries": 3 - } + }, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G96830.NA12878.json 
b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G96830.NA12878.json index 7f5e219d59..b4e3b1574a 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G96830.NA12878.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/G96830.NA12878.json @@ -73,5 +73,6 @@ "WholeGenomeGermlineSingleSample.papi_settings": { "preemptible_tries": 3, "agg_preemptible_tries": 3 - } + }, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/RP-518.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/RP-518.NA12878.json index 2032139bad..035b62a322 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/RP-518.NA12878.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Scientific/RP-518.NA12878.json @@ -50,5 +50,6 @@ "WholeGenomeGermlineSingleSample.papi_settings": { "preemptible_tries": 3, "agg_preemptible_tries": 3 - } + }, + "WholeGenomeGermlineSingleSample.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md index ff61f46f1d..b4eb529e7d 100644 --- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md +++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md @@ -1,3 +1,8 @@ +# 2.2.0 +2024-07-09 (Date of Last Commit) + +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input. 
+ # 2.1.19 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl index bbe014fc00..ba265a80ef 100644 --- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl +++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl @@ -9,7 +9,7 @@ import "../../../../../tasks/broad/DragenTasks.wdl" as DragenTasks workflow VariantCalling { - String pipeline_version = "2.1.19" + String pipeline_version = "2.2.0" input { @@ -36,6 +36,33 @@ workflow VariantCalling { Boolean use_gatk3_haplotype_caller = false Boolean skip_reblocking = false Boolean use_dragen_hard_filtering = false + String cloud_provider + } + + # docker images + String gatk_docker_gcp = "us.gcr.io/broad-gatk/gatk:4.5.0.0" + String gatk_docker_azure = "dsppipelinedev.azurecr.io/gatk_reduced_layers:1.0.0" + String gatk_docker = if cloud_provider == "gcp" then gatk_docker_gcp else gatk_docker_azure + + String gatk_1_3_docker_gcp = "us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384" + String gatk_1_3_docker_azure = "us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384" + String gatk_1_3_docker = if cloud_provider == "gcp" then gatk_1_3_docker_gcp else gatk_1_3_docker_azure + + String picard_python_docker_gcp = "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039" + String picard_python_docker_azure = "dsppipelinedev.azurecr.io/picard-python:1.0.0-2.26.10-1663951039" + String picard_python_docker = if cloud_provider == "gcp" then picard_python_docker_gcp else picard_python_docker_azure + + String picard_cloud_docker_gcp = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10" + String picard_cloud_docker_azure = "dsppipelinedev.azurecr.io/picard-cloud:2.26.10" + String picard_cloud_docker = if cloud_provider == "gcp" then picard_cloud_docker_gcp else picard_cloud_docker_azure + + + # make sure either gcp or azure is supplied as cloud_provider input 
+ if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call Utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." + } } parameter_meta { @@ -51,7 +78,8 @@ workflow VariantCalling { ref_dict = ref_dict, alignment = input_bam, alignment_index = input_bam_index, - str_table_file = select_first([ref_str]) + str_table_file = select_first([ref_str]), + docker = gatk_docker } } @@ -62,7 +90,8 @@ workflow VariantCalling { input: interval_list = calling_interval_list, scatter_count = haplotype_scatter_count, - break_bands_at_multiples_of = break_bands_at_multiples_of + break_bands_at_multiples_of = break_bands_at_multiples_of, + docker = picard_python_docker } # We need disk to localize the sharded input and output due to the scatter for HaplotypeCaller. @@ -86,7 +115,8 @@ workflow VariantCalling { ref_fasta_index = ref_fasta_index, contamination = contamination, preemptible_tries = agg_preemptible_tries, - hc_scatter = hc_divisor + hc_scatter = hc_divisor, + docker = gatk_1_3_docker } } @@ -109,7 +139,9 @@ workflow VariantCalling { use_dragen_hard_filtering = use_dragen_hard_filtering, use_spanning_event_genotyping = use_spanning_event_genotyping, dragstr_model = DragstrAutoCalibration.dragstr_model, - preemptible_tries = agg_preemptible_tries + preemptible_tries = agg_preemptible_tries, + gatk_docker = gatk_docker + } if (use_dragen_hard_filtering) { @@ -119,7 +151,8 @@ workflow VariantCalling { input_vcf_index = HaplotypeCallerGATK4.output_vcf_index, make_gvcf = make_gvcf, vcf_basename = base_file_name, - preemptible_tries = agg_preemptible_tries + preemptible_tries = agg_preemptible_tries, + gatk_docker = gatk_docker } } @@ -130,7 +163,8 @@ workflow VariantCalling { input_bam = HaplotypeCallerGATK4.bamout, output_bam_basename = final_vcf_base_name, preemptible_tries = agg_preemptible_tries, - compression_level = 2 + compression_level = 2, + docker = picard_cloud_docker } } 
} @@ -147,7 +181,8 @@ workflow VariantCalling { input_vcfs = vcfs_to_merge, input_vcfs_indexes = vcf_indices_to_merge, output_vcf_name = final_vcf_base_name + hard_filter_suffix + merge_suffix, - preemptible_tries = agg_preemptible_tries + preemptible_tries = agg_preemptible_tries, + docker = picard_cloud_docker } if (make_gvcf && !skip_reblocking) { @@ -158,7 +193,8 @@ workflow VariantCalling { ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, ref_dict = ref_dict, - output_vcf_filename = basename(MergeVCFs.output_vcf, ".g.vcf.gz") + ".rb.g.vcf.gz" + output_vcf_filename = basename(MergeVCFs.output_vcf, ".g.vcf.gz") + ".rb.g.vcf.gz", + docker_path = gatk_docker } } @@ -183,7 +219,7 @@ workflow VariantCalling { calling_interval_list = calling_interval_list, is_gvcf = make_gvcf, extra_args = if (skip_reblocking == false) then "--no-overlaps" else "", - gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0", + docker_path = gatk_docker, preemptible_tries = agg_preemptible_tries } @@ -198,7 +234,8 @@ workflow VariantCalling { ref_dict = ref_dict, evaluation_interval_list = evaluation_interval_list, is_gvcf = make_gvcf, - preemptible_tries = agg_preemptible_tries + preemptible_tries = agg_preemptible_tries, + docker = picard_cloud_docker } output { diff --git a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/G96830.NA12878.json index 4e4be85272..c13ceb45f8 100644 --- a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/G96830.NA12878.json +++ b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/G96830.NA12878.json @@ -17,5 +17,6 @@ "VariantCalling.haplotype_scatter_count": 10, "VariantCalling.break_bands_at_multiples_of": 100000, "VariantCalling.agg_preemptible_tries": 3, - "VariantCalling.use_gatk3_haplotype_caller": true + "VariantCalling.use_gatk3_haplotype_caller": true, + "VariantCalling.cloud_provider": "gcp" } 
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/RP-929.NA12878.json b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/RP-929.NA12878.json index 1e89ca58f5..78f6c994e7 100644 --- a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/RP-929.NA12878.json +++ b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/Plumbing/RP-929.NA12878.json @@ -17,5 +17,6 @@ "VariantCalling.haplotype_scatter_count": 10, "VariantCalling.break_bands_at_multiples_of": 0, "VariantCalling.agg_preemptible_tries": 3, - "VariantCalling.use_gatk3_haplotype_caller": false + "VariantCalling.use_gatk3_haplotype_caller": false, + "VariantCalling.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/exome/Plumbing/RP-929.NA12878.json b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/exome/Plumbing/RP-929.NA12878.json index 1e89ca58f5..78f6c994e7 100644 --- a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/exome/Plumbing/RP-929.NA12878.json +++ b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/exome/Plumbing/RP-929.NA12878.json @@ -17,5 +17,6 @@ "VariantCalling.haplotype_scatter_count": 10, "VariantCalling.break_bands_at_multiples_of": 0, "VariantCalling.agg_preemptible_tries": 3, - "VariantCalling.use_gatk3_haplotype_caller": false + "VariantCalling.use_gatk3_haplotype_caller": false, + "VariantCalling.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/wgs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/wgs/Plumbing/G96830.NA12878.json index 4e4be85272..c13ceb45f8 100644 --- a/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/wgs/Plumbing/G96830.NA12878.json +++ b/pipelines/broad/dna_seq/germline/variant_calling/test_inputs/wgs/Plumbing/G96830.NA12878.json @@ -17,5 +17,6 @@ "VariantCalling.haplotype_scatter_count": 10, 
"VariantCalling.break_bands_at_multiples_of": 100000, "VariantCalling.agg_preemptible_tries": 3, - "VariantCalling.use_gatk3_haplotype_caller": true + "VariantCalling.use_gatk3_haplotype_caller": true, + "VariantCalling.cloud_provider": "gcp" } diff --git a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md index 5c974c8c30..21120b9c7e 100644 --- a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md +++ b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md @@ -1,3 +1,8 @@ +# 1.0.18 +2024-07-09 (Date of Last Commit) + +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline + # 1.0.17 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl index 17d4fecfb8..6cc165522f 100644 --- a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl +++ b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl @@ -43,7 +43,7 @@ workflow UltimaGenomicsWholeGenomeCramOnly { save_bam_file: "If true, then save intermeidate ouputs used by germline pipeline (such as the output BAM) otherwise they won't be kept as outputs." 
} - String pipeline_version = "1.0.17" + String pipeline_version = "1.0.18" References references = alignment_references.references diff --git a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md index a5097f1fa9..7e775a6553 100644 --- a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md +++ b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.changelog.md @@ -1,3 +1,8 @@ +# 1.12.19 +2024-07-09 (Date of Last Commit) + +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers + # 1.12.18 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl index 2443bc8bcb..314995c5db 100644 --- a/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl +++ b/pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl @@ -21,7 +21,7 @@ import "../../../../tasks/broad/Qc.wdl" as Qc workflow IlluminaGenotypingArray { - String pipeline_version = "1.12.18" + String pipeline_version = "1.12.19" input { String sample_alias diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md index 0ac74c9794..4ff223caf2 100644 --- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md +++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md @@ -1,3 +1,8 @@ +# 1.1.11 +2024-05-21 (Date of Last Commit) + +* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers; this change does not affect this pipeline + # 1.1.10 2023-12-18 (Date of Last Commit) diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl 
b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl index 3021fe6a4c..7b5e2958ce 100644 --- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl +++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl @@ -9,7 +9,7 @@ workflow BroadInternalImputation { description: "Push outputs of Imputation.wdl to TDR dataset table ImputationOutputsTable and split out Imputation arrays into ImputationWideOutputsTable." allowNestedInputs: true } - String pipeline_version = "1.1.10" + String pipeline_version = "1.1.11" input { # inputs to wrapper task diff --git a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md index 878dc4bc33..acfc450408 100644 --- a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md +++ b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md @@ -1,3 +1,8 @@ +# 1.1.9 +2024-07-09 + +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline. + # 1.1.8 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl index b7bf1c183e..6a9b1b195d 100644 --- a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl +++ b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl @@ -9,7 +9,7 @@ workflow BroadInternalArrays { description: "Push outputs of Arrays.wdl to TDR dataset table ArraysOutputsTable." 
} - String pipeline_version = "1.1.8" + String pipeline_version = "1.1.9" input { # inputs to wrapper task diff --git a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md index 8aa12ad228..433cc9baa3 100644 --- a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md +++ b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md @@ -1,3 +1,8 @@ +# 1.0.19 +2024-07-09 (Date of Last Commit) + +* Updated ReblockGVCF.wdl to run in Azure. + # 1.0.18 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl index 946c9196dd..df1b6e664b 100644 --- a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl +++ b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl @@ -6,7 +6,7 @@ import "../../../../../../../pipelines/broad/qc/CheckFingerprint.wdl" as FP workflow BroadInternalUltimaGenomics { - String pipeline_version = "1.0.18" + String pipeline_version = "1.0.19" input { diff --git a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md index da7f7a4ef4..407ff0c43d 100644 --- a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md +++ b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md @@ -1,3 +1,7 @@ +# 1.0.31 +2024-07-09 +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline + # 1.0.30 2024-07-01 (Date of Last 
Commit) diff --git a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl index d4f5316e89..766f087263 100644 --- a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl +++ b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl @@ -7,7 +7,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils workflow BroadInternalRNAWithUMIs { - String pipeline_version = "1.0.30" + String pipeline_version = "1.0.31" input { # input needs to be either "hg19" or "hg38" diff --git a/pipelines/broad/qc/CheckFingerprint.changelog.md b/pipelines/broad/qc/CheckFingerprint.changelog.md index 697f5a0491..9f11431c50 100644 --- a/pipelines/broad/qc/CheckFingerprint.changelog.md +++ b/pipelines/broad/qc/CheckFingerprint.changelog.md @@ -1,3 +1,8 @@ +# 1.0.18 +2024-07-09 (Date of Last Commit) + +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers; this does not affect this pipeline + # 1.0.17 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/qc/CheckFingerprint.wdl b/pipelines/broad/qc/CheckFingerprint.wdl index 0338466c3b..dcc7ee057f 100644 --- a/pipelines/broad/qc/CheckFingerprint.wdl +++ b/pipelines/broad/qc/CheckFingerprint.wdl @@ -24,7 +24,7 @@ import "../../../tasks/broad/Qc.wdl" as Qc workflow CheckFingerprint { - String pipeline_version = "1.0.17" + String pipeline_version = "1.0.18" input { File? input_vcf diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md index 1d390dcbb6..f42b61ac28 100644 --- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md @@ -1,3 +1,8 @@ +# 3.2.0 +2024-07-09 (Date of Last Commit) + +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input.
+ # 3.1.20 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl index 2815be227e..b63e0501f5 100644 --- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl @@ -7,7 +7,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl" workflow ExomeReprocessing { - String pipeline_version = "3.1.20" + String pipeline_version = "3.2.0" input { File? input_cram @@ -32,6 +32,8 @@ workflow ExomeReprocessing { File target_interval_list File bait_interval_list String bait_set_name + + String cloud_provider } call ToUbams.CramToUnmappedBams { @@ -64,6 +66,7 @@ workflow ExomeReprocessing { target_interval_list = target_interval_list, bait_interval_list = bait_interval_list, bait_set_name = bait_set_name, + cloud_provider = cloud_provider } output { diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md index 33346a0acb..ea7abd045b 100644 --- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md @@ -1,3 +1,8 @@ +# 3.2.0 +2024-07-09 (Date of Last Commit) + +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input. 
+ # 3.1.22 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl index 6983382757..49db5591dc 100644 --- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl @@ -5,7 +5,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy workflow ExternalExomeReprocessing { - String pipeline_version = "3.1.22" + String pipeline_version = "3.2.0" input { @@ -34,6 +34,8 @@ workflow ExternalExomeReprocessing { String destination_cloud_path String vault_token_path String google_account_vault_path + + String cloud_provider } call ExomeReprocessing.ExomeReprocessing { @@ -53,7 +55,8 @@ workflow ExternalExomeReprocessing { fingerprint_genotypes_index = fingerprint_genotypes_index, cram_ref_fasta = cram_ref_fasta, cram_ref_fasta_index = cram_ref_fasta_index, - papi_settings = papi_settings + papi_settings = papi_settings, + cloud_provider = cloud_provider } call Copy.CopyFilesFromCloudToCloud { diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md index 29cf82e5d1..6ad12f66c0 100644 --- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md @@ -1,3 +1,8 @@ +# 2.2.0 +2024-07-09 (Date of Last Commit) + +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input. 
+ # 2.1.22 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl index dda7f0fa6f..341be24f78 100644 --- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl @@ -6,7 +6,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy workflow ExternalWholeGenomeReprocessing { - String pipeline_version = "2.1.22" + String pipeline_version = "2.2.0" input { File? input_cram @@ -33,6 +33,8 @@ workflow ExternalWholeGenomeReprocessing { String destination_cloud_path String vault_token_path String google_account_vault_path + + String cloud_provider } call WholeGenomeReprocessing.WholeGenomeReprocessing { @@ -51,7 +53,8 @@ workflow ExternalWholeGenomeReprocessing { fingerprint_genotypes_index = fingerprint_genotypes_index, papi_settings = papi_settings, wgs_coverage_interval_list = wgs_coverage_interval_list, - scatter_settings = scatter_settings + scatter_settings = scatter_settings, + cloud_provider = cloud_provider } call Copy.CopyFilesFromCloudToCloud { diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md index 6cb6ce1bf4..856a1a2f1c 100644 --- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md @@ -1,3 +1,8 @@ +# 3.2.0 +2024-07-09 (Date of Last Commit) + +* Updated tasks GermlineVariantDiscovery.wdl and QC.wdl to allow multi-cloud dockers. cloud_provider is a new, required input. 
+ # 3.1.21 2024-07-01 (Date of Last Commit) diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl index efd5c5deda..a65e723ad3 100644 --- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl @@ -6,7 +6,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl" workflow WholeGenomeReprocessing { - String pipeline_version = "3.1.21" + String pipeline_version = "3.2.0" input { File? input_cram @@ -29,6 +29,8 @@ workflow WholeGenomeReprocessing { File? fingerprint_genotypes_index File wgs_coverage_interval_list + + String cloud_provider } call ToUbams.CramToUnmappedBams { @@ -57,7 +59,8 @@ workflow WholeGenomeReprocessing { fingerprint_genotypes_file = fingerprint_genotypes_file, fingerprint_genotypes_index = fingerprint_genotypes_index, papi_settings = papi_settings, - wgs_coverage_interval_list = wgs_coverage_interval_list + wgs_coverage_interval_list = wgs_coverage_interval_list, + cloud_provider = cloud_provider } output { diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index 94a1c07022..2cac5dc595 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,10 +1,13 @@ +# 5.3.0 + +* Updated the Multiome.wdl to run on Azure. cloud_provider is a new, required input. + # 5.2.0 2024-07-09 (Date of Last Commit) * Added new optional input parameter of nhash_id, an optional identifier for a library aliquot that is echoed in the ATAC fragment h5ad, the gene expression h5ad (in the data.uns), and the gene expression library metrics CSV output; default is set to null * Added test statements again for GH action (to release from develop). 
Will probably revert - # 5.1.0 2024-06-28 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 607d78d8f9..9da6addf65 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -3,13 +3,17 @@ version 1.0 import "../../../pipelines/skylab/multiome/atac.wdl" as atac import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils +import "https://raw.githubusercontent.com/aawdeh/CellBender/aa-cbwithoutcuda/wdl/cellbender_remove_background_azure.wdl" as CellBender_no_cuda import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender +import "../../../tasks/broad/Utilities.wdl" as utils workflow Multiome { - String pipeline_version = "5.2.0" + String pipeline_version = "5.3.0" + input { + String cloud_provider String input_id # Additional library aliquot ID String? nhash_id @@ -28,7 +32,6 @@ workflow Multiome { Boolean ignore_r1_read_length = false String star_strand_mode = "Forward" Boolean count_exons = false - File gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" String? 
soloMultiMappers # ATAC inputs @@ -36,7 +39,8 @@ workflow Multiome { Array[File] atac_r1_fastq Array[File] atac_r2_fastq Array[File] atac_r3_fastq - + # VM size used for several ATAC tasks + String vm_size = "Standard_M128s" # BWA tar reference File tar_bwa_reference # Chromosone sizes @@ -44,16 +48,41 @@ workflow Multiome { # Trimadapters input String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG" String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" - # Whitelist - File atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt" # CellBender Boolean run_cellbender = false } + # Determine docker prefix based on cloud provider + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # Define docker images + String snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229" + + # Define all whitelist files + File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" + File gcp_atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt" + File azure_gex_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_gex.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D" + File azure_atac_whitelist = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_atac.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D" + + # Determine which whitelist files to use based on cloud provider + File gex_whitelist = if cloud_provider == "gcp" then gcp_gex_whitelist else azure_gex_whitelist + File atac_whitelist = if cloud_provider == "gcp" then gcp_atac_whitelist else azure_atac_whitelist + + # Make sure either 'gcp' or 'azure' is supplied 
as cloud_provider input. If not, raise an error + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." + } + } + # Call the Optimus workflow call optimus.Optimus as Optimus { input: + cloud_provider = cloud_provider, counting_mode = counting_mode, r1_fastq = gex_r1_fastq, r2_fastq = gex_r2_fastq, @@ -71,12 +100,14 @@ workflow Multiome { ignore_r1_read_length = ignore_r1_read_length, star_strand_mode = star_strand_mode, count_exons = count_exons, - soloMultiMappers = soloMultiMappers + soloMultiMappers = soloMultiMappers, + cloud_provider = cloud_provider } # Call the ATAC workflow call atac.ATAC as Atac { input: + cloud_provider = cloud_provider, read1_fastq_gzipped = atac_r1_fastq, read2_fastq_gzipped = atac_r2_fastq, read3_fastq_gzipped = atac_r3_fastq, @@ -85,12 +116,14 @@ workflow Multiome { chrom_sizes = chrom_sizes, whitelist = atac_whitelist, adapter_seq_read1 = adapter_seq_read1, + vm_size = vm_size, annotations_gtf = annotations_gtf, atac_nhash_id = nhash_id, adapter_seq_read3 = adapter_seq_read3 } call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes { input: + docker_path = docker_prefix + snap_atac_docker_image, atac_h5ad = Atac.snap_metrics, gex_h5ad = Optimus.h5ad_output_file, gex_whitelist = gex_whitelist, @@ -100,26 +133,43 @@ workflow Multiome { # Call CellBender if (run_cellbender) { - call CellBender.run_cellbender_remove_background_gpu as CellBender { - input: - sample_name = input_id, - input_file_unfiltered = Optimus.h5ad_output_file, - hardware_boot_disk_size_GB = 20, - hardware_cpu_count = 4, - hardware_disk_size_GB = 50, - hardware_gpu_type = "nvidia-tesla-t4", - hardware_memory_GB = 32, - hardware_preemptible_tries = 2, - hardware_zones = "us-central1-a us-central1-c", - nvidia_driver_version = "470.82.01" - - } + if (cloud_provider == "gcp") { + call 
CellBender.run_cellbender_remove_background_gpu as CellBender { + input: + sample_name = input_id, + input_file_unfiltered = Optimus.h5ad_output_file, + hardware_boot_disk_size_GB = 20, + hardware_cpu_count = 4, + hardware_disk_size_GB = 50, + hardware_gpu_type = "nvidia-tesla-t4", + hardware_memory_GB = 32, + hardware_preemptible_tries = 2, + hardware_zones = "us-central1-a us-central1-c", + nvidia_driver_version = "470.82.01" + } + } + if (cloud_provider == "azure") { + call CellBender_no_cuda.run_cellbender_remove_background_gpu as CellBender_no_cuda { + input: + sample_name = input_id, + input_file_unfiltered = Optimus.h5ad_output_file, + hardware_boot_disk_size_GB = 20, + hardware_cpu_count = 4, + hardware_disk_size_GB = 50, + hardware_gpu_type = "nvidia-tesla-t4", + hardware_memory_GB = 32, + hardware_preemptible_tries = 2, + hardware_zones = "us-central1-a us-central1-c", + nvidia_driver_version = "470.82.01" + } + } } meta { allowNestedInputs: true } + output { String multiome_pipeline_version_out = pipeline_version diff --git a/pipelines/skylab/multiome/atac.changelog.md b/pipelines/skylab/multiome/atac.changelog.md index 34587f7f6f..f44b5b2328 100644 --- a/pipelines/skylab/multiome/atac.changelog.md +++ b/pipelines/skylab/multiome/atac.changelog.md @@ -1,3 +1,8 @@ +# 2.2.0 +2024-07-11 (Date of Last Commit) + +* Updated the atac.wdl to run on Azure. cloud_provider is a new, required input. 
+ # 2.1.0 2024-07-09 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/atac.json b/pipelines/skylab/multiome/atac.json index a8b9465fdc..1e898edd48 100644 --- a/pipelines/skylab/multiome/atac.json +++ b/pipelines/skylab/multiome/atac.json @@ -4,6 +4,7 @@ "ATAC.TrimAdapters.adapter_seq_read1": "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG", "ATAC.TrimAdapters.adapter_seq_read2": "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG", "ATAC.input_id": "scATAC", + "ATAC.cloud_provider":"gcp", "ATAC.tar_bwa_reference": "gs://fc-dd55e131-ef49-4d02-aa2a-20640daaae1e/submissions/8f0dd71a-b42f-4503-b839-3f146941758a/IndexRef/53a91851-1f6c-4ab9-af66-b338ffb28b5a/call-BwaMem2Index/GRCh38.primary_assembly.genome.bwamem2.fa.tar", "ATAC.preindex": "false" } diff --git a/pipelines/skylab/multiome/atac.wdl b/pipelines/skylab/multiome/atac.wdl index 2a43694bca..b54f91043b 100644 --- a/pipelines/skylab/multiome/atac.wdl +++ b/pipelines/skylab/multiome/atac.wdl @@ -3,6 +3,7 @@ version 1.0 import "../../../tasks/skylab/MergeSortBam.wdl" as Merge import "../../../tasks/skylab/FastqProcessing.wdl" as FastqProcessing import "../../../tasks/skylab/PairedTagUtils.wdl" as AddBB +import "../../../tasks/broad/Utilities.wdl" as utils workflow ATAC { meta { @@ -18,6 +19,7 @@ workflow ATAC { # Output prefix/base name for all intermediate files and pipeline outputs String input_id + String cloud_provider # Additional library aliquot ID String? atac_nhash_id @@ -30,10 +32,11 @@ workflow ATAC { Int num_threads_bwa = 128 Int mem_size_bwa = 512 String cpu_platform_bwa = "Intel Ice Lake" + String vm_size # Text file containing chrom_sizes for genome build (i.e. 
hg38) File chrom_sizes - #File for annotations for calculating ATAC TSSE + #File for annotations for calculating ATAC TSSE File annotations_gtf # Whitelist File whitelist @@ -43,7 +46,28 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "2.1.0" + String pipeline_version = "2.2.0" + + # Determine docker prefix based on cloud provider + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # Docker image names + String warp_tools_2_0_0 = "warp-tools:2.0.0" + String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919" + String samtools_docker = "samtools-dist-bwa:3.0.0" + String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" + String snap_atac_docker = "snapatac2:1.0.9-2.6.3-1715865353" + + # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." 
+ } + } + parameter_meta { read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads" @@ -54,14 +78,14 @@ workflow ATAC { num_threads_bwa: "Number of threads for bwa-mem2 task (default: 128)" mem_size_bwa: "Memory size in GB for bwa-mem2 task (default: 512)" cpu_platform_bwa: "CPU platform for bwa-mem2 task (default: Intel Ice Lake)" - } call GetNumSplits { input: nthreads = num_threads_bwa, mem_size = mem_size_bwa, - cpu_platform = cpu_platform_bwa + cpu_platform = cpu_platform_bwa, + vm_size = vm_size } call FastqProcessing.FastqProcessATAC as SplitFastq { @@ -71,7 +95,8 @@ workflow ATAC { barcodes_fastq = read2_fastq_gzipped, output_base_name = input_id, num_output_files = GetNumSplits.ranks_per_node_out, - whitelist = whitelist + whitelist = whitelist, + docker_path = docker_prefix + warp_tools_2_0_0 } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { @@ -81,7 +106,8 @@ workflow ATAC { read3_fastq = SplitFastq.fastq_R3_output_array[idx], output_base_name = input_id + "_" + idx, adapter_seq_read1 = adapter_seq_read1, - adapter_seq_read3 = adapter_seq_read3 + adapter_seq_read3 = adapter_seq_read3, + docker_path = docker_prefix + cutadapt_docker } } @@ -93,14 +119,18 @@ workflow ATAC { output_base_name = input_id, nthreads = num_threads_bwa, mem_size = mem_size_bwa, - cpu_platform = cpu_platform_bwa + cpu_platform = cpu_platform_bwa, + docker_path = docker_prefix + samtools_docker, + cloud_provider = cloud_provider, + vm_size = vm_size } if (preindex) { call AddBB.AddBBTag as BBTag { input: bam = BWAPairedEndAlignment.bam_aligned_output, - input_id = input_id + input_id = input_id, + docker_path = docker_prefix + upstools_docker } call CreateFragmentFile as BB_fragment { input: @@ -108,6 +138,7 @@ workflow ATAC { chrom_sizes = chrom_sizes, annotations_gtf = annotations_gtf, preindex = preindex, + docker_path = docker_prefix + snap_atac_docker, atac_nhash_id = atac_nhash_id } } @@ -118,6 +149,7 @@ workflow 
ATAC { chrom_sizes = chrom_sizes, annotations_gtf = annotations_gtf, preindex = preindex, + docker_path = docker_prefix + snap_atac_docker, atac_nhash_id = atac_nhash_id } @@ -142,12 +174,14 @@ task GetNumSplits { Int mem_size String cpu_platform String docker_image = "ubuntu:latest" + String vm_size } parameter_meta { docker_image: "the ubuntu docker image (default: ubuntu:latest)" nthreads: "Number of threads per node (default: 128)" mem_size: "the size of memory used during alignment" + vm_size: "the virtual machine used for the task" } command <<< @@ -216,6 +250,7 @@ task GetNumSplits { cpu: nthreads cpuPlatform: cpu_platform memory: "${mem_size} GiB" + vm_size: vm_size } output { @@ -240,7 +275,7 @@ task TrimAdapters { # Runtime attributes/docker Int disk_size = ceil(2 * ( size(read1_fastq, "GiB") + size(read3_fastq, "GiB") )) + 200 Int mem_size = 4 - String docker_image = "us.gcr.io/broad-gotc-prod/cutadapt:1.0.0-4.4-1686752919" + String docker_path } parameter_meta { @@ -251,7 +286,7 @@ task TrimAdapters { adapter_seq_read1: "cutadapt option for the sequence adapter for read 1 fastq" adapter_seq_read3: "cutadapt option for the sequence adapter for read 3 fastq" output_base_name: "base name to be used for the output of the task" - docker_image: "the docker image using cutadapt to be used (default:us.gcr.io/broad-gotc-prod/cutadapt:1.0.0-4.4-1686752919)" + docker_path: "The docker image path containing the runtime environment for this task" mem_size: "the size of memory used during trimming adapters" disk_size : "disk size used in trimming adapters step" } @@ -278,7 +313,7 @@ task TrimAdapters { # use docker image for given tool cutadapat runtime { - docker: docker_image + docker: docker_path disks: "local-disk ${disk_size} HDD" memory: "${mem_size} GiB" } @@ -299,13 +334,15 @@ task BWAPairedEndAlignment { String read_group_sample_name = "RGSN1" String suffix = "trimmed_adapters.fastq.gz" String output_base_name - String docker_image = 
"us.gcr.io/broad-gotc-prod/samtools-dist-bwa:2.0.0" + String docker_path + String cloud_provider # Runtime attributes Int disk_size = 2000 Int nthreads Int mem_size - String cpu_platform + String cpu_platform + String vm_size } parameter_meta { @@ -318,7 +355,9 @@ task BWAPairedEndAlignment { mem_size: "the size of memory used during alignment" disk_size : "disk size used in bwa alignment step" output_base_name: "basename to be used for the output of the task" - docker_image: "the docker image using BWA to be used (default: us.gcr.io/broad-gotc-prod/samtools-bwa-mem-2:1.0.0-2.2.1_x64-linux-1685469504)" + docker_path: "The docker image path containing the runtime environment for this task" + cloud_provider: "The cloud provider for the pipeline." + vm_size: "the virtual machine used for the task" } String bam_aligned_output_name = output_base_name + ".bam" @@ -417,21 +456,38 @@ task BWAPairedEndAlignment { # rename file to this mv final.sorted.bam ~{bam_aligned_output_name} + echo "the present working dir" + pwd + # save output logs for bwa-mem2 mkdir output_logs - mv *txt output_logs - tar -zcvf /cromwell_root/output_distbwa_log.tar.gz output_logs - - # move bam file to /cromwell_root - mv ~{bam_aligned_output_name} /cromwell_root + mv *.txt output_logs + + if [ "~{cloud_provider}" == "gcp" ]; then + tar -zcvf output_distbwa_log.tar.gz output_logs + mv output_distbwa_log.tar.gz ../ + else + tar -zcvf output_distbwa_log.tar.gz output_logs + mv output_distbwa_log.tar.gz ../ + fi + + # move bam file to the root of cromwell + # if the cloud provider is azure, move the file to /cromwell-executions + # if the cloud provider is gcp, move the file to /cromwell_root + if [ "~{cloud_provider}" == "gcp" ]; then + mv ~{bam_aligned_output_name} ../ + else + mv ~{bam_aligned_output_name} ../ + fi >>> runtime { - docker: docker_image + docker: docker_path disks: "local-disk ${disk_size} SSD" cpu: nthreads cpuPlatform: cpu_platform memory: "${mem_size} GiB" + vm_size: vm_size } 
output { @@ -444,6 +500,7 @@ task BWAPairedEndAlignment { task CreateFragmentFile { input { File bam + File annotations_gtf File chrom_sizes File annotations_gtf Boolean preindex @@ -451,6 +508,7 @@ task CreateFragmentFile { Int mem_size = 16 Int nthreads = 4 String cpuPlatform = "Intel Cascade Lake" + String docker_path String atac_nhash_id = "" } @@ -462,6 +520,7 @@ task CreateFragmentFile { annotations_gtf: "GTF for SnapATAC2 to calculate TSS sites of fragment file." disk_size: "Disk size used in create fragment file step." mem_size: "The size of memory used in create fragment file." + docker_path: "The docker image path containing the runtime environment for this task" } command <<< @@ -511,7 +570,7 @@ task CreateFragmentFile { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.9-2.6.3-1715865353" + docker: docker_path disks: "local-disk ${disk_size} SSD" memory: "${mem_size} GiB" cpu: nthreads diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json index 297bdfa2dc..108c300744 100644 --- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json +++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json @@ -1,6 +1,7 @@ { "Multiome.annotations_gtf":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", "Multiome.input_id":"10k_PBMC_downsampled", + "Multiome.cloud_provider":"gcp", "Multiome.gex_r1_fastq":[ "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R1_gex.fastq.gz" ], diff --git a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json index c4965dd9a4..2937b6f4b9 100644 --- a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json +++ b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc.json @@ -5,6 +5,7 @@ 
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_I1_001.fastq.gz" ], "Multiome.input_id":"10k_PBMC", + "Multiome.cloud_provider":"gcp", "Multiome.gex_r1_fastq":[ "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_R1_001.fastq.gz", "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_R1_001.fastq.gz" diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index 4860fcb13a..cdbfca2c42 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,3 +1,9 @@ +# 7.4.0 +2024-07-11 (Date of Last Commit) + +* Updated the Optimus.wdl to run on Azure. cloud_provider is a new, required input. +* Updated GermlineVariantDiscovery, BamProcessing, DragenTasks, Qc, and Utilities tasks to allow multi-cloud dockers + # 7.3.0 2024-07-09 (Date of Last Commit) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index fd79a6d50a..0312b41b02 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -7,6 +7,7 @@ import "../../../tasks/skylab/RunEmptyDrops.wdl" as RunEmptyDrops import "../../../tasks/skylab/CheckInputs.wdl" as OptimusInputChecks import "../../../tasks/skylab/MergeSortBam.wdl" as Merge import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils +import "../../../tasks/broad/Utilities.wdl" as utils workflow Optimus { meta { @@ -14,6 +15,8 @@ workflow Optimus { } input { + String cloud_provider + # Mode for counting either "sc_rna" or "sn_rna" String counting_mode = "sc_rna" @@ -68,18 +71,55 @@ workflow Optimus { # version of this pipeline - String pipeline_version = "7.3.0" + String pipeline_version = "7.4.0" # this is used to scatter matched [r1_fastq, 
r2_fastq, i1_fastq] arrays Array[Int] indices = range(length(r1_fastq)) # 10x parameters - File whitelist_v2 = "gs://gcp-public-data--broad-references/RNA/resources/737k-august-2016.txt" - File whitelist_v3 = "gs://gcp-public-data--broad-references/RNA/resources/3M-febrary-2018.txt" + File gcp_whitelist_v2 = "gs://gcp-public-data--broad-references/RNA/resources/737k-august-2016.txt" + File gcp_whitelist_v3 = "gs://gcp-public-data--broad-references/RNA/resources/3M-febrary-2018.txt" + File azure_whitelist_v2 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/737k-august-2016.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D" + File azure_whitelist_v3 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/3M-febrary-2018.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D" + # Takes the first read1 FASTQ from the inputs to check for chemistry match File r1_single_fastq = r1_fastq[0] + # docker images + String picard_cloud_docker = "picard-cloud:2.26.10" + String pytools_docker = "pytools:1.0.0-1661263730" + String empty_drops_docker = "empty-drops:1.0.1-4.2" + String star_docker = "star:1.0.1-2.7.11a-1692706072" + String warp_tools_docker_2_0_1 = "warp-tools:2.0.1" + String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985" + String star_merge_docker = "star-merge-npz:1.2" + + #TODO how do we handle these? 
+ String alpine_docker = "alpine-bash@sha256:965a718a07c700a5204c77e391961edee37477634ce2f9cf652a8e4c2db858ff" + String gcp_alpine_docker_prefix = "bashell/" + String acr_alpine_docker_prefix = "dsppipelinedev.azurecr.io/" + String alpine_docker_prefix = if cloud_provider == "gcp" then gcp_alpine_docker_prefix else acr_alpine_docker_prefix + + String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" + String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/" + String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/" + String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix + + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + + # choose docker prefix based on cloud provider + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # make sure either gcp or azr is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." 
+ } + } + parameter_meta { r1_fastq: "forward read, contains cell barcodes and molecule barcodes" r2_fastq: "reverse read, contains cDNA fragment generated from captured mRNA" @@ -101,16 +141,21 @@ workflow Optimus { force_no_check = force_no_check, counting_mode = counting_mode, count_exons = count_exons, - whitelist_v2 = whitelist_v2, - whitelist_v3 = whitelist_v3, + gcp_whitelist_v2 = gcp_whitelist_v2, + gcp_whitelist_v3 = gcp_whitelist_v3, + azure_whitelist_v2 = azure_whitelist_v2, + azure_whitelist_v3 = azure_whitelist_v3, tenx_chemistry_version = tenx_chemistry_version, r1_fastq = r1_single_fastq, - ignore_r1_read_length = ignore_r1_read_length + ignore_r1_read_length = ignore_r1_read_length, + cloud_provider = cloud_provider, + alpine_docker_path = alpine_docker_prefix + alpine_docker } call StarAlign.STARGenomeRefVersion as ReferenceCheck { input: - tar_star_reference = tar_star_reference + tar_star_reference = tar_star_reference, + ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } call FastqProcessing.FastqProcessing as SplitFastq { @@ -121,7 +166,8 @@ workflow Optimus { whitelist = whitelist, chemistry = tenx_chemistry_version, sample_id = input_id, - read_struct = read_struct + read_struct = read_struct, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { @@ -136,21 +182,24 @@ workflow Optimus { counting_mode = counting_mode, count_exons = count_exons, output_bam_basename = output_bam_basename + "_" + idx, - soloMultiMappers = soloMultiMappers + soloMultiMappers = soloMultiMappers, + star_docker_path = docker_prefix + star_docker } } call Merge.MergeSortBamFiles as MergeBam { input: bam_inputs = STARsoloFastq.bam_output, output_bam_filename = output_bam_basename + ".bam", - sort_order = "coordinate" + sort_order = "coordinate", + picard_cloud_docker_path = docker_prefix + picard_cloud_docker } call Metrics.CalculateGeneMetrics as GeneMetrics { input: bam_input = 
MergeBam.output_bam, mt_genes = mt_genes, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } call Metrics.CalculateCellMetrics as CellMetrics { @@ -158,7 +207,8 @@ workflow Optimus { bam_input = MergeBam.output_bam, mt_genes = mt_genes, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } call StarAlign.MergeStarOutput as MergeStarOutputs { @@ -172,6 +222,7 @@ workflow Optimus { umipercell = STARsoloFastq.umipercell, input_id = input_id, counting_mode = counting_mode, + star_merge_docker_path = docker_prefix + star_merge_docker, expected_cells = expected_cells, gex_nhash_id = gex_nhash_id } @@ -181,7 +232,8 @@ workflow Optimus { sparse_count_matrix = MergeStarOutputs.sparse_counts, row_index = MergeStarOutputs.row_index, col_index = MergeStarOutputs.col_index, - emptydrops_lower = emptydrops_lower + emptydrops_lower = emptydrops_lower, + empty_drops_docker_path = docker_prefix + empty_drops_docker } } @@ -201,7 +253,8 @@ workflow Optimus { gene_id = MergeStarOutputs.col_index, empty_drops_result = RunEmptyDrops.empty_drops_result, counting_mode = counting_mode, - pipeline_version = "Optimus_v~{pipeline_version}" + pipeline_version = "Optimus_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } } if (count_exons && counting_mode=="sn_rna") { @@ -217,6 +270,7 @@ workflow Optimus { align_features = STARsoloFastq.align_features_sn_rna, umipercell = STARsoloFastq.umipercell_sn_rna, input_id = input_id, + star_merge_docker_path = docker_prefix + star_merge_docker, gex_nhash_id = gex_nhash_id } call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{ @@ -235,7 +289,8 @@ workflow Optimus { sparse_count_matrix_exon = MergeStarOutputsExons.sparse_counts, cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = 
MergeStarOutputsExons.col_index, - pipeline_version = "Optimus_v~{pipeline_version}" + pipeline_version = "Optimus_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json index 087a8667d5..36c0b5d3bd 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json @@ -16,5 +16,6 @@ "Optimus.tenx_chemistry_version": "3", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", "Optimus.star_strand_mode": "Forward", + "Optimus.cloud_provider": "gcp", "Optimus.gex_nhash_id":"example_1234" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json index 39e5cf9b83..d999f69fa9 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json @@ -27,6 +27,7 @@ "Optimus.input_id": "neurons2k_mouse", "Optimus.tenx_chemistry_version": "2", "Optimus.star_strand_mode": "Unstranded", + "Optimus.cloud_provider": "gcp", "Optimus.gex_nhash_id":"example_1234", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json index c4f712a56b..a68235cfbf 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json @@ -25,6 +25,7 @@ "Optimus.star_strand_mode": "Unstranded", "Optimus.annotations_gtf": 
"gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf", "Optimus.counting_mode": "sn_rna", - "Optimus.gex_nhash_id":"example_1234", - "Optimus.count_exons": true + "Optimus.count_exons": true, + "Optimus.cloud_provider": "gcp", + "Optimus.gex_nhash_id":"example_1234" } diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index 747762b380..d6eefd1ae3 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,10 +1,14 @@ +# 1.3.0 +2024-07-11 (Date of Last Commit) + +* Updated the PairedTag.wdl to run on Azure. cloud_provider is a new, required input. + # 1.2.0 2024-07-09 (Date of Last Commit) * Added new optional input parameter of nhash_id, an optional identifier for a library aliquot that is echoed in the workflow fragment h5ad, the Optimus workflow gene expression h5ad (in the data.uns), and the Optimus gene expression library metrics CSV output; default is set to null * Added test statements again for GH action (to release from develop). 
Will probably revert - # 1.1.0 2024-06-28 (Date of Last Commit) diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index b647ade474..0ac9aeb8db 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -4,8 +4,12 @@ import "../../../pipelines/skylab/multiome/atac.wdl" as atac import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing +import "../../../tasks/broad/Utilities.wdl" as utils + workflow PairedTag { - String pipeline_version = "1.2.0" + + String pipeline_version = "1.3.0" + input { String input_id @@ -26,7 +30,7 @@ workflow PairedTag { Boolean ignore_r1_read_length = false String star_strand_mode = "Forward" Boolean count_exons = false - File gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" + File gex_whitelist = if cloud_provider == "gcp" then "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" else "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_gex.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D" String? 
soloMultiMappers = "Uniform" # ATAC inputs @@ -34,6 +38,9 @@ workflow PairedTag { Array[File] atac_r1_fastq Array[File] atac_r2_fastq Array[File] atac_r3_fastq + + String vm_size = "Standard_M128s" + # BWA input File tar_bwa_reference File chrom_sizes @@ -41,11 +48,34 @@ workflow PairedTag { String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG" String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" # Whitelist - File atac_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt" + File atac_whitelist = if cloud_provider == "gcp" then "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt" else "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/arc-v1/737K-arc-v1_atac.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D" # PairedTag Boolean preindex + + # Expected to be either 'gcp' or 'azure' + String cloud_provider + } + + # All docker images that are needed for tasks in this workflow + String upstools_docker = "upstools:2.0.0" + String snapatac_docker = "snapatac2:1.0.4-2.3.1-1700590229" + + # Prefixes based on cloud env + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + + # choose docker prefix based on cloud_provider input + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." 
+ } } + # Call the Optimus workflow call optimus.Optimus as Optimus { input: @@ -65,6 +95,7 @@ workflow PairedTag { ignore_r1_read_length = ignore_r1_read_length, star_strand_mode = star_strand_mode, count_exons = count_exons, + cloud_provider = cloud_provider, soloMultiMappers = soloMultiMappers, gex_nhash_id = nhash_id } @@ -79,9 +110,12 @@ workflow PairedTag { barcodes_fastq = atac_r2_fastq[idx], input_id = input_id, whitelist = atac_whitelist, - preindex = preindex + preindex = preindex, + docker_path = docker_prefix + upstools_docker } - } + } + + # Call the ATAC workflow call atac.ATAC as Atac_preindex { input: read1_fastq_gzipped = demultiplex.fastq1, @@ -95,6 +129,8 @@ workflow PairedTag { adapter_seq_read3 = adapter_seq_read3, annotations_gtf = annotations_gtf, preindex = preindex, + cloud_provider = cloud_provider, + vm_size = vm_size, atac_nhash_id = nhash_id } @@ -102,7 +138,8 @@ workflow PairedTag { call Demultiplexing.ParseBarcodes as ParseBarcodes { input: atac_h5ad = Atac_preindex.snap_metrics, - atac_fragment = Atac_preindex.fragment_file + atac_fragment = Atac_preindex.fragment_file, + docker_path = docker_prefix + snapatac_docker, } } diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json index c2ad3acc9c..d5cfaf7181 100644 --- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json +++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/10k_pbmc_downsampled.json @@ -22,7 +22,8 @@ "PairedTag.preindex":"false", "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake", "PairedTag.Atac_preindex.num_threads_bwa":"16", - "PairedTag.Atac_preindex.mem_size_bwa":"64", + "PairedTag.Atac_preindex.mem_size_bwa":"64", "PairedTag.soloMultiMappers":"Uniform", + "PairedTag.cloud_provider": "gcp", "PairedTag.nhash_id":"example_1234" } diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json 
b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json index 6401549cae..1a22504c14 100644 --- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json +++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json @@ -24,5 +24,6 @@ "PairedTag.Atac_preindex.num_threads_bwa":"16", "PairedTag.Atac_preindex.mem_size_bwa":"64", "PairedTag.soloMultiMappers":"Uniform", + "PairedTag.cloud_provider": "gcp", "PairedTag.nhash_id":"example_1234" } diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json index 2c2f9e5afa..27e1b1b124 100644 --- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json +++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json @@ -24,5 +24,6 @@ "PairedTag.Atac_preindex.num_threads_bwa":"16", "PairedTag.Atac_preindex.mem_size_bwa":"64", "PairedTag.soloMultiMappers":"Uniform", + "PairedTag.cloud_provider": "gcp", "PairedTag.nhash_id":"example_1234" } diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index cb7c2cebbf..bdef191cc9 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,3 +1,8 @@ +# 3.2.0 +2024-07-11 (Date of Last Commit) + +* Updated the SlideSeq.wdl to run on Azure; cloud_provider is a new, required input + # 3.1.8 2024-07-09 (Date of Last Commit) diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index 3f8ddc3548..409e3123b6 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -6,6 +6,8 @@ import "../../../tasks/skylab/Metrics.wdl" as Metrics import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "../../../tasks/skylab/CheckInputs.wdl" as OptimusInputChecks import "../../../tasks/skylab/MergeSortBam.wdl" as
Merge +import "../../../tasks/broad/Utilities.wdl" as utils + ## Copyright Broad Institute, 2022 ## @@ -23,7 +25,7 @@ import "../../../tasks/skylab/MergeSortBam.wdl" as Merge workflow SlideSeq { - String pipeline_version = "3.1.8" + String pipeline_version = "3.2.0" input { Array[File] r1_fastq @@ -39,6 +41,34 @@ workflow SlideSeq { Boolean count_exons = true File bead_locations + String cloud_provider + + } + + # docker images + String pytools_docker = "pytools:1.0.0-1661263730" + String picard_cloud_docker = "picard-cloud:2.26.10" + String warp_tools_docker_2_0_1 = "warp-tools:2.0.1" + String warp_tools_docker_2_0_2 = "warp-tools:2.0.2-1709308985" + String star_merge_docker = "star-merge-npz:1.2" + + String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" + String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/" + String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/" + String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix + + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + + # choose docker prefix based on cloud provider + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + + # make sure either gcp or azr is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." 
+ } } parameter_meta { @@ -51,7 +81,8 @@ workflow SlideSeq { call StarAlign.STARGenomeRefVersion as ReferenceCheck { input: - tar_star_reference = tar_star_reference + tar_star_reference = tar_star_reference, + ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } call Metrics.FastqMetricsSlideSeq as FastqMetrics { @@ -86,13 +117,15 @@ workflow SlideSeq { input: bam_inputs = STARsoloFastqSlideSeq.bam_output, output_bam_filename = output_bam_basename + ".bam", - sort_order = "coordinate" + sort_order = "coordinate", + picard_cloud_docker_path = docker_prefix + picard_cloud_docker } call Metrics.CalculateGeneMetrics as GeneMetrics { input: bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } call Metrics.CalculateUMIsMetrics as UMIsMetrics { input: @@ -105,7 +138,9 @@ workflow SlideSeq { input: bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, - input_id = input_id + input_id = input_id, + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 + } call StarAlign.MergeStarOutput as MergeStarOutputs { @@ -113,7 +148,8 @@ workflow SlideSeq { barcodes = STARsoloFastqSlideSeq.barcodes, features = STARsoloFastqSlideSeq.features, matrix = STARsoloFastqSlideSeq.matrix, - input_id = input_id + input_id = input_id, + star_merge_docker_path = docker_prefix + star_merge_docker } if ( !count_exons ) { call H5adUtils.OptimusH5adGeneration as SlideseqH5adGeneration{ @@ -126,7 +162,9 @@ workflow SlideSeq { cell_id = MergeStarOutputs.row_index, gene_id = MergeStarOutputs.col_index, add_emptydrops_data = "no", - pipeline_version = "SlideSeq_v~{pipeline_version}" + pipeline_version = "SlideSeq_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 + } } if (count_exons) { @@ -135,7 +173,8 @@ workflow SlideSeq { barcodes = STARsoloFastqSlideSeq.barcodes_sn_rna, features = 
STARsoloFastqSlideSeq.features_sn_rna, matrix = STARsoloFastqSlideSeq.matrix_sn_rna, - input_id = input_id + input_id = input_id, + star_merge_docker_path = docker_prefix + star_merge_docker } call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons{ input: @@ -149,7 +188,8 @@ workflow SlideSeq { sparse_count_matrix_exon = MergeStarOutputsExons.sparse_counts, cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = MergeStarOutputsExons.col_index, - pipeline_version = "SlideSeq_v~{pipeline_version}" + pipeline_version = "SlideSeq_v~{pipeline_version}", + warp_tools_docker_path = docker_prefix + warp_tools_docker_2_0_1 } } diff --git a/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json b/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json index d8998d1d9b..035b22c58e 100644 --- a/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json +++ b/pipelines/skylab/slideseq/test_inputs/Plumbing/Puck_210817_11.mm10.json @@ -13,5 +13,6 @@ "SlideSeq.tar_star_reference": "gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/star/modified_star_2.7.9a_primary_gencode_mouse_vM23.tar", "SlideSeq.annotations_gtf": "gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf", "SlideSeq.count_exons": true, - "SlideSeq.bead_locations": " gs://broad-gotc-test-storage/SlideSeq/inputs/plumbing/Puck_210817_11/Puck_210817_11.tsv" + "SlideSeq.bead_locations": " gs://broad-gotc-test-storage/SlideSeq/inputs/plumbing/Puck_210817_11/Puck_210817_11.tsv", + "SlideSeq.cloud_provider": "gcp" } \ No newline at end of file diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index cfe0955206..4d00d91015 100644 --- 
a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,5 +1,7 @@ -# 1.3.6 -2024-07-09 (Date of Last Commit) +# 1.4.0 +2024-07-11 (Date of Last Commit) + +* Updated the MultiSampleSmartSeq2SingleNucleus.wdl to run on Azure. cloud_provider is a new, required input. * Added new optional input parameter of gex_nhash_id to the STARAlign task; this does not impact the MultiSampleSmartSeq2SingleNucleus workflow # 1.3.5 diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index f31a3cf253..068b35003d 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -6,6 +6,7 @@ import "../../../tasks/skylab/StarAlign.wdl" as StarAlign import "../../../tasks/skylab/Picard.wdl" as Picard import "../../../tasks/skylab/FeatureCounts.wdl" as CountAlignments import "../../../tasks/skylab/LoomUtils.wdl" as LoomUtils +import "../../../tasks/broad/Utilities.wdl" as utils workflow MultiSampleSmartSeq2SingleNucleus { meta { @@ -38,9 +39,25 @@ workflow MultiSampleSmartSeq2SingleNucleus { Array[String]? organ String? input_name_metadata_field String?
input_id_metadata_field + + String cloud_provider + } + + String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" + String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/" + String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/" + String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix + + # make sure either gcp or azr is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." + } } + # Version of this pipeline - String pipeline_version = "1.3.6" + String pipeline_version = "1.4.0" if (false) { String? none = "None" @@ -72,7 +89,8 @@ workflow MultiSampleSmartSeq2SingleNucleus { call StarAlign.STARGenomeRefVersion as ReferenceCheck { input: - tar_star_reference = tar_star_reference + tar_star_reference = tar_star_reference, + ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } call TrimAdapters.TrimAdapters as TrimAdapters { diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json b/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json index 8fafd92173..db8f68b114 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json @@ -18,5 +18,6 @@ "SM-GE644_S117_E1-50_GCGTAGTA-AAGGAGTA", "SM-GE644_S118_E1-50_GCGTAGTA-CTAAGCCT" ], - "MultiSampleSmartSeq2SingleNucleus.batch_id": "SM-GE644" + "MultiSampleSmartSeq2SingleNucleus.batch_id": "SM-GE644", + "MultiSampleSmartSeq2SingleNucleus.cloud_provider": "gcp" } diff --git a/pipelines/skylab/snm3C/snm3C.changelog.md b/pipelines/skylab/snm3C/snm3C.changelog.md index 
7d0b92b246..8cf6455276 100644 --- a/pipelines/skylab/snm3C/snm3C.changelog.md +++ b/pipelines/skylab/snm3C/snm3C.changelog.md @@ -1,3 +1,8 @@ +# 4.0.2 +2024-07-09 (Date of Last Commit) + +* Updated the snM3C wdl to run on Azure. cloud_provider is a new, required input. + # 4.0.1 2024-06-26 (Date of Last Commit) * Added task to untar files and output files at cell level diff --git a/pipelines/skylab/snm3C/snm3C.wdl b/pipelines/skylab/snm3C/snm3C.wdl index dfad7773f5..dc1f47b00e 100644 --- a/pipelines/skylab/snm3C/snm3C.wdl +++ b/pipelines/skylab/snm3C/snm3C.wdl @@ -1,4 +1,6 @@ version 1.0 +import "../../../tasks/broad/Utilities.wdl" as utils + workflow snm3C { @@ -7,6 +9,7 @@ workflow snm3C { Array[File] fastq_input_read2 File random_primer_indexes String plate_id + String cloud_provider # mapping inputs File tarred_index_files File genome_fa @@ -23,11 +26,25 @@ workflow snm3C { Int num_downstr_bases = 2 Int compress_level = 5 Int batch_number - String docker = "us.gcr.io/broad-gotc-prod/m3c-yap-hisat:2.4" + } + #docker images + String m3c_yap_hisat_docker = "m3c-yap-hisat:2.4" + # Determine docker prefix based on cloud provider + String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" + String acr_docker_prefix = "dsppipelinedev.azurecr.io/" + String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix + String cromwell_root_dir = if cloud_provider == "gcp" then "/cromwell_root" else "/cromwell-executions" + + # make sure either gcp or azr is supplied as cloud_provider input + if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { + call utils.ErrorWithMessage as ErrorMessageIncorrectInput { + input: + message = "cloud_provider must be supplied with either 'gcp' or 'azure'." 
+ } } # version of the pipeline - String pipeline_version = "4.0.1" + String pipeline_version = "4.0.2" call Demultiplexing { input: @@ -35,8 +52,8 @@ workflow snm3C { fastq_input_read2 = fastq_input_read2, random_primer_indexes = random_primer_indexes, plate_id = plate_id, - docker = docker, - batch_number = batch_number + batch_number = batch_number, + docker = docker_prefix + m3c_yap_hisat_docker, } scatter(tar in Demultiplexing.tarred_demultiplexed_fastqs) { @@ -54,7 +71,9 @@ workflow snm3C { r2_left_cut = r2_left_cut, r2_right_cut = r2_right_cut, plate_id = plate_id, - docker = docker + docker = docker_prefix + m3c_yap_hisat_docker, + cromwell_root_dir = cromwell_root_dir, + cloud_provider = cloud_provider, } call Hisat_single_end as Hisat_single_end { @@ -63,20 +82,24 @@ workflow snm3C { tarred_index_files = tarred_index_files, genome_fa = genome_fa, plate_id = plate_id, - docker = docker + docker = docker_prefix + m3c_yap_hisat_docker, + cromwell_root_dir = cromwell_root_dir, + cloud_provider = cloud_provider } call Merge_sort_analyze as Merge_sort_analyze { input: paired_end_unique_tar = Hisat_paired_end.unique_bam_tar, - read_overlap_tar = Hisat_single_end.remove_overlaps_output_bam_tar, - genome_fa = genome_fa, + read_overlap_tar = Hisat_single_end.remove_overlaps_output_bam_tar, + genome_fa = genome_fa, num_upstr_bases = num_upstr_bases, num_downstr_bases = num_downstr_bases, compress_level = compress_level, chromosome_sizes = chromosome_sizes, plate_id = plate_id, - docker = docker + docker = docker_prefix + m3c_yap_hisat_docker, + cromwell_root_dir = cromwell_root_dir, + cloud_provider = cloud_provider } } @@ -89,7 +112,7 @@ workflow snm3C { unique_reads_cgn_extraction_allc_extract = Merge_sort_analyze.extract_allc_output_allc_tar, unique_reads_cgn_extraction_tbi_extract = Merge_sort_analyze.extract_allc_output_tbi_tar, plate_id = plate_id, - docker = docker + docker = docker_prefix + m3c_yap_hisat_docker } call Summary { @@ -103,7 +126,9 @@ workflow 
snm3C { allc_uniq_reads_stats = Merge_sort_analyze.allc_uniq_reads_stats, unique_reads_cgn_extraction_tbi = Merge_sort_analyze.extract_allc_output_tbi_tar, plate_id = plate_id, - docker = docker + docker = docker_prefix + m3c_yap_hisat_docker, + cromwell_root_dir = cromwell_root_dir, + cloud_provider = cloud_provider } meta { @@ -139,29 +164,31 @@ task Demultiplexing { command <<< set -euo pipefail + WORKING_DIR=`pwd` # Cat files for each r1, r2 - cat ~{sep=' ' fastq_input_read1} > r1.fastq.gz - cat ~{sep=' ' fastq_input_read2} > r2.fastq.gz + cat ~{sep=' ' fastq_input_read1} > $WORKING_DIR/r1.fastq.gz + cat ~{sep=' ' fastq_input_read2} > $WORKING_DIR/r2.fastq.gz # Run cutadapt /opt/conda/bin/cutadapt -Z -e 0.01 --no-indels -j 8 \ -g file:~{random_primer_indexes} \ -o ~{plate_id}-{name}-R1.fq.gz \ -p ~{plate_id}-{name}-R2.fq.gz \ - r1.fastq.gz \ - r2.fastq.gz \ - > ~{plate_id}.stats.txt + $WORKING_DIR/r1.fastq.gz \ + $WORKING_DIR/r2.fastq.gz \ + > $WORKING_DIR/~{plate_id}.stats.txt # remove the fastq files that end in unknown-R1.fq.gz and unknown-R2.fq.gz - rm *-unknown-R{1,2}.fq.gz + rm $WORKING_DIR/*-unknown-R{1,2}.fq.gz python3 < threshold: os.remove(file_path) - print(f'Removed file: {filename}') CODE # Batch the fastq files into folders of batch_number size @@ -195,17 +220,18 @@ task Demultiplexing { # Counter for the folder index folder_index=1 + WORKING_DIR=`pwd` # Define lists of r1 and r2 fq files - R1_files=($(ls | grep "\-R1.fq.gz")) - R2_files=($(ls | grep "\-R2.fq.gz")) + R1_files=($(ls $WORKING_DIR | grep "\-R1.fq.gz")) + R2_files=($(ls $WORKING_DIR | grep "\-R2.fq.gz")) # Distribute the FASTQ files and create TAR files for file in "${R1_files[@]}"; do sample_id=$(basename "$file" "-R1.fq.gz") r2_file="${sample_id}-R2.fq.gz" - mv $file batch$((folder_index))/$file - mv $r2_file batch$((folder_index))/$r2_file + mv $WORKING_DIR/$file batch$((folder_index))/$file + mv $WORKING_DIR/$r2_file batch$((folder_index))/$r2_file # Increment the counter 
folder_index=$(( (folder_index % $batch_number) + 1 )) done @@ -213,9 +239,8 @@ task Demultiplexing { # Tar up files per batch echo "TAR files" for i in $(seq 1 "${batch_number}"); do - tar -cf - batch${i}/*.fq.gz | pigz > ~{plate_id}.${i}.cutadapt_output_files.tar.gz + tar -cf - $WORKING_DIR/batch${i}/*.fq.gz | pigz > ~{plate_id}.${i}.cutadapt_output_files.tar.gz done - echo "TAR files created successfully." >>> runtime { @@ -240,6 +265,8 @@ task Hisat_paired_end { File chromosome_sizes String plate_id String docker + String cromwell_root_dir + String cloud_provider String r1_adapter String r2_adapter @@ -259,7 +286,8 @@ task Hisat_paired_end { set -euo pipefail set -x lscpu - + WORKING_DIR=`pwd` + # check genomic reference version and print to output txt file STRING=~{genome_fa} BASE=$(basename $STRING .fa) @@ -269,33 +297,39 @@ task Hisat_paired_end { # untar the index files for hisat task start=$(date +%s) echo "Untarring tarred_index_files" - pigz -dc ~{tarred_index_files} | tar -xf - + pigz -dc ~{tarred_index_files} | tar -xf - rm ~{tarred_index_files} - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to untar tarred_index_files: $elapsed seconds" - + # get the basename of the genome_fa file cp ~{genome_fa} . 
genome_fa_basename=$(basename ~{genome_fa} .fa) - + start=$(date +%s) echo "samtools faidx $genome_fa_basename.fa" samtools faidx $genome_fa_basename.fa - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to samtools faidx: $elapsed seconds" min_read_length=~{min_read_length} - + # untar the demultiplexed fastqs for sort and trim task start=$(date +%s) echo "Untar demultiplexed fastqs" - pigz -dc ~{tarred_demultiplexed_fastqs} | tar -xf - - end=$(date +%s) - elapsed=$((end - start)) + pigz -dc ~{tarred_demultiplexed_fastqs} | tar -xf - + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to untar: $elapsed seconds" - + + if [ ~{cloud_provider} = "gcp" ]; then + batch_dir="~{cromwell_root_dir}~{cromwell_root_dir}/batch*/" + else + batch_dir="~{cromwell_root_dir}/*/*/*/*/*~{cromwell_root_dir}/*/*/*/*/batch*/" + fi + task() { local file=$1 sample_id=$(basename "$file" "-R1.fq.gz") @@ -303,23 +337,25 @@ task Hisat_paired_end { r2_file="${sample_id}-R2.fq.gz" r1_file="${sample_id}-R1.fq.gz" - - # sort + cp $batch_dir/"$r1_file" . + cp $batch_dir/"$r2_file" . 
+ + # sort start=$(date +%s) echo "Run sort r1" - zcat /cromwell_root/batch*/"$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq" - end=$(date +%s) - elapsed=$((end - start)) + zcat "$r1_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq" + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run sort r1: $elapsed seconds" - - # sort + + # sort start=$(date +%s) echo "Run sort r2" - zcat /cromwell_root/batch*/"$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq" - end=$(date +%s) - elapsed=$((end - start)) + zcat "$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq" + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run sort r2: $elapsed seconds" - + # trim using cutadapt start=$(date +%s) echo "Run cutadapt" @@ -338,14 +374,20 @@ task Hisat_paired_end { -p ${sample_id}-R2_trimmed.fq.gz \ ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq \ > ${sample_id}.trimmed.stats.txt - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run cutadapt: $elapsed seconds" - + # hisat run start=$(date +%s) echo "Run hisat" - hisat-3n /cromwell_root/$genome_fa_basename \ + if [ ~{cloud_provider} = "gcp" ]; then + hisat_index_file_dir="~{cromwell_root_dir}/$genome_fa_basename" + else + hisat_index_file_dir="$WORKING_DIR/$genome_fa_basename" + fi + + hisat-3n $hisat_index_file_dir \ -q \ -1 ${sample_id}-R1_trimmed.fq.gz \ -2 ${sample_id}-R2_trimmed.fq.gz \ @@ -356,43 +398,42 @@ task Hisat_paired_end { -t \ --new-summary \ --summary-file ${sample_id}.hisat3n_dna_summary.txt \ - --threads 8 | samtools view -b -q 0 -o "${sample_id}.hisat3n_dna.unsort.bam" - end=$(date +%s) - elapsed=$((end - start)) + --threads 8 | samtools view -b -q 0 -o "${sample_id}.hisat3n_dna.unsort.bam" + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run hisat: 
$elapsed seconds" - + # call separate_unique_and_multi_align_reads start=$(date +%s) echo "Run separate_unique_and_multi_align_reads" python3 -c 'from cemba_data.hisat3n import separate_unique_and_multi_align_reads;separate_unique_and_multi_align_reads(in_bam_path="'"$sample_id"'.hisat3n_dna.unsort.bam", out_unique_path="'"$sample_id"'.hisat3n_dna.unique_aligned.bam", out_multi_path="'"$sample_id"'.hisat3n_dna.multi_aligned.bam", out_unmappable_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq", unmappable_format="fastq", mapq_cutoff=10, qlen_cutoff='"$min_read_length"')' - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run separate_unique_and_multi_align_reads: $elapsed seconds" - + # call split_hisat3n_unmapped_reads start=$(date +%s) echo "Run split_hisat3n_unmapped_reads" python3 -c 'from cemba_data.hisat3n import *;split_hisat3n_unmapped_reads(fastq_path="'"$sample_id"'.hisat3n_dna.unmapped.fastq",output_prefix="'"$sample_id"'.hisat3n_dna.split_reads",min_length='"$min_read_length"')' - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run split_hisat3n_unmapped_reads: $elapsed seconds" - - rm /cromwell_root/batch*/${sample_id}-R1.fq.gz /cromwell_root/batch*/${sample_id}-R2.fq.gz + + rm ${sample_id}-R1.fq.gz ${sample_id}-R2.fq.gz rm ${sample_id}-R1_sorted.fq ${sample_id}-R2_sorted.fq rm ${sample_id}-R1_trimmed.fq.gz ${sample_id}-R2_trimmed.fq.gz rm ${sample_id}.hisat3n_dna.unsort.bam ${sample_id}.hisat3n_dna.multi_aligned.bam rm ${sample_id}.hisat3n_dna.unmapped.fastq } - # define lists of r1 and r2 fq files - R1_files=($(ls batch*/ | grep "\-R1.fq.gz")) - R2_files=($(ls batch*/ | grep "\-R2.fq.gz")) - # run 6 instances of task in parallel + R1_files=($(ls $batch_dir | grep "\-R1.fq.gz")) + R2_files=($(ls $batch_dir | grep "\-R2.fq.gz")) + + # run 6 instances of task in parallel for file in "${R1_files[@]}"; do ( echo "starting task $file.." 
- du -h batch*/$file task "$file" sleep $(( (RANDOM % 3) + 1)) ) & @@ -405,13 +446,13 @@ task Hisat_paired_end { wait echo "Tasks all done." du -h * - - #################################### + + #################################### ## make sure that the number of output bams equals the length of R1_files # Count the number of *.hisat3n_dna.unique_aligned.bam files bam_count=$(find . -maxdepth 1 -type f -name '*.hisat3n_dna.unique_aligned.bam' | wc -l) fastq_counts=$(find . -maxdepth 1 -type f -name '*.split_reads*.fastq' | wc -l) - + # Get the length of the array ${R1_files[@]} array_length=${#R1_files[@]} @@ -420,39 +461,39 @@ task Hisat_paired_end { echo "Error: Number of BAM files does not match the length of the array." exit 1 fi - + # Check if the count of FASTQ files matches the length of the array ${R1_files[@]} if [ "$fastq_counts" -ne "$((2 * array_length))" ]; then - echo "Error: Number of FASTQ files ($fastq_count) does not match the 2 * length of the array (${#R1_files[@]})." + echo "Error: Number of FASTQ files: $fastq_counts does not match the 2 * length of the array: ${#R1_files[@]}." exit 1 fi echo "Number of BAM and FASTQ files matches the length of the array." 
- #################################### + #################################### # tar up stats echo "Tar up stats" start=$(date +%s) tar -cf - *.trimmed.stats.txt | pigz > ~{plate_id}.trimmed_stats_files.tar.gz tar -cf - *.hisat3n_dna_summary.txt | pigz > ~{plate_id}.hisat3n_paired_end_stats_files.tar.gz - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run tar stats $elapsed seconds" # tar up the uniqe bams echo "Tar up unique bams" start=$(date +%s) tar -cf - *.hisat3n_dna.unique_aligned.bam | pigz > ~{plate_id}.hisat3n_paired_end_unique_bam_files.tar.gz - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run tar unique bams $elapsed seconds" # tar up the split fastq files echo "Tar up fastqs" start=$(date +%s) tar -cf - *.split_reads*.fastq | pigz > ~{plate_id}.hisat3n_paired_end_split_fastq_files.tar.gz - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run tar fastqs $elapsed seconds" >>> @@ -482,52 +523,55 @@ task Hisat_single_end { File tarred_index_files String plate_id String docker + String cromwell_root_dir + String cloud_provider - Int disk_size = 1000 - Int mem_size = 64 + Int disk_size = 1000 + Int mem_size = 64 Int cpu = 32 Int preemptible_tries = 2 - String cpu_platform = "Intel Ice Lake" + String cpu_platform = "Intel Ice Lake" } command <<< set -euo pipefail set -x lscpu - + WORKING_DIR=`pwd` + # untar the tarred index files echo "Untar tarred_index_files" - start=$(date +%s) - pigz -dc ~{tarred_index_files} | tar -xf - + start=$(date +%s) + pigz -dc ~{tarred_index_files} | tar -xf - rm ~{tarred_index_files} - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to untar tarred_index_files: $elapsed seconds" - + cp ~{genome_fa} . 
#get the basename of the genome_fa file echo "samtools faidx" - start=$(date +%s) + start=$(date +%s) genome_fa_basename=$(basename ~{genome_fa} .fa) samtools faidx $genome_fa_basename.fa - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to samtools faidx: $elapsed seconds" - + # untar the unmapped fastq files echo "Untar split_fq_tar" - start=$(date +%s) - pigz -dc ~{split_fq_tar} | tar -xf - + start=$(date +%s) + pigz -dc ~{split_fq_tar} | tar -xf - rm ~{split_fq_tar} - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to untar split_fq_tar: $elapsed seconds" - - # make directories - mkdir -p /cromwell_root/merged_sort_bams - mkdir -p /cromwell_root/read_overlap - + + # make directories + mkdir -p ~{cromwell_root_dir}/merged_sort_bams + mkdir -p ~{cromwell_root_dir}/read_overlap + # define lists of r1 and r2 fq files R1_files=($(ls | grep "\.hisat3n_dna.split_reads.R1.fastq")) R2_files=($(ls | grep "\.hisat3n_dna.split_reads.R2.fastq")) @@ -536,12 +580,19 @@ task Hisat_single_end { BASE=$(basename "$file" ".hisat3n_dna.split_reads.R1.fastq") echo $BASE echo "Running hisat on sample_id_R1" $BASE - - echo "Hisat 3n R1" - start=$(date +%s) - + + echo "Hisat 3n R1" + start=$(date +%s) + + if [ ~{cloud_provider} = "gcp" ]; then + hisat_index_file_dir="~{cromwell_root_dir}/$genome_fa_basename" + else + hisat_index_file_dir="$WORKING_DIR/$genome_fa_basename" + fi + + # hisat on R1 single end - hisat-3n /cromwell_root/$genome_fa_basename \ + hisat-3n $hisat_index_file_dir \ -q \ -U ${BASE}.hisat3n_dna.split_reads.R1.fastq \ -S ${BASE}.hisat3n_dna.split_reads.R1.sam --directional-mapping-reverse --base-change C,T \ @@ -551,19 +602,19 @@ task Hisat_single_end { -t \ --new-summary \ --summary-file ${BASE}.hisat3n_dna_split_reads_summary.R1.txt \ - --threads 8 - - end=$(date +%s) - elapsed=$((end - start)) + --threads 8 + + end=$(date +%s) + elapsed=$((end - 
start)) echo "Elapsed time to run $elapsed seconds" echo "Finish running hisat on sample_id_R1" $BASE - - echo "Hisat 3n R2" - start=$(date +%s) + + echo "Hisat 3n R2" + start=$(date +%s) echo "Running hisat on sample_id_R2" $BASE # hisat on R2 single end - hisat-3n /cromwell_root/$genome_fa_basename \ + hisat-3n $hisat_index_file_dir \ -q \ -U ${BASE}.hisat3n_dna.split_reads.R2.fastq \ -S ${BASE}.hisat3n_dna.split_reads.R2.sam --directional-mapping --base-change C,T \ @@ -574,44 +625,50 @@ task Hisat_single_end { --summary-file ${BASE}.hisat3n_dna_split_reads_summary.R2.txt \ --threads 8 - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run $elapsed seconds" echo "Finish running hisat on sample_id_R2" $BASE - + # samtools merge - echo "samtools merge R1 and R2" - start=$(date +%s) + echo "samtools merge R1 and R2" + start=$(date +%s) samtools merge -o ${BASE}.name_merged.sam ${BASE}.hisat3n_dna.split_reads.R1.sam ${BASE}.hisat3n_dna.split_reads.R2.sam -@8 - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run samtools merge $elapsed seconds" - - # samtools sort - echo "samtools sort R1 and R2" - start=$(date +%s) + + # samtools sort + echo "samtools sort R1 and R2" + start=$(date +%s) samtools sort -n -@8 -m1g ${BASE}.name_merged.sam -o ${BASE}.name_sorted.bam - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run samtools sort $elapsed seconds" # samtools filter bam - echo "samtools -q 10" - start=$(date +%s) + echo "samtools -q 10" + start=$(date +%s) samtools view -q 10 ${BASE}.name_sorted.bam -o ${BASE}.name_sorted.filtered.bam - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run samtools -q 10 $elapsed seconds" + if [ ~{cloud_provider} = "gcp" ]; then + bam_path_prefix="~{cromwell_root_dir}" + else + 
bam_path_prefix=$WORKING_DIR + fi + # remove_overlap_read_parts - echo "call remove_overlap_read_parts" - start=$(date +%s) - python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path=os.path.join(os.path.sep,"cromwell_root","'"$BASE"'.name_sorted.filtered.bam"),out_bam_path=os.path.join(os.path.sep,"cromwell_root","'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam"))' - end=$(date +%s) - elapsed=$((end - start)) + echo "call remove_overlap_read_parts" + start=$(date +%s) + python3 -c 'from cemba_data.hisat3n import *;import os;remove_overlap_read_parts(in_bam_path="'"$BASE"'.name_sorted.filtered.bam",out_bam_path="'"$BASE"'.hisat3n_dna.split_reads.read_overlap.bam")' + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run remove overlap $elapsed seconds" - - # remove files + + # remove files rm ${BASE}.hisat3n_dna.split_reads.R1.fastq ${BASE}.hisat3n_dna.split_reads.R2.fastq rm ${BASE}.hisat3n_dna.split_reads.R1.sam ${BASE}.hisat3n_dna.split_reads.R2.sam rm ${BASE}.name_merged.sam @@ -639,10 +696,10 @@ task Hisat_single_end { ## make sure that the number of output bams equals the length of R1_files # Count the number of bam files bam_count=$(find . -maxdepth 1 -type f -name '*read_overlap.bam' | wc -l) - + # Get the length of the array ${R1_files[@]} array_length=${#R1_files[@]} - + # Check if the count of bams matches the length of the array ${R1_files[@]} if [ "$bam_count" -ne "$array_length" ]; then echo "Error: Number of BAM files does not match the length of the array." 
@@ -656,16 +713,16 @@ task Hisat_single_end { # tar up the r1 and r2 stats files -p to set number of threads tar -cf - *.hisat3n_dna_split_reads_summary.R1.txt | pigz > ~{plate_id}.hisat3n_dna_split_reads_summary.R1.tar.gz tar -cf - *.hisat3n_dna_split_reads_summary.R2.txt | pigz > ~{plate_id}.hisat3n_dna_split_reads_summary.R2.tar.gz - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run tar summary text files $elapsed seconds" - + # tar up read overlap files echo "Tar up read_overlap bams" start=$(date +%s) tar -cf - *read_overlap.bam | pigz > ~{plate_id}.remove_overlap_read_parts.tar.gz - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to tar read_overlap bams $elapsed seconds" >>> @@ -682,16 +739,18 @@ task Hisat_single_end { File hisat3n_dna_split_reads_summary_R1_tar = "~{plate_id}.hisat3n_dna_split_reads_summary.R1.tar.gz" File hisat3n_dna_split_reads_summary_R2_tar = "~{plate_id}.hisat3n_dna_split_reads_summary.R2.tar.gz" File remove_overlaps_output_bam_tar = "~{plate_id}.remove_overlap_read_parts.tar.gz" - + } } - + task Merge_sort_analyze { input { String plate_id File paired_end_unique_tar File read_overlap_tar String docker + String cromwell_root_dir + String cloud_provider #input for allcools bam-to-allc File genome_fa @@ -712,33 +771,35 @@ task Merge_sort_analyze { set -euo pipefail set -x lscpu - + + WORKING_DIR=`pwd` + # unzip tars echo "Untar paired_end_unique_tar" - start=$(date +%s) - pigz -dc ~{paired_end_unique_tar} | tar -xf - + start=$(date +%s) + pigz -dc ~{paired_end_unique_tar} | tar -xf - rm ~{paired_end_unique_tar} - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to untar paired_end_unique_tar: $elapsed seconds" echo "Untar read_overlap_tar" - start=$(date +%s) - pigz -dc ~{read_overlap_tar} | tar -xf - + start=$(date +%s) + pigz -dc ~{read_overlap_tar} | tar 
-xf - rm ~{read_overlap_tar} - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to untar read_overlap_tar: $elapsed seconds" - - # reference and index - start=$(date +%s) + + # reference and index + start=$(date +%s) echo "Reference and index fasta" mkdir reference cp ~{genome_fa} reference ls reference samtools faidx reference/*.fa - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to index fasta $elapsed seconds" # define lists of r1 and r2 fq files @@ -753,94 +814,100 @@ task Merge_sort_analyze { fi # make directories - mkdir /cromwell_root/output_bams - mkdir /cromwell_root/temp - mkdir /cromwell_root/allc-${mcg_context} - + mkdir ~{cromwell_root_dir}/output_bams + mkdir ~{cromwell_root_dir}/temp + mkdir ~{cromwell_root_dir}/allc-${mcg_context} + task() { local file=$1 sample_id=$(basename "$file" ".hisat3n_dna.unique_aligned.bam") echo $sample_id - start=$(date +%s) + start=$(date +%s) echo "Merge all unique_aligned and read_overlap" samtools merge -f "${sample_id}.hisat3n_dna.all_reads.bam" "${sample_id}.hisat3n_dna.unique_aligned.bam" "${sample_id}.hisat3n_dna.split_reads.read_overlap.bam" -@4 - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run merge $elapsed seconds" - start=$(date +%s) + start=$(date +%s) echo "Sort all reads by name" - samtools sort -n -@4 -m1g -o "${sample_id}.hisat3n_dna.all_reads.name_sort.bam" "${sample_id}.hisat3n_dna.all_reads.bam" - end=$(date +%s) - elapsed=$((end - start)) + samtools sort -n -@4 -m1g -o "${sample_id}.hisat3n_dna.all_reads.name_sort.bam" "${sample_id}.hisat3n_dna.all_reads.bam" + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run sort by name $elapsed seconds" - - start=$(date +%s) + + start=$(date +%s) echo "Sort all reads by position" - samtools sort -O BAM -@4 -m1g -o "${sample_id}.hisat3n_dna.all_reads.pos_sort.bam" 
"${sample_id}.hisat3n_dna.all_reads.name_sort.bam" - end=$(date +%s) - elapsed=$((end - start)) + samtools sort -O BAM -@4 -m1g -o "${sample_id}.hisat3n_dna.all_reads.pos_sort.bam" "${sample_id}.hisat3n_dna.all_reads.name_sort.bam" + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run sort by pos $elapsed seconds" - - start=$(date +%s) + + start=$(date +%s) echo "Call Picard remove duplicates" name=${sample_id}.hisat3n_dna.all_reads.deduped - picard MarkDuplicates I=${sample_id}.hisat3n_dna.all_reads.pos_sort.bam O=/cromwell_root/output_bams/${name}.bam \ - M=/cromwell_root/output_bams/${name}.matrix.txt \ - REMOVE_DUPLICATES=true TMP_DIR=/cromwell_root/temp - end=$(date +%s) - elapsed=$((end - start)) + picard MarkDuplicates I=${sample_id}.hisat3n_dna.all_reads.pos_sort.bam O=~{cromwell_root_dir}/output_bams/${name}.bam \ + M=~{cromwell_root_dir}/output_bams/${name}.matrix.txt \ + REMOVE_DUPLICATES=true TMP_DIR=~{cromwell_root_dir}/temp + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to run picard $elapsed seconds" - - start=$(date +%s) + + start=$(date +%s) echo "Call samtools index" - samtools index /cromwell_root/output_bams/${name}.bam - end=$(date +%s) - elapsed=$((end - start)) - echo "Elapsed time to samtools index $elapsed seconds" - - start=$(date +%s) - echo "Call chromatin contacts from name sorted bams" + samtools index ~{cromwell_root_dir}/output_bams/${name}.bam + end=$(date +%s) + elapsed=$((end - start)) + echo "Elapsed time to samtools index $elapsed seconds" + + start=$(date +%s) + echo "Call chromatin contacts from name sorted bams" python3 -c 'from cemba_data.hisat3n import *;import os;import glob;call_chromatin_contacts(bam_path="'"$sample_id"'.hisat3n_dna.all_reads.name_sort.bam",contact_prefix="'"$sample_id"'.hisat3n_dna.all_reads",save_raw=False,save_hic_format=True)' - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to chromatin contacts $elapsed 
seconds" - start=$(date +%s) - echo "Call allcools bam-to-allc from deduped.bams" + if [ ~{cloud_provider} = "gcp" ]; then + reference_fasta="~{cromwell_root_dir}/reference/~{genome_base}" + else + reference_fasta="$WORKING_DIR/reference/~{genome_base}" + fi + + start=$(date +%s) + echo "Call allcools bam-to-allc from deduped.bams" /opt/conda/bin/allcools bam-to-allc \ - --bam_path /cromwell_root/output_bams/${name}.bam \ - --reference_fasta /cromwell_root/reference/~{genome_base} \ + --bam_path ~{cromwell_root_dir}/output_bams/${name}.bam \ + --reference_fasta $reference_fasta \ --output_path "${sample_id}.allc.tsv.gz" \ --num_upstr_bases ~{num_upstr_bases} \ --num_downstr_bases ~{num_downstr_bases} \ --compress_level ~{compress_level} \ --save_count_df \ --convert_bam_strandness - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to allcools bam-to-allc $elapsed seconds" - start=$(date +%s) - echo "Call allcools extract-all" + start=$(date +%s) + echo "Call allcools extract-all" allcools extract-allc --strandness merge \ --allc_path ${sample_id}.allc.tsv.gz \ - --output_prefix /cromwell_root/allc-${mcg_context}/${sample_id} \ + --output_prefix ~{cromwell_root_dir}/allc-${mcg_context}/${sample_id} \ --mc_contexts ${mcg_context} \ --chrom_size_path ~{chromosome_sizes} - end=$(date +%s) - elapsed=$((end - start)) + end=$(date +%s) + elapsed=$((end - start)) echo "Elapsed time to allcools extract-all $elapsed seconds" - + echo "Remove some bams" rm ${sample_id}.hisat3n_dna.all_reads.bam rm ${sample_id}.hisat3n_dna.all_reads.pos_sort.bam - rm /cromwell_root/${sample_id}.hisat3n_dna.split_reads.read_overlap.bam - rm /cromwell_root/${sample_id}.hisat3n_dna.unique_aligned.bam + rm ~{cromwell_root_dir}/${sample_id}.hisat3n_dna.split_reads.read_overlap.bam + rm ~{cromwell_root_dir}/${sample_id}.hisat3n_dna.unique_aligned.bam } - - # run 4 instances of task in parallel + + # run 4 instances of task in parallel for 
file in "${UNIQUE_BAMS[@]}"; do ( echo "starting task $file.." @@ -862,7 +929,7 @@ task Merge_sort_analyze { # Count the number of *.hisat3n_dna.unique_aligned.bam files bam_count=$(find . -maxdepth 1 -type f -name '*.hisat3n_dna.all_reads.name_sort.bam' | wc -l) contact_count=$(find . -maxdepth 1 -type f -name '*.hisat3n_dna.all_reads.3C.contact.tsv.gz' | wc -l) - + # Get the length of the array ${UNIQUE_BAMS[@]} array_length=${#UNIQUE_BAMS[@]} @@ -879,21 +946,24 @@ task Merge_sort_analyze { echo "Number of output files matches the length of the array." #################################### - echo "Tar files." - tar -cf - output_bams/*.matrix.txt | pigz > ~{plate_id}.dedup_unique_bam_and_index_unique_bam_stats.tar.gz + echo "recursively ls'sing cromwell root again" + ls -lR ~{cromwell_root_dir} + + echo "Tar files." + tar -cf - ~{cromwell_root_dir}/output_bams/*.matrix.txt | pigz > ~{plate_id}.dedup_unique_bam_and_index_unique_bam_stats.tar.gz tar -cf - *.hisat3n_dna.all_reads.name_sort.bam | pigz > ~{plate_id}.hisat3n_dna.all_reads.name_sort.tar.gz - + # tar outputs of call_chromatin_contacts tar -cf - *.hisat3n_dna.all_reads.3C.contact.tsv.gz | pigz > ~{plate_id}.hisat3n_dna.all_reads.3C.contact.tar.gz tar -cf - *.hisat3n_dna.all_reads.dedup_contacts.tsv.gz | pigz > ~{plate_id}.hisat3n_dna.all_reads.dedup_contacts.tar.gz tar -cf - *.hisat3n_dna.all_reads.contact_stats.csv | pigz > ~{plate_id}.chromatin_contact_stats.tar.gz - + # tar outputs of allcools tar -cf - *.allc.tsv.gz | pigz > ~{plate_id}.allc.tsv.tar.gz tar -cf - *.allc.tsv.gz.tbi | pigz > ~{plate_id}.allc.tbi.tar.gz tar -cf - *.allc.tsv.gz.count.csv | pigz > ~{plate_id}.allc.count.tar.gz - tar -cf - /cromwell_root/allc-${mcg_context}/*.gz | pigz > ~{plate_id}.extract-allc.tar.gz - tar -cf - /cromwell_root/allc-${mcg_context}/*.tbi | pigz > ~{plate_id}.extract-allc_tbi.tar.gz + tar -cf - ~{cromwell_root_dir}/allc-${mcg_context}/*.gz | pigz > ~{plate_id}.extract-allc.tar.gz + tar -cf - 
~{cromwell_root_dir}/allc-${mcg_context}/*.tbi | pigz > ~{plate_id}.extract-allc_tbi.tar.gz >>> runtime { @@ -904,7 +974,7 @@ task Merge_sort_analyze { cpuPlatform: cpu_platform preemptible: preemptible_tries } - + output { File allc = "~{plate_id}.allc.tsv.tar.gz" File tbi = "~{plate_id}.allc.tbi.tar.gz" @@ -939,30 +1009,30 @@ task Summary_PerCellOutput { command <<< set -euo pipefail set -x - + # Set root_dir to current working directory root_dir=$(pwd) echo "This is the root directory " $root_dir - + extract_and_remove() { if [ $# -eq 0 ]; then echo "No files exist" return fi - + for tarred_file in "${@}"; do dir_name=`basename "${tarred_file%.tar.gz}"` echo $dir_name - mkdir -p "$root_dir"/"$dir_name" + mkdir -p "$root_dir"/"$dir_name" pigz -dc "$tarred_file" | tar -xvf - -C "$root_dir"/"$dir_name" rm "$tarred_file" done } - + # output files at a cell level echo "Untar files needed at per cell level" - extract_and_remove ~{sep=' ' name_sorted_bams} + extract_and_remove ~{sep=' ' name_sorted_bams} extract_and_remove ~{sep=' ' unique_reads_cgn_extraction_allc} extract_and_remove ~{sep=' ' unique_reads_cgn_extraction_tbi} extract_and_remove ~{sep=' ' all_reads_3C_contacts} @@ -975,7 +1045,7 @@ task Summary_PerCellOutput { docker: docker disks: "local-disk ${disk_size} SSD" cpu: cpu - memory: "${mem_size} GiB" + memory: "${mem_size} GiB" } output { @@ -999,6 +1069,8 @@ task Summary { Array[File] allc_uniq_reads_stats Array[File] unique_reads_cgn_extraction_tbi String plate_id + String cromwell_root_dir + String cloud_provider String docker Int disk_size = 80 @@ -1009,10 +1081,22 @@ task Summary { command <<< set -euo pipefail - mkdir /cromwell_root/fastq - mkdir /cromwell_root/bam - mkdir /cromwell_root/allc - mkdir /cromwell_root/hic + WORKING_DIR=`pwd` + + if [ ~{cloud_provider} = "gcp" ]; then + base_directory=~{cromwell_root_dir} + matrix_files_dir="~{cromwell_root_dir}~{cromwell_root_dir}/output_bams" + 
allc_index_dir="~{cromwell_root_dir}~{cromwell_root_dir}/allc-*" + else + base_directory=$WORKING_DIR + matrix_files_dir="$WORKING_DIR~{cromwell_root_dir}/output_bams" + allc_index_dir="$WORKING_DIR~{cromwell_root_dir}/allc-*" + fi + + mkdir $base_directory/fastq + mkdir $base_directory/bam + mkdir $base_directory/allc + mkdir $base_directory/hic extract_and_remove() { if [ $# -eq 0 ]; @@ -1021,7 +1105,7 @@ task Summary { return fi for tar in "${@}"; do - tar -xf "$tar" + tar -xvf "$tar" rm "$tar" done } @@ -1035,12 +1119,12 @@ task Summary { extract_and_remove ~{sep=' ' allc_uniq_reads_stats} extract_and_remove ~{sep=' ' unique_reads_cgn_extraction_tbi} - mv *.trimmed.stats.txt /cromwell_root/fastq - mv *.hisat3n_dna_summary.txt *.hisat3n_dna_split_reads_summary.R1.txt *.hisat3n_dna_split_reads_summary.R2.txt /cromwell_root/bam - mv output_bams/*.hisat3n_dna.all_reads.deduped.matrix.txt /cromwell_root/bam - mv *.hisat3n_dna.all_reads.contact_stats.csv /cromwell_root/hic - mv *.allc.tsv.gz.count.csv /cromwell_root/allc - mv cromwell_root/allc-CGN/*.allc.tsv.gz.tbi /cromwell_root/allc + mv *.trimmed.stats.txt $base_directory/fastq + mv *.hisat3n_dna_summary.txt *.hisat3n_dna_split_reads_summary.R1.txt *.hisat3n_dna_split_reads_summary.R2.txt $base_directory/bam + mv $matrix_files_dir/*.hisat3n_dna.all_reads.deduped.matrix.txt $base_directory/bam + mv *.hisat3n_dna.all_reads.contact_stats.csv $base_directory/hic + mv *.allc.tsv.gz.count.csv $base_directory/allc + mv $allc_index_dir/*.allc.tsv.gz.tbi $base_directory/allc python3 -c 'from cemba_data.hisat3n import *;snm3c_summary()' mv MappingSummary.csv.gz ~{plate_id}_MappingSummary.csv.gz diff --git a/pipelines/skylab/snm3C/test_inputs/Plumbing/miseq_M16_G13.json b/pipelines/skylab/snm3C/test_inputs/Plumbing/miseq_M16_G13.json index af100dea41..fcacdc5069 100644 --- a/pipelines/skylab/snm3C/test_inputs/Plumbing/miseq_M16_G13.json +++ b/pipelines/skylab/snm3C/test_inputs/Plumbing/miseq_M16_G13.json @@ -19,5 +19,6 @@ 
"snm3C.batch_number": 2, "snm3C.Hisat_paired_end.cpu_platform" : "Intel Cascade Lake", "snm3C.Hisat_single_end.cpu_platform" : "Intel Cascade Lake", - "snm3C.Merge_sort_analyze.cpu_platform" : "Intel Cascade Lake" + "snm3C.Merge_sort_analyze.cpu_platform" : "Intel Cascade Lake", + "snm3C.cloud_provider" : "gcp" } diff --git a/pipelines/skylab/snm3C/test_inputs/Scientific/novaseq_M16_G13.json b/pipelines/skylab/snm3C/test_inputs/Scientific/novaseq_M16_G13.json index 0709e99fb9..e53437328d 100644 --- a/pipelines/skylab/snm3C/test_inputs/Scientific/novaseq_M16_G13.json +++ b/pipelines/skylab/snm3C/test_inputs/Scientific/novaseq_M16_G13.json @@ -19,5 +19,6 @@ "snm3C.batch_number": 2, "snm3C.Hisat_paired_end.cpu_platform" : "Intel Cascade Lake", "snm3C.Hisat_single_end.cpu_platform" : "Intel Cascade Lake", - "snm3C.Merge_sort_analyze.cpu_platform" : "Intel Cascade Lake" + "snm3C.Merge_sort_analyze.cpu_platform" : "Intel Cascade Lake", + "snm3C.cloud_provider" : "gcp" } diff --git a/tasks/broad/BamProcessing.wdl b/tasks/broad/BamProcessing.wdl index e5ae21039a..cf4ff4d4e2 100644 --- a/tasks/broad/BamProcessing.wdl +++ b/tasks/broad/BamProcessing.wdl @@ -24,6 +24,8 @@ task SortSam { Int compression_level Int additional_disk = 20 Int memory_multiplier = 1 + #Setting default docker value for workflows that haven't yet been azurized. + String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10" } # SortSam spills to disk a lot more because we are only store 300000 records in RAM now because its faster for our data so it needs # more disk space. 
Also it spills to disk in an uncompressed format so we need to account for that with a larger multiplier @@ -46,7 +48,7 @@ task SortSam { } runtime { - docker: "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10" + docker: docker disks: "local-disk " + disk_size + " HDD" cpu: "1" memory: "${machine_mem_mb} MiB" diff --git a/tasks/broad/DragenTasks.wdl b/tasks/broad/DragenTasks.wdl index 149eb5fd12..95b27f2fcc 100644 --- a/tasks/broad/DragenTasks.wdl +++ b/tasks/broad/DragenTasks.wdl @@ -24,6 +24,7 @@ task CalibrateDragstrModel { File str_table_file File alignment ## can handle cram or bam. File alignment_index + #Setting default docker value for workflows that haven't yet been azurized. String docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0" Int preemptible_tries = 3 Int threads = 4 diff --git a/tasks/broad/GermlineVariantDiscovery.wdl b/tasks/broad/GermlineVariantDiscovery.wdl index 0e3c8f2e6e..bdfa826dfc 100644 --- a/tasks/broad/GermlineVariantDiscovery.wdl +++ b/tasks/broad/GermlineVariantDiscovery.wdl @@ -27,6 +27,8 @@ task HaplotypeCaller_GATK35_GVCF { Float? contamination Int preemptible_tries Int hc_scatter + #Setting default docker value for workflows that haven't yet been azurized. + String docker = "us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384" } parameter_meta { @@ -66,7 +68,7 @@ task HaplotypeCaller_GATK35_GVCF { --read_filter OverclippedRead } runtime { - docker: "us.gcr.io/broad-gotc-prod/gatk:1.3.0-4.2.6.1-1649964384" + docker: docker preemptible: preemptible_tries memory: "10000 MiB" cpu: "1" @@ -96,17 +98,18 @@ task HaplotypeCaller_GATK4_VCF { Boolean use_dragen_hard_filtering = false Boolean use_spanning_event_genotyping = true File? dragstr_model + #Setting default docker value for workflows that haven't yet been azurized. 
String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0" Int memory_multiplier = 1 } - Int memory_size_mb = ceil(8000 * memory_multiplier) + Int memory_size_mb = ceil(8000 * memory_multiplier) + 2000 String output_suffix = if make_gvcf then ".g.vcf.gz" else ".vcf.gz" String output_file_name = vcf_basename + output_suffix Float ref_size = size(ref_fasta, "GiB") + size(ref_fasta_index, "GiB") + size(ref_dict, "GiB") - Int disk_size = ceil(((size(input_bam, "GiB") + 30) / hc_scatter) + ref_size) + 20 + Int disk_size = ceil(((size(input_bam, "GiB") + 30) / hc_scatter) + ref_size) + 50 String bamout_arg = if make_bamout then "-bamout ~{vcf_basename}.bamout.bam" else "" @@ -170,6 +173,8 @@ task MergeVCFs { Array[File] input_vcfs_indexes String output_vcf_name Int preemptible_tries = 3 + #Setting default docker value for workflows that haven't yet been azurized. + String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10" } Int disk_size = ceil(size(input_vcfs, "GiB") * 2.5) + 10 @@ -183,7 +188,7 @@ task MergeVCFs { OUTPUT=~{output_vcf_name} } runtime { - docker: "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10" + docker: docker preemptible: preemptible_tries memory: "3000 MiB" disks: "local-disk ~{disk_size} HDD" @@ -203,7 +208,7 @@ task Reblock { File ref_fasta File ref_fasta_index String output_vcf_filename - String docker_image = "us.gcr.io/broad-gatk/gatk:4.5.0.0" + String docker_path Int additional_disk = 20 String? annotations_to_keep_command String? 
annotations_to_remove_command @@ -240,7 +245,7 @@ task Reblock { disks: "local-disk " + disk_size + " HDD" bootDiskSizeGb: 15 preemptible: 3 - docker: docker_image + docker: docker_path } output { @@ -292,7 +297,7 @@ task DragenHardFilterVcf { Boolean make_gvcf String vcf_basename Int preemptible_tries - String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0" + String gatk_docker } Int disk_size = ceil(2 * size(input_vcf, "GiB")) + 20 diff --git a/tasks/broad/Qc.wdl b/tasks/broad/Qc.wdl index 98265f8dfb..fa771875c4 100644 --- a/tasks/broad/Qc.wdl +++ b/tasks/broad/Qc.wdl @@ -622,7 +622,8 @@ task ValidateVCF { Int preemptible_tries = 3 Boolean is_gvcf = true String? extra_args - String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0" + #Setting default docker value for workflows that haven't yet been azurized. + String docker_path = "us.gcr.io/broad-gatk/gatk:4.5.0.0" Int machine_mem_mb = 7000 } @@ -657,7 +658,7 @@ task ValidateVCF { ~{extra_args} } runtime { - docker: gatk_docker + docker: docker_path preemptible: preemptible_tries memory: machine_mem_mb + " MiB" bootDiskSizeGb: 15 @@ -677,6 +678,8 @@ task CollectVariantCallingMetrics { File evaluation_interval_list Boolean is_gvcf = true Int preemptible_tries + #Setting default docker value for workflows that haven't yet been azurized. 
+ String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10" } Int disk_size = ceil(size(input_vcf, "GiB") + size(dbsnp_vcf, "GiB")) + 20 @@ -692,7 +695,7 @@ task CollectVariantCallingMetrics { ~{true="GVCF_INPUT=true" false="" is_gvcf} } runtime { - docker: "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10" + docker: docker preemptible: preemptible_tries memory: "3000 MiB" disks: "local-disk " + disk_size + " HDD" diff --git a/tasks/broad/Utilities.wdl b/tasks/broad/Utilities.wdl index ce6c101368..e6a1aeec17 100644 --- a/tasks/broad/Utilities.wdl +++ b/tasks/broad/Utilities.wdl @@ -79,6 +79,8 @@ task ScatterIntervalList { File interval_list Int scatter_count Int break_bands_at_multiples_of + #Setting default docker value for workflows that haven't yet been azurized. + String docker = "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039" } command <<< @@ -110,7 +112,7 @@ task ScatterIntervalList { Int interval_count = read_int(stdout()) } runtime { - docker: "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039" + docker: docker memory: "2000 MiB" } } diff --git a/tasks/skylab/CheckInputs.wdl b/tasks/skylab/CheckInputs.wdl index b24c77c133..57fbcaad1a 100644 --- a/tasks/skylab/CheckInputs.wdl +++ b/tasks/skylab/CheckInputs.wdl @@ -55,6 +55,8 @@ task checkInputArrays { task checkOptimusInput { input { + String cloud_provider + #String SAS_TOKEN File r1_fastq String counting_mode Boolean force_no_check @@ -63,9 +65,12 @@ task checkOptimusInput { Int machine_mem_mb = 1000 Int cpu = 1 Int tenx_chemistry_version - String whitelist_v2 - String whitelist_v3 + String gcp_whitelist_v2 + String gcp_whitelist_v3 + String azure_whitelist_v2 + String azure_whitelist_v3 Boolean ignore_r1_read_length + String alpine_docker_path } meta { @@ -108,15 +113,36 @@ task checkOptimusInput { echo "ERROR: Invalid value count_exons should not be used with \"${counting_mode}\" input." 
fi fi + # Check for chemistry version to produce read structure and whitelist if [[ ~{tenx_chemistry_version} == 2 ]] then - WHITELIST=~{whitelist_v2} + if [[ "~{cloud_provider}" == "gcp" ]] + then + WHITELIST="~{gcp_whitelist_v2}" + elif [[ "~{cloud_provider}" == "azure" ]] + then + WHITELIST="~{azure_whitelist_v2}" + else + pass="false" + echo "ERROR: Cloud provider must be either gcp or azure" + fi + echo "WHITELIST:" $WHITELIST echo $WHITELIST > whitelist.txt echo 16C10M > read_struct.txt elif [[ ~{tenx_chemistry_version} == 3 ]] then - WHITELIST=~{whitelist_v3} + if [[ "~{cloud_provider}" == "gcp" ]] + then + WHITELIST="~{gcp_whitelist_v3}" + elif [[ "~{cloud_provider}" == "azure" ]] + then + WHITELIST="~{azure_whitelist_v3}" + else + pass="false" + echo "ERROR: Cloud provider must be either gcp or azure" + fi + echo "WHITELIST:" $WHITELIST echo $WHITELIST > whitelist.txt echo 16C12M > read_struct.txt else @@ -153,7 +179,7 @@ task checkOptimusInput { String read_struct_out = read_string("read_struct.txt") } runtime { - docker: "bashell/alpine-bash:latest" + docker: alpine_docker_path cpu: cpu memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index a4d7a8e615..20a7169d29 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -11,7 +11,8 @@ task FastqProcessing { String read_struct #using the latest build of warp-tools in GCR - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String warp_tools_docker_path + #runtime values Int machine_mem_mb = 40000 Int cpu = 16 @@ -34,7 +35,7 @@ task FastqProcessing { whitelist: "10x genomics cell barcode whitelist" chemistry: "chemistry employed, currently can be tenX_v2 or tenX_v3, the latter implies NO feature barcodes" sample_id: "name of sample matching this file, inserted into read group header" - docker: "(optional) the docker image containing the runtime environment for this task" 
+ warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -111,7 +112,7 @@ task FastqProcessing { } runtime { - docker: docker + docker: warp_tools_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES @@ -235,18 +236,15 @@ task FastqProcessingSlidSeq { task FastqProcessATAC { input { - Array[String] read1_fastq - Array[String] read3_fastq - Array[String] barcodes_fastq + Array[File] read1_fastq + Array[File] read3_fastq + Array[File] barcodes_fastq String read_structure = "16C" String barcode_orientation = "FIRST_BP_RC" String output_base_name File whitelist String barcode_index1 = basename(barcodes_fastq[0]) - - # [?] copied from corresponding optimus wdl for fastqprocessing - # using the latest build of warp-tools in GCR - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String docker_path # Runtime attributes [?] Int mem_size = 5 @@ -272,7 +270,7 @@ task FastqProcessATAC { read_structure: "A string that specifies the barcode (C) positions in the Read 2 fastq" barcode_orientation: "A string that specifies the orientation of barcode needed for scATAC data. The default is FIRST_BP. Other options include LAST_BP, FIRST_BP_RC or LAST_BP_RC." 
whitelist: "10x genomics cell barcode whitelist for scATAC" - docker: "(optional) the docker image containing the runtime environment for this task" + docker_path: "The docker image path containing the runtime environment for this task" mem_size: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk_size: "(optional) the amount of disk space (GiB) to provision for this task" @@ -296,12 +294,39 @@ task FastqProcessATAC { echo $read1_fastq_files # Make downsample fq for barcode orientation check of R2 barcodes - mkdir /cromwell_root/input_fastq - gcloud storage cp $read1_fastq_files /cromwell_root/input_fastq - gcloud storage cp $read2_fastq_files /cromwell_root/input_fastq - gcloud storage cp $read3_fastq_files /cromwell_root/input_fastq + mkdir -p input_fastqs + + # Function to move files into the input_fastqs directory + move_files_to_input_dir() { + local -n array=$1 # Reference to the array passed as argument + local destination_dir=$2 + + for file in "${array[@]}"; do + if [ -f "$file" ]; then # Check if file exists + echo "Moving $file to $destination_dir" + mv "$file" "$destination_dir" + else + echo "File $file not found" + fi + done + } + + # Move files from FASTQ1_ARRAY to input_fastqs directory + move_files_to_input_dir FASTQ1_ARRAY input_fastqs + + # Move files from FASTQ2_ARRAY to input_fastqs directory + move_files_to_input_dir FASTQ2_ARRAY input_fastqs + + # Move files from FASTQ3_ARRAY to input_fastqs directory + move_files_to_input_dir FASTQ3_ARRAY input_fastqs + + echo "All files moved to input_fastqs directory" + + #gcloud storage cp $read1_fastq_files /cromwell_root/input_fastqs + #gcloud storage cp $read2_fastq_files /cromwell_root/input_fastqs + #gcloud storage cp $read3_fastq_files /cromwell_root/input_fastqs - path="/cromwell_root/input_fastq/" + path="input_fastqs/" barcode_index="~{barcode_index1}" file="${path}${barcode_index}" zcat "$file" | sed -n '2~4p' | 
shuf -n 1000 > downsample.fq @@ -311,7 +336,7 @@ task FastqProcessATAC { for fastq in "${FASTQ2_ARRAY[@]}" do BASE=`basename $fastq` - BASE=`echo --R1 /cromwell_root/input_fastq/$BASE` + BASE=`echo --R1 input_fastqs/$BASE` R1_FILES_CONCAT+="$BASE " done echo $R1_FILES_CONCAT @@ -321,7 +346,7 @@ task FastqProcessATAC { for fastq in "${FASTQ1_ARRAY[@]}" do BASE=`basename $fastq` - BASE=`echo --R2 /cromwell_root/input_fastq/$BASE` + BASE=`echo --R2 input_fastqs/$BASE` R2_FILES_CONCAT+="$BASE " done echo $R2_FILES_CONCAT @@ -331,12 +356,12 @@ task FastqProcessATAC { for fastq in "${FASTQ3_ARRAY[@]}" do BASE=`basename $fastq` - BASE=`echo --R3 /cromwell_root/input_fastq/$BASE` + BASE=`echo --R3 input_fastqs/$BASE` R3_FILES_CONCAT+="$BASE " done echo $R3_FILES_CONCAT - python3 /warptools/scripts/dynamic-barcode-orientation.py downsample.fq ~{whitelist} best_match.txt + python3 /warptools/scripts/dynamic-barcode-orientation.py downsample.fq "~{whitelist}" best_match.txt cat best_match.txt barcode_choice=$(>> runtime { - docker: docker + docker: docker_path cpu: cpu memory: "${mem_size} MiB" disks: "local-disk ${disk_size} HDD" @@ -369,8 +392,8 @@ task FastqProcessATAC { } output { - Array[File] fastq_R1_output_array = glob("/cromwell_root/output_fastq/fastq_R1_*") - Array[File] fastq_R3_output_array = glob("/cromwell_root/output_fastq/fastq_R3_*") + Array[File] fastq_R1_output_array = glob("fastq_R1_*") + Array[File] fastq_R3_output_array = glob("fastq_R3_*") } } diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index 3c5e60b585..4a8d27893f 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -6,7 +6,7 @@ task OptimusH5adGeneration { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String warp_tools_docker_path # name of the sample String input_id String gex_nhash_id = "" @@ -106,7 +106,7 @@ task OptimusH5adGeneration { >>> runtime { - docker: docker + docker: warp_tools_docker_path cpu: cpu 
# note that only 1 thread is supported by pseudobam memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" @@ -123,7 +123,7 @@ task SingleNucleusOptimusH5adOutput { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String warp_tools_docker_path # name of the sample String input_id # additional aliquot id @@ -207,7 +207,7 @@ task SingleNucleusOptimusH5adOutput { } runtime { - docker: docker + docker: warp_tools_docker_path cpu: cpu # note that only 1 thread is supported by pseudobam memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" @@ -221,7 +221,7 @@ task SingleNucleusOptimusH5adOutput { } task JoinMultiomeBarcodes { - input { + input { File atac_h5ad File atac_fragment File gex_h5ad @@ -232,10 +232,11 @@ task JoinMultiomeBarcodes { String cpuPlatform = "Intel Cascade Lake" Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(gex_h5ad, "MiB") + size(atac_fragment, "MiB")) * 3) + 10000 Int disk = ceil((size(atac_h5ad, "GiB") + size(gex_h5ad, "GiB") + size(atac_fragment, "GiB")) * 5) + 10 + String docker_path } - String gex_base_name = basename(gex_h5ad, ".h5ad") - String atac_base_name = basename(atac_h5ad, ".h5ad") - String atac_fragment_base = basename(atac_fragment, ".tsv") + String gex_base_name = basename(gex_h5ad, ".h5ad") + String atac_base_name = basename(atac_h5ad, ".h5ad") + String atac_fragment_base = basename(atac_fragment, ".tsv") parameter_meta { atac_h5ad: "The resulting h5ad from the ATAC workflow." 
@@ -272,7 +273,7 @@ task JoinMultiomeBarcodes { atac_tsv = pd.read_csv("~{atac_fragment}", sep="\t", names=['chr','start', 'stop', 'barcode','n_reads']) whitelist_gex = pd.read_csv("~{gex_whitelist}", header=None, names=["gex_barcodes"]) whitelist_atac = pd.read_csv("~{atac_whitelist}", header=None, names=["atac_barcodes"]) - + # get dataframes df_atac = atac_data.obs df_gex = gex_data.obs @@ -316,7 +317,7 @@ task JoinMultiomeBarcodes { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.9-2.6.3-1715865353" + docker: docker_path disks: "local-disk ~{disk} HDD" memory: "${machine_mem_mb} MiB" cpu: nthreads diff --git a/tasks/skylab/MergeSortBam.wdl b/tasks/skylab/MergeSortBam.wdl index 229ed18f8a..23ea466708 100644 --- a/tasks/skylab/MergeSortBam.wdl +++ b/tasks/skylab/MergeSortBam.wdl @@ -9,7 +9,7 @@ task MergeSortBamFiles { Int compression_level = 5 # runtime values - String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10" + String picard_cloud_docker_path Int machine_mem_mb = 18150 Int cpu = 1 # default to 500GiB of space @@ -28,7 +28,7 @@ task MergeSortBamFiles { parameter_meta { bam_inputs: "Merges Sam/Bam files" sort_order: "sort order of output bam" - docker: "(optional) the docker image containing the runtime environment for this task" + picard_cloud_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -47,7 +47,7 @@ task MergeSortBamFiles { } runtime { - docker: docker + docker: picard_cloud_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES diff --git a/tasks/skylab/Metrics.wdl b/tasks/skylab/Metrics.wdl index fb91283d71..76b85d1012 100644 --- a/tasks/skylab/Metrics.wdl +++ b/tasks/skylab/Metrics.wdl @@ -8,7 +8,8 @@ task 
CalculateCellMetrics { String input_id # runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + + String warp_tools_docker_path Int machine_mem_mb = 8000 Int cpu = 4 Int disk = ceil(size(bam_input, "Gi") * 4) + ceil((size(original_gtf, "Gi") * 3)) @@ -21,7 +22,7 @@ task CalculateCellMetrics { parameter_meta { bam_input: "Input bam file containing reads marked with tags for cell barcodes (CB), molecule barcodes (UB) and gene ids (GX)" - docker: "(optional) the docker image containing the runtime environment for this task" + warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -64,7 +65,7 @@ task CalculateCellMetrics { } runtime { - docker: docker + docker: warp_tools_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES @@ -85,7 +86,7 @@ task CalculateGeneMetrics { String input_id # runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.1" + String warp_tools_docker_path Int machine_mem_mb = 32000 Int cpu = 4 Int disk = ceil(size(bam_input, "Gi") * 4) + ceil((size(original_gtf, "Gi") * 3)) @@ -99,7 +100,7 @@ task CalculateGeneMetrics { parameter_meta { bam_input: "Input bam file containing reads marked with tags for cell barcodes (CB), molecule barcodes (UB) and gene ids (GE)" - docker: "(optional) the docker image containing the runtime environment for this task" + warp_tools_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -144,7 +145,7 
@@ task CalculateGeneMetrics { } runtime { - docker: docker + docker: warp_tools_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl index e11a5e9d3d..1075f06898 100644 --- a/tasks/skylab/PairedTagUtils.wdl +++ b/tasks/skylab/PairedTagUtils.wdl @@ -7,7 +7,8 @@ task PairedTagDemultiplex { String input_id Boolean preindex File whitelist - String docker = "us.gcr.io/broad-gotc-prod/upstools:2.0.0" + String docker_path + Int cpu = 1 Int disk_size = ceil(2 * (size(read1_fastq, "GiB") + size(read3_fastq, "GiB") + size(barcodes_fastq, "GiB") )) + 400 Int preemptible = 3 @@ -26,7 +27,7 @@ task PairedTagDemultiplex { preindex: "Boolean for whether data has a sample barcode that needs to be demultiplexed" whitelist: "Atac whitelist for 10x multiome data" input_id: "Input ID to demarcate sample" - docker: "(optional) the docker image containing the runtime environment for this task" + docker_path: "(optional) the docker image containing the runtime environment for this task" mem_size: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk_size: "(optional) the amount of disk space (GiB) to provision for this task" @@ -106,6 +107,9 @@ task PairedTagDemultiplex { elif [[ $COUNT == 24 && ~{preindex} == "false" ]] then echo "FASTQ has correct index length, no modification necessary" + + ls -lh + mv "~{input_id}_R2.fq.gz" "~{r2_base}.fq.gz" mv "~{input_id}_R1.fq.gz" "~{r1_base}.fq.gz" mv "~{input_id}_R3.fq.gz" "~{r3_base}.fq.gz" @@ -127,7 +131,7 @@ task PairedTagDemultiplex { >>> runtime { - docker: docker + docker: docker_path cpu: cpu memory: "${mem_size} GiB" disks: "local-disk ${disk_size} HDD" @@ -145,9 +149,7 @@ task AddBBTag { input { File bam String input_id - - # using the latest build of upstools docker in GCR - String docker = 
"us.gcr.io/broad-gotc-prod/upstools:1.0.0-2023.03.03-1704300311" + String docker_path # Runtime attributes Int mem_size = 8 @@ -165,7 +167,7 @@ task AddBBTag { parameter_meta { bam: "BAM with aligned reads and barcode in the CB tag" input_id: "input ID" - docker: "(optional) the docker image containing the runtime environment for this task" + docker_path: "The docker image path containing the runtime environment for this task" mem_size: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk_size: "(optional) the amount of disk space (GiB) to provision for this task" @@ -184,7 +186,7 @@ task AddBBTag { >>> runtime { - docker: docker + docker: docker_path cpu: cpu memory: "${mem_size} GiB" disks: "local-disk ${disk_size} HDD" @@ -202,6 +204,7 @@ task ParseBarcodes { File atac_fragment Int nthreads = 1 String cpuPlatform = "Intel Cascade Lake" + String docker_path } String atac_base_name = basename(atac_h5ad, ".h5ad") @@ -227,7 +230,7 @@ task ParseBarcodes { # import anndata to manipulate h5ad files import anndata as ad import pandas as pd - import snapatac2 as snap + import snapatac2 as snap print("Reading ATAC h5ad:") atac_data = ad.read_h5ad("~{atac_h5ad}") print("Reading ATAC fragment file:") @@ -273,7 +276,7 @@ task ParseBarcodes { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.9-2.6.3-1715865353" + docker: docker_path disks: "local-disk ~{disk} HDD" memory: "${machine_mem_mb} MiB" cpu: nthreads diff --git a/tasks/skylab/RunEmptyDrops.wdl b/tasks/skylab/RunEmptyDrops.wdl index a0f60b1c99..0921393862 100644 --- a/tasks/skylab/RunEmptyDrops.wdl +++ b/tasks/skylab/RunEmptyDrops.wdl @@ -16,7 +16,7 @@ task RunEmptyDrops { Int emptydrops_lower = 100 # runtime values - String docker = "us.gcr.io/broad-gotc-prod/empty-drops:1.0.1-4.2" + String empty_drops_docker_path Int machine_mem_mb = 32000 Int cpu = 1 Int disk = 20 @@ -48,7 +48,7 @@ task RunEmptyDrops { } runtime { - 
docker: docker + docker: empty_drops_docker_path memory: "${machine_mem_mb} MiB" disks: "local-disk ${disk} HDD" disk: disk_size + " GB" # TES diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 41e62ba7a9..4c2ac47230 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -226,7 +226,7 @@ task STARsoloFastq { String? soloMultiMappers # runtime values - String docker = "us.gcr.io/broad-gotc-prod/star:1.0.1-2.7.11a-1692706072" + String star_docker_path Int machine_mem_mb = 64000 Int cpu = 8 # multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference. @@ -244,7 +244,7 @@ task STARsoloFastq { r2_fastq: "array of forward read FASTQ files" tar_star_reference: "star reference tarball built against the species that the bam_input is derived from" star_strand_mode: "STAR mode for handling stranded reads. Options are 'Forward', 'Reverse, or 'Unstranded'" - docker: "(optional) the docker image containing the runtime environment for this task" + star_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -345,8 +345,11 @@ task STARsoloFastq { then SoloDirectory="Solo.out/Gene/raw" echo "SoloDirectory is $SoloDirectory" - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/ - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ + #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/ + #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ + echo "list matrix files in $SoloDirectory" + ls "$SoloDirectory"/*.mtx + mv 
$SoloDirectory/matrix.mtx matrix.mtx mv "Solo.out/Gene/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/Gene/raw/features.tsv" features.tsv mv "Solo.out/Gene/CellReads.stats" CellReads.stats @@ -359,8 +362,11 @@ task STARsoloFastq { then SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw" echo "SoloDirectory is $SoloDirectory" - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/ - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ + #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/ + #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ + echo "list matrix files in $SoloDirectory" + ls "$SoloDirectory"/*.mtx + mv $SoloDirectory/matrix.mtx matrix.mtx mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats @@ -370,12 +376,18 @@ task STARsoloFastq { else SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw" echo "SoloDirectory is $SoloDirectory" - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/ - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ + #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/ + #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ + echo "list matrix files in $SoloDirectory" + ls "$SoloDirectory"/*.mtx + mv $SoloDirectory/matrix.mtx matrix.mtx SoloDirectory="Solo.out/Gene/raw" echo "SoloDirectory is $SoloDirectory" - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; echo mv {} "/cromwell_root/$new_name"' - find "$SoloDirectory" -maxdepth 
1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' + #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; echo mv {} "/cromwell_root/$new_name"' + #find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' + echo "list matrix files in $SoloDirectory" + ls "$SoloDirectory"/*.mtx + mv $SoloDirectory/matrix.mtx matrix_sn_rna.mtx mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats @@ -397,7 +409,7 @@ task STARsoloFastq { >>> runtime { - docker: docker + docker: star_docker_path memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" disk: disk + " GB" # TES @@ -450,7 +462,8 @@ task MergeStarOutput { File features_single = features[0] #runtime values - String docker = "us.gcr.io/broad-gotc-prod/star-merge-npz:1.2" + String star_merge_docker_path + Int machine_mem_gb = 20 Int cpu = 1 Int disk = ceil(size(matrix, "Gi") * 2) + 10 @@ -461,7 +474,7 @@ task MergeStarOutput { } parameter_meta { - docker: "(optional) the docker image containing the runtime environment for this task" + star_merge_docker_path: "(optional) the docker image containing the runtime environment for this task" machine_mem_gb: "(optional) the amount of memory (GiB) to provision for this task" cpu: "(optional) the number of cpus to provision for this task" disk: "(optional) the amount of disk space (GiB) to provision for this task" @@ -486,11 +499,12 @@ task MergeStarOutput { mkdir matrix #Using cp because mv isn't moving pwd - cp /cromwell_root/~{input_id}.uniform.mtx ./matrix/matrix.mtx + ls -lR + cp ~{input_id}.uniform.mtx ./matrix/matrix.mtx cp ~{barcodes_single} ./matrix/barcodes.tsv cp 
~{features_single} ./matrix/features.tsv - tar -zcvf ~{input_id}.mtx_files.tar ./matrix/* + tar -zcvf ~{input_id}.mtx_files.tar ./matrix/* # Running star for combined cell matrix @@ -582,7 +596,7 @@ task MergeStarOutput { echo "No text files found in the folder." fi - # + # # create the compressed raw count matrix with the counts, gene names and the barcodes python3 /scripts/scripts/create-merged-npz-output.py \ --barcodes ${barcodes_files[@]} \ @@ -592,7 +606,7 @@ task MergeStarOutput { >>> runtime { - docker: docker + docker: star_merge_docker_path memory: "${machine_mem_gb} GiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES @@ -731,6 +745,7 @@ task STARGenomeRefVersion { input { String tar_star_reference Int disk = 10 + String ubuntu_docker_path } meta { @@ -763,7 +778,7 @@ task STARGenomeRefVersion { } runtime { - docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4:latest" + docker: ubuntu_docker_path memory: "2 GiB" disks: "local-disk ${disk} HDD" disk: disk + " GB" # TES diff --git a/verification/test-wdls/TestExomeGermlineSingleSample.wdl b/verification/test-wdls/TestExomeGermlineSingleSample.wdl index e6324a420c..59110d09be 100644 --- a/verification/test-wdls/TestExomeGermlineSingleSample.wdl +++ b/verification/test-wdls/TestExomeGermlineSingleSample.wdl @@ -28,6 +28,7 @@ workflow TestExomeGermlineSingleSample { Boolean update_truth String vault_token_path String google_account_vault_path + String cloud_provider } meta { @@ -46,7 +47,8 @@ workflow TestExomeGermlineSingleSample { target_interval_list = target_interval_list, bait_interval_list = bait_interval_list, bait_set_name = bait_set_name, - provide_bam_output = provide_bam_output + provide_bam_output = provide_bam_output, + cloud_provider = cloud_provider } # Collect all of the pipeline outputs into a single Array[String]] diff --git a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl index a09838c3a4..228b6b1f41 100644 
--- a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl +++ b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl @@ -33,6 +33,8 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { Boolean update_truth String vault_token_path String google_account_vault_path + + String cloud_provider } meta { @@ -57,7 +59,8 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { species = species, organ = organ, input_name_metadata_field = input_name_metadata_field, - input_id_metadata_field = input_id_metadata_field + input_id_metadata_field = input_id_metadata_field, + cloud_provider = cloud_provider } diff --git a/verification/test-wdls/TestMultiome.wdl b/verification/test-wdls/TestMultiome.wdl index 1f8b71ba0b..c21c473ede 100644 --- a/verification/test-wdls/TestMultiome.wdl +++ b/verification/test-wdls/TestMultiome.wdl @@ -10,6 +10,7 @@ workflow TestMultiome { input { String input_id + String cloud_provider String nhash_id # Optimus Inputs @@ -26,7 +27,6 @@ workflow TestMultiome { Boolean ignore_r1_read_length = false String star_strand_mode = "Forward" Boolean count_exons = false - File gex_whitelist = "gs://broad-gotc-test-storage/Multiome/input/737K-arc-v1_gex.txt" String? 
soloMultiMappers # ATAC inputs @@ -43,8 +43,6 @@ workflow TestMultiome { # Trimadapters input String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG" String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" - # Whitelist - File atac_whitelist = "gs://broad-gotc-test-storage/Multiome/input/737K-arc-v1_atac.txt" # These values will be determined and injected into the inputs by the scala test framework String truth_path @@ -76,7 +74,6 @@ workflow TestMultiome { ignore_r1_read_length = ignore_r1_read_length, star_strand_mode = star_strand_mode, count_exons = count_exons, - gex_whitelist = gex_whitelist, atac_r1_fastq = atac_r1_fastq, atac_r2_fastq = atac_r2_fastq, atac_r3_fastq = atac_r3_fastq, @@ -84,9 +81,9 @@ workflow TestMultiome { adapter_seq_read1 = adapter_seq_read1, adapter_seq_read3 = adapter_seq_read3, chrom_sizes = chrom_sizes, - atac_whitelist = atac_whitelist, run_cellbender = run_cellbender, soloMultiMappers = soloMultiMappers, + cloud_provider = cloud_provider, nhash_id = nhash_id } diff --git a/verification/test-wdls/TestOptimus.wdl b/verification/test-wdls/TestOptimus.wdl index cd9097f70c..04d0b9c180 100644 --- a/verification/test-wdls/TestOptimus.wdl +++ b/verification/test-wdls/TestOptimus.wdl @@ -60,6 +60,8 @@ workflow TestOptimus { String vault_token_path String google_account_vault_path + String cloud_provider + } meta { @@ -86,6 +88,7 @@ workflow TestOptimus { count_exons = count_exons, ignore_r1_read_length = ignore_r1_read_length, soloMultiMappers = soloMultiMappers, + cloud_provider = cloud_provider, gex_nhash_id = gex_nhash_id } diff --git a/verification/test-wdls/TestPairedTag.wdl b/verification/test-wdls/TestPairedTag.wdl index 2cfd56cebd..7c32071dc8 100644 --- a/verification/test-wdls/TestPairedTag.wdl +++ b/verification/test-wdls/TestPairedTag.wdl @@ -54,6 +54,7 @@ workflow TestPairedTag { String vault_token_path String google_account_vault_path Boolean run_cellbender = false + String cloud_provider } @@ -88,6 +89,7 @@ 
workflow TestPairedTag { chrom_sizes = chrom_sizes, atac_whitelist = atac_whitelist, soloMultiMappers = soloMultiMappers, + cloud_provider = cloud_provider, nhash_id = nhash_id } diff --git a/verification/test-wdls/TestReblockGVCF.wdl b/verification/test-wdls/TestReblockGVCF.wdl index f34e22f1b7..01607636c7 100644 --- a/verification/test-wdls/TestReblockGVCF.wdl +++ b/verification/test-wdls/TestReblockGVCF.wdl @@ -27,6 +27,7 @@ workflow TestReblockGVCF { Boolean update_truth String vault_token_path String google_account_vault_path + String cloud_provider } meta { @@ -45,7 +46,8 @@ workflow TestReblockGVCF { annotations_to_keep_command = annotations_to_keep_command, annotations_to_remove_command = annotations_to_remove_command, move_filters_to_genotypes = move_filters_to_genotypes, - gvcf_file_extension = gvcf_file_extension + gvcf_file_extension = gvcf_file_extension, + cloud_provider = cloud_provider } diff --git a/verification/test-wdls/TestSlideSeq.wdl b/verification/test-wdls/TestSlideSeq.wdl index b63cd87099..b0523fee21 100644 --- a/verification/test-wdls/TestSlideSeq.wdl +++ b/verification/test-wdls/TestSlideSeq.wdl @@ -26,6 +26,7 @@ workflow TestSlideSeq { Boolean update_truth String vault_token_path String google_account_vault_path + String cloud_provider } meta { @@ -43,7 +44,8 @@ workflow TestSlideSeq { annotations_gtf = annotations_gtf, output_bam_basename = output_bam_basename, count_exons = count_exons, - bead_locations = bead_locations + bead_locations = bead_locations, + cloud_provider = cloud_provider } diff --git a/verification/test-wdls/TestVariantCalling.wdl b/verification/test-wdls/TestVariantCalling.wdl index b2c3b29273..3054e0a1b9 100644 --- a/verification/test-wdls/TestVariantCalling.wdl +++ b/verification/test-wdls/TestVariantCalling.wdl @@ -39,6 +39,7 @@ workflow TestVariantCalling { Boolean update_truth String vault_token_path String google_account_vault_path + String cloud_provider } meta { @@ -69,7 +70,8 @@ workflow TestVariantCalling { 
make_bamout = make_bamout, use_gatk3_haplotype_caller = use_gatk3_haplotype_caller, skip_reblocking = skip_reblocking, - use_dragen_hard_filtering = use_dragen_hard_filtering + use_dragen_hard_filtering = use_dragen_hard_filtering, + cloud_provider = cloud_provider } diff --git a/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl b/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl index d3f775dcc7..16b54c3876 100644 --- a/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl +++ b/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl @@ -32,6 +32,7 @@ workflow TestWholeGenomeGermlineSingleSample { Boolean use_bwa_mem = true Boolean allow_empty_ref_alt = false Boolean use_dragen_hard_filtering = false + String cloud_provider # These values will be determined and injected into the inputs by the scala test framework String truth_path @@ -66,7 +67,8 @@ workflow TestWholeGenomeGermlineSingleSample { perform_bqsr = perform_bqsr, use_bwa_mem = use_bwa_mem, allow_empty_ref_alt = allow_empty_ref_alt, - use_dragen_hard_filtering = use_dragen_hard_filtering + use_dragen_hard_filtering = use_dragen_hard_filtering, + cloud_provider = cloud_provider } # Collect all of the pipeline outputs into a single Array[String] diff --git a/verification/test-wdls/Testsnm3C.wdl b/verification/test-wdls/Testsnm3C.wdl index f8125fdc22..893c48d1f0 100644 --- a/verification/test-wdls/Testsnm3C.wdl +++ b/verification/test-wdls/Testsnm3C.wdl @@ -36,7 +36,7 @@ workflow Testsnm3C { String vault_token_path String google_account_vault_path - String docker = "us.gcr.io/broad-gotc-prod/m3c-yap-hisat:2.4" + String cloud_provider } meta { @@ -63,7 +63,7 @@ workflow Testsnm3C { num_downstr_bases = num_downstr_bases, compress_level = compress_level, batch_number = batch_number, - docker = docker + cloud_provider = cloud_provider } diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md index 3f922e86a7..06989e960f 100644 --- 
a/website/docs/Pipelines/ATAC/README.md +++ b/website/docs/Pipelines/ATAC/README.md @@ -45,22 +45,24 @@ ATAC can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/stable/ The following describes the inputs of the ATAC workflow. For more details on how default inputs are set for the Multiome workflow, see the [Multiome overview](../Multiome_Pipeline/README). | Variable name | Description | -| --- | --- | -| read1_fastq_gzipped | Fastq inputs (array of compressed read 1 FASTQ files). | -| read2_fastq_gzipped | Fastq inputs (array of compressed read 2 FASTQ files containing cellular barcodes). | -| read3_fastq_gzipped | Fastq inputs (array of compressed read 3 FASTQ files). | -| input_id | Output prefix/base name for all intermediate files and pipeline outputs. | +| --- |--- | +| read1_fastq_gzipped | Fastq inputs (array of compressed read 1 FASTQ files). | +| read2_fastq_gzipped | Fastq inputs (array of compressed read 2 FASTQ files containing cellular barcodes). | +| read3_fastq_gzipped | Fastq inputs (array of compressed read 3 FASTQ files). | +| input_id | Output prefix/base name for all intermediate files and pipeline outputs. | +| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | +| preindex | Boolean used for paired-tag data and not applicable to ATAC data types; default is set to false. | +| tar_bwa_reference | BWA reference (tar file containing reference fasta and corresponding files). | +| num_threads_bwa | Optional integer defining the number of CPUs per node for the BWA-mem alignment task (default: 128). | +| mem_size_bwa | Optional integer defining the memory size for the BWA-mem alignment task in GB (default: 512). | +| cpu_platform_bwa | Optional string defining the CPU platform for the BWA-mem alignment task (default: "Intel Ice Lake"). 
| +| annotations_gtf | CreateFragmentFile input variable: GTF file for SnapATAC2 to calculate TSS sites of fragment file. | +| chrom_sizes | CreateFragmentFile input variable: Text file containing chrom_sizes for genome build (i.e., hg38) | +| whitelist | Whitelist file for ATAC cellular barcodes. | +| adapter_seq_read1 | TrimAdapters input: Sequence adapter for read 1 fastq. | +| adapter_seq_read3 | TrimAdapters input: Sequence adapter for read 3 fastq. | +| vm_size | String defining the Azure virtual machine family for the workflow (default: "Standard_M128s"). | | atac_nhash_id | String that represents an optional library aliquot identifier. When used, it is echoed in the h5ad unstructured data. | -| preindex | Boolean used for paired-tag data and not applicable to ATAC data types; default is set to false. | -| tar_bwa_reference | BWA reference (tar file containing reference fasta and corresponding files). | -| num_threads_bwa | Optional integer defining the number of CPUs per node for the BWA-mem alignment task (default: 128). | -| mem_size_bwa | Optional integer defining the memory size for the BWA-mem alignment task in GB (default: 512). | -| cpu_platform_bwa | Optional string defining the CPU platform for the BWA-mem alignment task (default: "Intel Ice Lake"). | -| annotations_gtf | CreateFragmentFile input variable: GTF file for SnapATAC2 to calculate TSS sites of fragment file.| -| chrom_sizes | CreateFragmentFile input variable: Text file containing chrom_sizes for genome build (i.e., hg38) | -| whitelist | Whitelist file for ATAC cellular barcodes. | -| adapter_seq_read1 | TrimAdapters input: Sequence adapter for read 1 fastq. | -| adapter_seq_read3 | TrimAdapters input: Sequence adapter for read 3 fastq. 
| ## ATAC tasks and tools diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md index 308a7e8bab..bfe793457a 100644 --- a/website/docs/Pipelines/Multiome_Pipeline/README.md +++ b/website/docs/Pipelines/Multiome_Pipeline/README.md @@ -55,6 +55,7 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta | Input name | Description | Type | | --- | --- | --- | | input_id | Unique identifier describing the biological sample or replicate that corresponds with the FASTQ files; can be a human-readable name or UUID. | String | +| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String | | nhash_id | Optional identifier for the library aliquot; when specified, the workflow will echo the ID in the ATAC and gene expression output h5ads (in the adata.uns section) and in the library-level metrics CSV. | | annotations_gtf | GTF file containing gene annotations used for GEX cell metric calculation and ATAC fragment metrics; must match the GTF used to build the STAR aligner. | File | | gex_r1_fastq | Array of read 1 FASTQ files representing a single GEX 10x library. | Array[File] | @@ -67,19 +68,19 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta | emptydrops_lower | Optional threshold for UMIs for the Optimus (GEX) pipeline that empty drops tool should consider for determining cell; data below threshold is not removed; default is "100". | Integer | | force_no_check | Optional boolean for the Optimus (GEX) pipeline indicating if the pipeline should perform checks; default is "false". | Boolean | | ignore_r1_read_length | Optional boolean for the Optimus (GEX) pipeline indicating if the pipeline should ignore barcode chemistry check; if "true", the workflow will not ensure the `10x_chemistry_version` input matches the chemistry in the read 1 FASTQ; default is "false". 
| Boolean | -| star_strand_mode | Optional string for the Optimus (GEX) pipeline for performing STARsolo alignment on forward stranded, reverse stranded, or unstranded data; default is "Forward". | String | -| count_exons | Optional boolean for the Optimus (GEX) pipeline indicating if the workflow should calculate exon counts **when in single-nucleus (sn_rna) mode**; if "true" in sc_rna mode, the workflow will return an error; default is "false". | Boolean | -| gex_whitelist | Optional file containing the list of valid barcodes for 10x multiome GEX data; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt". | File | -| soloMultiMappers | Optional string describing whether or not the Optimus (GEX) pipeline should run STARsolo with the `--soloMultiMappers` flag. | String | -| atac_r1_fastq | Array of read 1 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] | -| atac_r2_fastq | Array of barcodes FASTQ files representing a single 10x multiome ATAC library. | Array[File] | -| atac_r3_fastq | Array of read 2 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] | -| tar_bwa_reference | TAR file containing the reference index files for BWA-mem alignment for the ATAC pipeline. | File | -| chrom_sizes | File containing the genome chromosome sizes; used to calculate ATAC fragment file metrics. | File | -| adapter_seq_read1 | Optional string describing the adapter sequence for ATAC read 1 paired-end reads to be used during adapter trimming with Cutadapt; default is "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG". | String | -| adapter_seq_read3 | Optional string describing the adapter sequence for ATAC read 2 paired-end reads to be used during adapter trimming with Cutadapt; default is "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG". 
| String | -| atac_whitelist | Optional file containing the list of valid barcodes for 10x multiome ATAC adata; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt". | File | -| run_cellbender | Optional boolean used to determine if the Optimus (GEX) pipeline should run CellBender on the output gene expression h5ad file, `h5ad_output_file_gex`; default is "false". | Boolean | +| star_strand_mode | Optional string for the Optimus (GEX) pipeline for performing STARsolo alignment on forward stranded, reverse stranded, or unstranded data; default is "Forward". | String | +| count_exons | Optional boolean for the Optimus (GEX) pipeline indicating if the workflow should calculate exon counts **when in single-nucleus (sn_rna) mode**; if "true" in sc_rna mode, the workflow will return an error; default is "false". | Boolean | +| soloMultiMappers | Optional string describing whether or not the Optimus (GEX) pipeline should run STARsolo with the `--soloMultiMappers` flag. | String | +| atac_r1_fastq | Array of read 1 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] | +| atac_r2_fastq | Array of barcodes FASTQ files representing a single 10x multiome ATAC library. | Array[File] | +| atac_r3_fastq | Array of read 2 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] | +| tar_bwa_reference | TAR file containing the reference index files for BWA-mem alignment for the ATAC pipeline. | File | +| chrom_sizes | File containing the genome chromosome sizes; used to calculate ATAC fragment file metrics. | File | +| adapter_seq_read1 | Optional string describing the adapter sequence for ATAC read 1 paired-end reads to be used during adapter trimming with Cutadapt; default is "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG". 
| String | +| adapter_seq_read3 | Optional string describing the adapter sequence for ATAC read 2 paired-end reads to be used during adapter trimming with Cutadapt; default is "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG". | String | +| run_cellbender | Optional boolean used to determine if the Optimus (GEX) pipeline should run CellBender on the output gene expression h5ad file, `h5ad_output_file_gex`; default is "false". | Boolean | +| vm_size | String defining the Azure virtual machine family for the workflow (default: "Standard_M128s"). | String | + #### Sample inputs for analyses in a Terra Workspace @@ -120,8 +121,8 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt | multimappers_Rescue_matrix | `UniqueAndMult-Rescue.mtx` | Optional output produced when `soloMultiMappers` is "Rescue"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | | multimappers_PropUnique_matrix | `UniqueAndMult-PropUnique.mtx` | Optional output produced when `soloMultiMappers` is "PropUnique"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.| | gex_aligner_metrics | `.star_metrics.tar` | Text file containing per barcode metrics (`CellReads.stats`) produced by the GEX pipeline STARsolo aligner. | -| mtx_files | `.mtx_files.tar` | TAR file with STARsolo matrix market files (barcodes.tsv, features.tsv, and matrix.mtx) | TAR | | library_metrics | `__library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. 
| +| mtx_files | `.mtx_files.tar` | TAR file with STARsolo matrix market files (barcodes.tsv, features.tsv, and matrix.mtx) | | cell_barcodes_csv | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| | checkpoint_file | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | | h5_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md index 2c5979d658..d9b6fccd5d 100644 --- a/website/docs/Pipelines/Optimus_Pipeline/README.md +++ b/website/docs/Pipelines/Optimus_Pipeline/README.md @@ -85,6 +85,7 @@ The example configuration files also contain metadata for the reference files, d | Parameter name | Description | Optional attributes (when applicable) | | --- | --- | --- | +| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String | | whitelist | List of known CBs; the workflow automatically selects the [10x Genomics](https://www.10xgenomics.com/) whitelist that corresponds to the v2 or v3 chemistry based on the input `tenx_chemistry_version`. A custom whitelist can also be provided if the input data was generated with a chemistry different from 10x Genomics v2 or v3. To use a custom whitelist, set the input `ignore_r1_read_length` to "true". 
| N/A | | read_struct | String describing the structure of reads; the workflow automatically selects the [10x Genomics](https://www.10xgenomics.com/) read structure that corresponds to the v2 or v3 chemistry based on the input `tenx_chemistry_version`. A custom read structure can also be provided if the input data was generated with a chemistry different from 10x Genomics v2 or v3. To use a custom read structure, set the input `force_no_check` to "true". | N/A | | tar_star_reference | TAR file containing a species-specific reference genome and GTF; it is generated using the [BuildIndices workflow](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/build_indices/BuildIndices.wdl). | N/A | diff --git a/website/docs/Pipelines/PairedTag_Pipeline/README.md b/website/docs/Pipelines/PairedTag_Pipeline/README.md index 0c96e00d92..a203d53447 100644 --- a/website/docs/Pipelines/PairedTag_Pipeline/README.md +++ b/website/docs/Pipelines/PairedTag_Pipeline/README.md @@ -5,9 +5,9 @@ slug: /Pipelines/PairedTag_Pipeline/README # Paired-Tag Overview -| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | -| :----: | :---: | :----: | :--------------: | -| [PairedTag_v1.0.0](https://github.com/broadinstitute/warp/releases) | June, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) | +| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | +|:---:| :---: | :---: | :---: | +| [PairedTag_v1.0.1](https://github.com/broadinstitute/warp/releases) | June, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) | ## Introduction to the Paired-Tag workflow @@ -91,7 +91,7 @@ The Paired-Tag workflow inputs are specified in JSON configuration files. 
Exampl The Paired-Tag workflow calls two WARP subworkflows and an additional task which are described briefly in the table below. For more details on each subworkflow and task, see the documentation and WDL scripts linked in the table. | Subworkflow/Task | Software | Description | -| ----------- | -------- | ----------- | +| --- | --- | --- | | Optimus ([WDL](https://github.com/broadinstitute/warp/blob/develop/pipelines/skylab/optimus/Optimus.wdl) and [documentation](../Optimus_Pipeline/README)) | fastqprocess, STARsolo, Emptydrops | Workflow used to analyze 10x single-cell GEX data. | | PairedTagDemultiplex as demultiplex ([WDL](https://github.com/broadinstitute/warp/blob/develop/tasks/skylab/PairedTagUtils.wdl)) | UPStools | Task used to check the length of the read2 FASTQ (should be either 27 or 24 bp). If `preindex` is set to true, the task will perform demultiplexing of the 3-bp sample barcode from the read2 ATAC fastq files and stores it in the readname. It will then perform barcode orientation checking. The ATAC workflow will then add a combined 3 bp sample barcode and cellular barcode to the BB tag of the BAM. If `preindex` is false and then length is 27 bp, the task will perform trimming and subsequent barcode orientation checking. | | ATAC ([WDL](https://github.com/broadinstitute/warp/blob/develop/pipelines/skylab/multiome/atac.wdl) and [documentation](../ATAC/README)) | fastqprocess, bwa-mem, SnapATAC2 | Workflow used to analyze single-nucleus paired-tag DNA (histone modifications) data. | @@ -116,6 +116,7 @@ The Paired-Tag workflow calls two WARP subworkflows and an additional task which | cell_calls_gex | `_gex.emptyDrops` | TSV file containing the EmptyDrops results when the Optimus workflow is run in sc_rna mode. | | h5ad_output_file_gex | `_gex.h5ad` | h5ad (Anndata) file containing the raw cell-by-gene count matrix, gene metrics, cell metrics, and global attributes. 
See the [Optimus Count Matrix Overview](../Optimus_Pipeline/Loom_schema.md) for more details. | | library_metrics | `_library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. | +| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String | | multimappers_EM_matrix | `UniqueAndMult-EM.mtx` | Optional output produced when `soloMultiMappers` is "EM"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.| | multimappers_Uniform_matrix | `UniqueAndMult-Uniform.mtx` | Optional output produced when `soloMultiMappers` is "Uniform" (default); see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.| | multimappers_Rescue_matrix | `UniqueAndMult-Rescue.mtx` | Optional output produced when `soloMultiMappers` is "Rescue"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | diff --git a/website/docs/Pipelines/SlideSeq_Pipeline/README.md b/website/docs/Pipelines/SlideSeq_Pipeline/README.md index 520e03e406..43fa967859 100644 --- a/website/docs/Pipelines/SlideSeq_Pipeline/README.md +++ b/website/docs/Pipelines/SlideSeq_Pipeline/README.md @@ -69,6 +69,7 @@ The Slide-seq workflow inputs are specified in JSON configuration files. Example | output_bam_basename | Optional string used for the output BAM file basename. | String | | count_exons | Optional boolean indicating if the workflow should calculate exon counts; default is set to “true” and produces an h5ad file containing both whole-gene counts and exon counts in an additional layer; when set to “false”, an h5ad file containing only whole-gene counts is produced. 
| Boolean | | bead_locations | Whitelist TSV file containing bead barcodes and XY coordinates on a single line for each bead; determined by sequencing prior to mRNA transfer and library preparation. | File | +| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String | #### Pseudogene handling diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md index c08299743f..909dfe887e 100644 --- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md @@ -82,6 +82,7 @@ The table below details the Multi-snSS2 inputs. The pipeline is designed to take | species | Optional description of the species from which the cells were derived. | Array of strings | | input_name_metadata_field | Optional input describing, when applicable, the metadata field containing the `input_names`. | String | | input_id_metadata_field | Optional string describing, when applicable, the metadata field containing the `input_ids`. | String | +| cloud_provider | String describing the cloud provider that should be used to run the workflow; value should be "gcp" or "azure". | String | ## Multi-snSS2 tasks and tools