diff --git a/README.md b/README.md index a3708e3a1..ad2da3e6e 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ We still encourage members of the community to adapt GATK-SV for non-GCP backend * Refer to Cromwell's [documentation](https://cromwell.readthedocs.io/en/stable/backends/Backends/) for configuration instructions. * The handling and ordering of `glob` commands may differ between platforms. * Shell commands that are potentially destructive to input files (e.g. `rm`, `mv`, `tabix`) can cause unexpected behavior on shared filesystems. Enabling [copy localization](https://cromwell.readthedocs.io/en/stable/Configuring/#local-filesystem-options) may help to more closely replicate the behavior on GCP. -* For clusters that do not support Docker, Singularity is an alternative. See [Cromwell documentation on Singularity(https://cromwell.readthedocs.io/en/stable/tutorials/Containers/#singularity). +* For clusters that do not support Docker, Singularity is an alternative. See [Cromwell documentation on Singularity](https://cromwell.readthedocs.io/en/stable/tutorials/Containers/#singularity). * The GATK-SV pipeline takes advantage of the massive parallelization possible in the cloud. Local backends may not have the resources to execute all of the workflows. Workflows that use fewer resources or that are less parallelized may be more successful. For instance, some users have been able to run [GatherSampleEvidence](#gather-sample-evidence) on a SLURM cluster. ### Data: @@ -475,9 +475,9 @@ Combines variants across multiple batches, resolves complex variants, re-genotyp * Finalized "cleaned" VCF and QC plots ## Module 07 (in development) -Apply downstream filtering steps to the cleaned vcf to further control the false discovery rate; all steps are optional and users should decide based on the specific purpose of their projects. 
+Apply downstream filtering steps to the cleaned VCF to further control the false discovery rate; all steps are optional and users should decide based on the specific purpose of their projects. -Filterings methods include: +Filtering methods include: * minGQ - remove variants based on the genotype quality across populations. Note: Trio families are required to build the minGQ filtering model in this step. We provide tables pre-trained with the 1000 genomes samples at different FDR thresholds for projects that lack family structures, and they can be found at the paths below. These tables assume that GQ has a scale of [0,999], so they will not work with newer VCFs where GQ has a scale of [0,99]. ``` @@ -493,10 +493,10 @@ gs://gatk-sv-resources-public/hg38/v0/sv-resources/ref-panel/1KG/v2/mingq/1KGP_2 ## AnnotateVcf (in development) *Formerly Module08Annotation* -Add annotations, such as the inferred function and allele frequencies of variants, to final vcf. +Add annotations, such as the inferred function and allele frequencies of variants, to the final VCF. -Annotations methods include: +Annotation methods include: -* Functional annotation - annotate SVs with inferred function on protein coding regions, regulatory regions such as UTR and Promoters and other non coding elements; +* Functional annotation - annotate SVs with inferred functional consequence on protein-coding regions, regulatory regions such as UTRs and promoters, and other non-coding elements. * Allele Frequency annotation - annotate SVs with their allele frequencies across all samples, and samples of specific sex, as well as specific sub-populations. * Allele Frequency annotation with external callset - annotate SVs with the allele frequencies of their overlapping SVs in another callset, eg. gnomad SV callset. 
diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.SingleBatch.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.SingleBatch.json.tmpl index 4fc584080..194abb76c 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.SingleBatch.json.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.SingleBatch.json.tmpl @@ -3,8 +3,6 @@ "AnnotateVcf.vcf_idx" : "${this.vcf_index}", "AnnotateVcf.protein_coding_gtf" : "${workspace.protein_coding_gtf}", - "AnnotateVcf.linc_rna_gtf" : "${workspace.linc_rna_gtf}", - "AnnotateVcf.promoter_bed" : "${workspace.promoter_bed}", "AnnotateVcf.noncoding_bed" : "${workspace.noncoding_bed}", "AnnotateVcf.ref_bed" : "${workspace.external_af_ref_bed}", "AnnotateVcf.ref_prefix" : "${workspace.external_af_ref_bed_prefix}", @@ -19,6 +17,7 @@ "AnnotateVcf.prefix" : "${this.sample_set_id}", + "AnnotateVcf.gatk_docker" : "${workspace.gatk_docker}", "AnnotateVcf.sv_base_mini_docker" : "${workspace.sv_base_mini_docker}", "AnnotateVcf.sv_pipeline_docker" : "${workspace.sv_pipeline_docker}" } \ No newline at end of file diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.json.tmpl index 5b177dfb7..bb4ae8889 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.json.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.json.tmpl @@ -3,8 +3,6 @@ "AnnotateVcf.vcf_idx" : "${this.vcf_index}", "AnnotateVcf.protein_coding_gtf" : "${workspace.protein_coding_gtf}", - "AnnotateVcf.linc_rna_gtf" : "${workspace.linc_rna_gtf}", - "AnnotateVcf.promoter_bed" : "${workspace.promoter_bed}", "AnnotateVcf.noncoding_bed" : "${workspace.noncoding_bed}", "AnnotateVcf.ref_bed" : 
"${workspace.external_af_ref_bed}", "AnnotateVcf.ref_prefix" : "${workspace.external_af_ref_bed_prefix}", @@ -19,6 +17,7 @@ "AnnotateVcf.prefix" : "${this.sample_set_set_id}", + "AnnotateVcf.gatk_docker" : "${workspace.gatk_docker}", "AnnotateVcf.sv_base_mini_docker" : "${workspace.sv_base_mini_docker}", "AnnotateVcf.sv_pipeline_docker" : "${workspace.sv_pipeline_docker}" } \ No newline at end of file diff --git a/inputs/templates/terra_workspaces/cohort_mode/workspace.tsv.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workspace.tsv.tmpl index effc60cf4..969d33e39 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/workspace.tsv.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/workspace.tsv.tmpl @@ -1,2 +1,2 @@ -workspace:cloud_sdk_docker cnmops_docker condense_counts_docker gatk_docker gatk_docker_pesr_override gcnv_gatk_docker genomes_in_the_cloud_docker linux_docker manta_docker samtools_cloud_docker sv_base_docker sv_base_mini_docker sv_pipeline_base_docker sv_pipeline_docker sv_pipeline_hail_docker sv_pipeline_updates_docker sv_pipeline_qc_docker sv_pipeline_rdtest_docker wham_docker allosome_file autosome_file bin_exclude cnmops_exclude_list cohort_ped_file contig_ploidy_priors copy_number_autosomal_contigs cytobands dbsnp_vcf delly_exclude_intervals_file depth_exclude_list empty_file exclude_intervals_for_gcnv_filter_intervals external_af_ref_bed external_af_ref_bed_prefix genome_file inclusion_bed linc_rna_gtf manta_region_bed mei_bed melt_standard_vcf_header noncoding_bed pesr_exclude_list preprocessed_intervals primary_contigs_fai primary_contigs_list promoter_bed protein_coding_gtf reference_build reference_dict reference_fasta reference_index reference_version rmsk segdups seed_cutoffs unpadded_intervals_file wgd_scoring_mask wham_include_list_bed_file chr_x chr_y google_project_id -{{ dockers.cloud_sdk_docker }} {{ dockers.cnmops_docker }} {{ dockers.condense_counts_docker }} {{ dockers.gatk_docker }} {{ 
dockers.gatk_docker_pesr_override }} {{ dockers.gatk_docker }} {{ dockers.genomes_in_the_cloud_docker }} {{ dockers.linux_docker }} {{ dockers.manta_docker }} {{ dockers.samtools_cloud_docker }} {{ dockers.sv_base_docker }} {{ dockers.sv_base_mini_docker }} {{ dockers.sv_pipeline_base_docker }} {{ dockers.sv_pipeline_docker }} {{ dockers.sv_pipeline_hail_docker }} {{ dockers.sv_pipeline_updates_docker }} {{ dockers.sv_pipeline_qc_docker }} {{ dockers.sv_pipeline_rdtest_docker }} {{ dockers.wham_docker }} {{ reference_resources.allosome_file }} {{ reference_resources.autosome_file }} {{ reference_resources.bin_exclude }} {{ reference_resources.cnmops_exclude_list }} gs://broad-dsde-methods-eph/ped_1kgp_all.ped {{ reference_resources.contig_ploidy_priors }} {{ reference_resources.copy_number_autosomal_contigs }} {{ reference_resources.cytobands }} {{ reference_resources.dbsnp_vcf }} {{ reference_resources.delly_exclude_intervals_file }} {{ reference_resources.depth_exclude_list }} {{ reference_resources.empty_file }} {{ reference_resources.exclude_intervals_for_gcnv_filter_intervals }} {{ reference_resources.external_af_ref_bed | tojson }} {{ reference_resources.external_af_ref_bed_prefix | tojson }} {{ reference_resources.genome_file }} {{ reference_resources.inclusion_bed }} {{ reference_resources.linc_rna_gtf | tojson }} {{ reference_resources.manta_region_bed }} {{ reference_resources.mei_bed }} {{ reference_resources.melt_std_vcf_header }} {{ reference_resources.noncoding_bed | tojson }} {{ reference_resources.pesr_exclude_list }} {{ reference_resources.preprocessed_intervals }} {{ reference_resources.primary_contigs_fai }} {{ reference_resources.primary_contigs_list }} {{ reference_resources.promoter_bed | tojson }} {{ reference_resources.protein_coding_gtf | tojson }} {{ reference_resources.reference_build }} {{ reference_resources.reference_dict }} {{ reference_resources.reference_fasta }} {{ reference_resources.reference_index }} {{ 
reference_resources.reference_version }} {{ reference_resources.rmsk }} {{ reference_resources.segdups }} {{ reference_resources.seed_cutoffs }} {{ reference_resources.unpadded_intervals_file }} {{ reference_resources.wgd_scoring_mask }} {{ reference_resources.wham_include_list_bed_file }} {{ reference_resources.chr_x }} {{ reference_resources.chr_y }} {{ cloud_env.terra_billing_project_id }} +workspace:cloud_sdk_docker cnmops_docker condense_counts_docker gatk_docker gatk_docker_pesr_override gcnv_gatk_docker genomes_in_the_cloud_docker linux_docker manta_docker samtools_cloud_docker sv_base_docker sv_base_mini_docker sv_pipeline_base_docker sv_pipeline_docker sv_pipeline_hail_docker sv_pipeline_updates_docker sv_pipeline_qc_docker sv_pipeline_rdtest_docker wham_docker allosome_file autosome_file bin_exclude cnmops_exclude_list cohort_ped_file contig_ploidy_priors copy_number_autosomal_contigs cytobands dbsnp_vcf delly_exclude_intervals_file depth_exclude_list empty_file exclude_intervals_for_gcnv_filter_intervals external_af_ref_bed external_af_ref_bed_prefix genome_file inclusion_bed manta_region_bed mei_bed melt_standard_vcf_header noncoding_bed pesr_exclude_list preprocessed_intervals primary_contigs_fai primary_contigs_list protein_coding_gtf reference_build reference_dict reference_fasta reference_index reference_version rmsk segdups seed_cutoffs unpadded_intervals_file wgd_scoring_mask wham_include_list_bed_file chr_x chr_y google_project_id +{{ dockers.cloud_sdk_docker }} {{ dockers.cnmops_docker }} {{ dockers.condense_counts_docker }} {{ dockers.gatk_docker }} {{ dockers.gatk_docker_pesr_override }} {{ dockers.gatk_docker }} {{ dockers.genomes_in_the_cloud_docker }} {{ dockers.linux_docker }} {{ dockers.manta_docker }} {{ dockers.samtools_cloud_docker }} {{ dockers.sv_base_docker }} {{ dockers.sv_base_mini_docker }} {{ dockers.sv_pipeline_base_docker }} {{ dockers.sv_pipeline_docker }} {{ dockers.sv_pipeline_hail_docker }} {{ 
dockers.sv_pipeline_updates_docker }} {{ dockers.sv_pipeline_qc_docker }} {{ dockers.sv_pipeline_rdtest_docker }} {{ dockers.wham_docker }} {{ reference_resources.allosome_file }} {{ reference_resources.autosome_file }} {{ reference_resources.bin_exclude }} {{ reference_resources.cnmops_exclude_list }} gs://broad-dsde-methods-eph/ped_1kgp_all.ped {{ reference_resources.contig_ploidy_priors }} {{ reference_resources.copy_number_autosomal_contigs }} {{ reference_resources.cytobands }} {{ reference_resources.dbsnp_vcf }} {{ reference_resources.delly_exclude_intervals_file }} {{ reference_resources.depth_exclude_list }} {{ reference_resources.empty_file }} {{ reference_resources.exclude_intervals_for_gcnv_filter_intervals }} {{ reference_resources.external_af_ref_bed }} {{ reference_resources.external_af_ref_bed_prefix }} {{ reference_resources.genome_file }} {{ reference_resources.inclusion_bed }} {{ reference_resources.manta_region_bed }} {{ reference_resources.mei_bed }} {{ reference_resources.melt_std_vcf_header }} {{ reference_resources.noncoding_bed }} {{ reference_resources.pesr_exclude_list }} {{ reference_resources.preprocessed_intervals }} {{ reference_resources.primary_contigs_fai }} {{ reference_resources.primary_contigs_list }} {{ reference_resources.protein_coding_gtf }} {{ reference_resources.reference_build }} {{ reference_resources.reference_dict }} {{ reference_resources.reference_fasta }} {{ reference_resources.reference_index }} {{ reference_resources.reference_version }} {{ reference_resources.rmsk }} {{ reference_resources.segdups }} {{ reference_resources.seed_cutoffs }} {{ reference_resources.unpadded_intervals_file }} {{ reference_resources.wgd_scoring_mask }} {{ reference_resources.wham_include_list_bed_file }} {{ reference_resources.chr_x }} {{ reference_resources.chr_y }} {{ cloud_env.terra_billing_project_id }} diff --git a/inputs/templates/terra_workspaces/single_sample/GATKSVPipelineSingleSample.no_melt.json.tmpl 
b/inputs/templates/terra_workspaces/single_sample/GATKSVPipelineSingleSample.no_melt.json.tmpl index 029812bd7..e51acd09f 100644 --- a/inputs/templates/terra_workspaces/single_sample/GATKSVPipelineSingleSample.no_melt.json.tmpl +++ b/inputs/templates/terra_workspaces/single_sample/GATKSVPipelineSingleSample.no_melt.json.tmpl @@ -94,8 +94,6 @@ "GATKSVPipelineSingleSample.run_vcf_qc" : false, "GATKSVPipelineSingleSample.protein_coding_gtf" : "${workspace.reference_protein_coding_gtf}", - "GATKSVPipelineSingleSample.linc_rna_gtf" : "${workspace.reference_linc_rna_gtf}", - "GATKSVPipelineSingleSample.promoter_bed" : "${workspace.reference_promoter_bed}", "GATKSVPipelineSingleSample.noncoding_bed" : "${workspace.reference_noncoding_bed}", "GATKSVPipelineSingleSample.external_af_ref_bed" : "${workspace.reference_external_af_ref_bed}", "GATKSVPipelineSingleSample.external_af_ref_bed_prefix" : "${workspace.reference_external_af_ref_bed_prefix}", diff --git a/inputs/templates/terra_workspaces/single_sample/workspace.tsv.tmpl b/inputs/templates/terra_workspaces/single_sample/workspace.tsv.tmpl index 7638eb980..850413f8d 100644 --- a/inputs/templates/terra_workspaces/single_sample/workspace.tsv.tmpl +++ b/inputs/templates/terra_workspaces/single_sample/workspace.tsv.tmpl @@ -1,2 +1,2 @@ -workspace:cloud_sdk_docker cnmops_docker condense_counts_docker gatk_docker gatk_docker_pesr_override genomes_in_the_cloud_docker linux_docker manta_docker samtools_cloud_docker sv_base_docker sv_base_mini_docker sv_pipeline_base_docker sv_pipeline_docker sv_pipeline_hail_docker sv_pipeline_updates_docker sv_pipeline_qc_docker sv_pipeline_rdtest_docker wham_docker ref_panel_name ref_panel_bincov_matrix ref_panel_contig_ploidy_model_tar ref_panel_cutoffs ref_panel_del_bed ref_panel_dup_bed ref_panel_gcnv_model_tars_list ref_panel_genotype_pesr_pesr_sepcutoff ref_panel_genotype_pesr_depth_sepcutoff ref_panel_genotype_depth_pesr_sepcutoff ref_panel_genotype_depth_depth_sepcutoff 
ref_panel_ped_file ref_panel_PE_files_list ref_panel_PE_metrics ref_panel_qc_definitions ref_panel_requester_pays_crams ref_panel_samples_list ref_panel_SR_files_list ref_panel_SR_metrics ref_panel_std_manta_vcfs_list ref_panel_std_wham_vcfs_list ref_panel_vcf reference_name reference_allosome_file reference_autosome_file reference_bin_exclude reference_cnmops_exclude_list reference_contig_ploidy_priors reference_copy_number_autosomal_contigs reference_cytobands reference_dbsnp_vcf reference_delly_exclude_intervals_file reference_depth_exclude_list reference_empty_file reference_exclude_intervals_for_gcnv_filter_intervals reference_external_af_ref_bed reference_external_af_ref_bed_prefix reference_genome_file reference_inclusion_bed reference_linc_rna_gtf reference_manta_region_bed reference_mei_bed reference_melt_std_vcf_header reference_noncoding_bed reference_pesr_exclude_list reference_preprocessed_intervals reference_primary_contigs_list reference_primary_contigs_fai reference_promoter_bed reference_protein_coding_gtf reference_dict reference_fasta reference_index reference_version reference_rmsk reference_segdups reference_seed_cutoffs reference_unpadded_intervals_file reference_wgd_scoring_mask reference_wham_include_list_bed_file -{{ dockers.cloud_sdk_docker }} {{ dockers.cnmops_docker }} {{ dockers.condense_counts_docker }} {{ dockers.gatk_docker }} {{ dockers.gatk_docker_pesr_override }} {{ dockers.genomes_in_the_cloud_docker }} {{ dockers.linux_docker }} {{ dockers.manta_docker }} {{ dockers.samtools_cloud_docker }} {{ dockers.sv_base_docker }} {{ dockers.sv_base_mini_docker }} {{ dockers.sv_pipeline_base_docker }} {{ dockers.sv_pipeline_docker }} {{ dockers.sv_pipeline_hail_docker }} {{ dockers.sv_pipeline_updates_docker }} {{ dockers.sv_pipeline_qc_docker }} {{ dockers.sv_pipeline_rdtest_docker }} {{ dockers.wham_docker }} {{ ref_panel.name }} {{ ref_panel.merged_coverage_file }} {{ ref_panel.contig_ploidy_model_tar }} {{ ref_panel.cutoffs }} {{ 
ref_panel.del_bed }} {{ ref_panel.dup_bed }} {{ ref_panel.gcnv_model_tars_list }} {{ ref_panel.genotype_pesr_pesr_sepcutoff }} {{ ref_panel.genotype_pesr_depth_sepcutoff }} {{ ref_panel.genotype_depth_pesr_sepcutoff }} {{ ref_panel.genotype_depth_depth_sepcutoff }} {{ ref_panel.ped_file }} {{ ref_panel.PE_files_list }} {{ ref_panel.PE_metrics }} {{ reference_resources.single_sample_qc_definitions }} {{ ref_panel.requester_pays_crams }} {{ ref_panel.samples_list }} {{ ref_panel.SR_files_list }} {{ ref_panel.SR_metrics }} {{ ref_panel.std_manta_vcfs_list }} {{ ref_panel.std_wham_vcfs_list }} {{ ref_panel.clean_vcf }} {{ reference_resources.name }} {{ reference_resources.allosome_file }} {{ reference_resources.autosome_file }} {{ reference_resources.bin_exclude }} {{ reference_resources.cnmops_exclude_list }} {{ reference_resources.contig_ploidy_priors }} {{ reference_resources.copy_number_autosomal_contigs }} {{ reference_resources.cytobands }} {{ reference_resources.dbsnp_vcf }} {{ reference_resources.delly_exclude_intervals_file }} {{ reference_resources.depth_exclude_list }} {{ reference_resources.empty_file }} {{ reference_resources.exclude_intervals_for_gcnv_filter_intervals }} {{ reference_resources.external_af_ref_bed }} {{ reference_resources.external_af_ref_bed_prefix }} {{ reference_resources.genome_file }} {{ reference_resources.inclusion_bed }} {{ reference_resources.linc_rna_gtf }} {{ reference_resources.manta_region_bed }} {{ reference_resources.mei_bed }} {{ reference_resources.melt_std_vcf_header }} {{ reference_resources.noncoding_bed }} {{ reference_resources.pesr_exclude_list }} {{ reference_resources.preprocessed_intervals }} {{ reference_resources.primary_contigs_list }} {{ reference_resources.primary_contigs_fai }} {{ reference_resources.promoter_bed }} {{ reference_resources.protein_coding_gtf }} {{ reference_resources.reference_dict }} {{ reference_resources.reference_fasta }} {{ reference_resources.reference_index }} {{ 
reference_resources.reference_version }} {{ reference_resources.rmsk }} {{ reference_resources.segdups }} {{ reference_resources.seed_cutoffs }} {{ reference_resources.unpadded_intervals_file }} {{ reference_resources.wgd_scoring_mask }} {{ reference_resources.wham_include_list_bed_file }} +workspace:cloud_sdk_docker cnmops_docker condense_counts_docker gatk_docker gatk_docker_pesr_override genomes_in_the_cloud_docker linux_docker manta_docker samtools_cloud_docker sv_base_docker sv_base_mini_docker sv_pipeline_base_docker sv_pipeline_docker sv_pipeline_hail_docker sv_pipeline_updates_docker sv_pipeline_qc_docker sv_pipeline_rdtest_docker wham_docker ref_panel_name ref_panel_bincov_matrix ref_panel_contig_ploidy_model_tar ref_panel_cutoffs ref_panel_del_bed ref_panel_dup_bed ref_panel_gcnv_model_tars_list ref_panel_genotype_pesr_pesr_sepcutoff ref_panel_genotype_pesr_depth_sepcutoff ref_panel_genotype_depth_pesr_sepcutoff ref_panel_genotype_depth_depth_sepcutoff ref_panel_ped_file ref_panel_PE_files_list ref_panel_PE_metrics ref_panel_qc_definitions ref_panel_requester_pays_crams ref_panel_samples_list ref_panel_SR_files_list ref_panel_SR_metrics ref_panel_std_manta_vcfs_list ref_panel_std_wham_vcfs_list ref_panel_vcf reference_name reference_allosome_file reference_autosome_file reference_bin_exclude reference_cnmops_exclude_list reference_contig_ploidy_priors reference_copy_number_autosomal_contigs reference_cytobands reference_dbsnp_vcf reference_delly_exclude_intervals_file reference_depth_exclude_list reference_empty_file reference_exclude_intervals_for_gcnv_filter_intervals reference_external_af_ref_bed reference_external_af_ref_bed_prefix reference_genome_file reference_inclusion_bed reference_manta_region_bed reference_mei_bed reference_melt_std_vcf_header reference_noncoding_bed reference_pesr_exclude_list reference_preprocessed_intervals reference_primary_contigs_list reference_primary_contigs_fai reference_protein_coding_gtf reference_dict 
reference_fasta reference_index reference_version reference_rmsk reference_segdups reference_seed_cutoffs reference_unpadded_intervals_file reference_wgd_scoring_mask reference_wham_include_list_bed_file +{{ dockers.cloud_sdk_docker }} {{ dockers.cnmops_docker }} {{ dockers.condense_counts_docker }} {{ dockers.gatk_docker }} {{ dockers.gatk_docker_pesr_override }} {{ dockers.genomes_in_the_cloud_docker }} {{ dockers.linux_docker }} {{ dockers.manta_docker }} {{ dockers.samtools_cloud_docker }} {{ dockers.sv_base_docker }} {{ dockers.sv_base_mini_docker }} {{ dockers.sv_pipeline_base_docker }} {{ dockers.sv_pipeline_docker }} {{ dockers.sv_pipeline_hail_docker }} {{ dockers.sv_pipeline_updates_docker }} {{ dockers.sv_pipeline_qc_docker }} {{ dockers.sv_pipeline_rdtest_docker }} {{ dockers.wham_docker }} {{ ref_panel.name }} {{ ref_panel.merged_coverage_file }} {{ ref_panel.contig_ploidy_model_tar }} {{ ref_panel.cutoffs }} {{ ref_panel.del_bed }} {{ ref_panel.dup_bed }} {{ ref_panel.gcnv_model_tars_list }} {{ ref_panel.genotype_pesr_pesr_sepcutoff }} {{ ref_panel.genotype_pesr_depth_sepcutoff }} {{ ref_panel.genotype_depth_pesr_sepcutoff }} {{ ref_panel.genotype_depth_depth_sepcutoff }} {{ ref_panel.ped_file }} {{ ref_panel.PE_files_list }} {{ ref_panel.PE_metrics }} {{ reference_resources.single_sample_qc_definitions }} {{ ref_panel.requester_pays_crams }} {{ ref_panel.samples_list }} {{ ref_panel.SR_files_list }} {{ ref_panel.SR_metrics }} {{ ref_panel.std_manta_vcfs_list }} {{ ref_panel.std_wham_vcfs_list }} {{ ref_panel.clean_vcf }} {{ reference_resources.name }} {{ reference_resources.allosome_file }} {{ reference_resources.autosome_file }} {{ reference_resources.bin_exclude }} {{ reference_resources.cnmops_exclude_list }} {{ reference_resources.contig_ploidy_priors }} {{ reference_resources.copy_number_autosomal_contigs }} {{ reference_resources.cytobands }} {{ reference_resources.dbsnp_vcf }} {{ reference_resources.delly_exclude_intervals_file }} {{ 
reference_resources.depth_exclude_list }} {{ reference_resources.empty_file }} {{ reference_resources.exclude_intervals_for_gcnv_filter_intervals }} {{ reference_resources.external_af_ref_bed }} {{ reference_resources.external_af_ref_bed_prefix }} {{ reference_resources.genome_file }} {{ reference_resources.inclusion_bed }} {{ reference_resources.manta_region_bed }} {{ reference_resources.mei_bed }} {{ reference_resources.melt_std_vcf_header }} {{ reference_resources.noncoding_bed }} {{ reference_resources.pesr_exclude_list }} {{ reference_resources.preprocessed_intervals }} {{ reference_resources.primary_contigs_list }} {{ reference_resources.primary_contigs_fai }} {{ reference_resources.protein_coding_gtf }} {{ reference_resources.reference_dict }} {{ reference_resources.reference_fasta }} {{ reference_resources.reference_index }} {{ reference_resources.reference_version }} {{ reference_resources.rmsk }} {{ reference_resources.segdups }} {{ reference_resources.seed_cutoffs }} {{ reference_resources.unpadded_intervals_file }} {{ reference_resources.wgd_scoring_mask }} {{ reference_resources.wham_include_list_bed_file }} diff --git a/inputs/templates/test/AnnotateVcf/AnnotateVcf.json.tmpl b/inputs/templates/test/AnnotateVcf/AnnotateVcf.json.tmpl index 33797e5eb..74b441f78 100644 --- a/inputs/templates/test/AnnotateVcf/AnnotateVcf.json.tmpl +++ b/inputs/templates/test/AnnotateVcf/AnnotateVcf.json.tmpl @@ -3,8 +3,6 @@ "AnnotateVcf.vcf_idx" : {{ test_batch.clean_vcf_index | tojson }}, "AnnotateVcf.protein_coding_gtf" : {{ reference_resources.protein_coding_gtf | tojson }}, - "AnnotateVcf.linc_rna_gtf" : {{ reference_resources.linc_rna_gtf | tojson }}, - "AnnotateVcf.promoter_bed" : {{ reference_resources.promoter_bed | tojson }}, "AnnotateVcf.noncoding_bed" : {{ reference_resources.noncoding_bed | tojson }}, "AnnotateVcf.ref_bed" : {{ reference_resources.external_af_ref_bed | tojson }}, "AnnotateVcf.ref_prefix" : {{ reference_resources.external_af_ref_bed_prefix | 
tojson }}, @@ -19,6 +17,7 @@ "AnnotateVcf.prefix" : {{ test_batch.name | tojson }}, + "AnnotateVcf.gatk_docker":{{ dockers.gatk_docker | tojson }}, "AnnotateVcf.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, "AnnotateVcf.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }} } diff --git a/inputs/templates/test/AnnotateVcf/GenerateFunctionalAnnotationResources.json.tmpl b/inputs/templates/test/AnnotateVcf/GenerateFunctionalAnnotationResources.json.tmpl deleted file mode 100644 index 3766a0317..000000000 --- a/inputs/templates/test/AnnotateVcf/GenerateFunctionalAnnotationResources.json.tmpl +++ /dev/null @@ -1,12 +0,0 @@ -{ - "GenerateFunctionalAnnotationResources.gencode_annotation_gtf": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.annotation.gtf.gz", - "GenerateFunctionalAnnotationResources.gencode_pc_transcripts_fa": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.pc_transcripts.fa.gz", - "GenerateFunctionalAnnotationResources.gencode_pc_translations_fa": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.pc_translations.fa.gz", - "GenerateFunctionalAnnotationResources.gencode_transcript_source": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.metadata.Transcript_source", - - "GenerateFunctionalAnnotationResources.promoter_window": 1000, - - "GenerateFunctionalAnnotationResources.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "GenerateFunctionalAnnotationResources.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }} -} - diff --git a/inputs/templates/test/AnnotateVcf/PrepareGencode.json.tmpl b/inputs/templates/test/AnnotateVcf/PrepareGencode.json.tmpl deleted file mode 100644 index 2b114b3d5..000000000 --- a/inputs/templates/test/AnnotateVcf/PrepareGencode.json.tmpl +++ /dev/null @@ -1,12 +0,0 @@ -{ - "PrepareGencode.gencode_annotation_gtf": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.annotation.gtf.gz", - 
"PrepareGencode.gencode_pc_transcripts_fa": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.pc_transcripts.fa.gz", - "PrepareGencode.gencode_pc_translations_fa": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.pc_translations.fa.gz", - "PrepareGencode.gencode_transcript_source": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.metadata.Transcript_source", - - "PrepareGencode.promoter_window": 1000, - - "PrepareGencode.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "PrepareGencode.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }} -} - diff --git a/inputs/templates/test/AnnotateVcf/PrepareNoncoding.json.tmpl b/inputs/templates/test/AnnotateVcf/PrepareNoncoding.json.tmpl deleted file mode 100644 index 9649204d3..000000000 --- a/inputs/templates/test/AnnotateVcf/PrepareNoncoding.json.tmpl +++ /dev/null @@ -1,5 +0,0 @@ -{ - "PrepareNoncoding.noncoding_bed_list": "File", - "PrepareNoncoding.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }} -} - diff --git a/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.json.tmpl b/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.json.tmpl index 244b51ca8..a8f6d3754 100644 --- a/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.json.tmpl +++ b/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.json.tmpl @@ -93,8 +93,6 @@ "GATKSVPipelineSingleSample.run_vcf_qc" : false, "GATKSVPipelineSingleSample.protein_coding_gtf" : {{ reference_resources.protein_coding_gtf | tojson }}, - "GATKSVPipelineSingleSample.linc_rna_gtf" : {{ reference_resources.linc_rna_gtf | tojson }}, - "GATKSVPipelineSingleSample.promoter_bed" : {{ reference_resources.promoter_bed | tojson }}, "GATKSVPipelineSingleSample.noncoding_bed" : {{ reference_resources.noncoding_bed | tojson }}, "GATKSVPipelineSingleSample.external_af_ref_bed" : {{ 
reference_resources.external_af_ref_bed | tojson }}, "GATKSVPipelineSingleSample.external_af_ref_bed_prefix" : {{ reference_resources.external_af_ref_bed_prefix | tojson }}, diff --git a/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.no_melt.json.tmpl b/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.no_melt.json.tmpl index a6a9c5e1d..b7b2a5d43 100644 --- a/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.no_melt.json.tmpl +++ b/inputs/templates/test/GATKSVPipelineSingleSample/GATKSVPipelineSingleSample.no_melt.json.tmpl @@ -94,8 +94,6 @@ "GATKSVPipelineSingleSample.run_vcf_qc" : false, "GATKSVPipelineSingleSample.protein_coding_gtf" : {{ reference_resources.protein_coding_gtf | tojson }}, - "GATKSVPipelineSingleSample.linc_rna_gtf" : {{ reference_resources.linc_rna_gtf | tojson }}, - "GATKSVPipelineSingleSample.promoter_bed" : {{ reference_resources.promoter_bed | tojson }}, "GATKSVPipelineSingleSample.noncoding_bed" : {{ reference_resources.noncoding_bed | tojson }}, "GATKSVPipelineSingleSample.external_af_ref_bed" : {{ reference_resources.external_af_ref_bed | tojson }}, "GATKSVPipelineSingleSample.external_af_ref_bed_prefix" : {{ reference_resources.external_af_ref_bed_prefix | tojson }}, diff --git a/inputs/values/resources_hg38.json b/inputs/values/resources_hg38.json index 9a9140020..fc9f3eff1 100644 --- a/inputs/values/resources_hg38.json +++ b/inputs/values/resources_hg38.json @@ -20,7 +20,6 @@ "external_af_ref_bed_prefix" : "gnomad_v2.1_sv", "genome_file" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/hg38.genome", "inclusion_bed" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/hg38_primary_contigs.bed", - "linc_rna_gtf" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/gencode.lincRNA.gtf.gz", "manta_region_bed" : 
"gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/primary_contigs_plus_mito.bed.gz", "mei_bed" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/mei_hg38.bed.gz", "melt_std_vcf_header" : "gs://gatk-sv-resources-public/hg38/v0/sv-resources/resources/v1/melt_standard_vcf_header.txt", @@ -29,8 +28,7 @@ "preprocessed_intervals" : "gs://gatk-sv-resources-public/hg38/v0/sv-resources/resources/v1/preprocessed_intervals.interval_list", "primary_contigs_fai" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/contig.fai", "primary_contigs_list" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/primary_contigs.list", - "promoter_bed" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/promoter.bed", - "protein_coding_gtf" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/gencode.canonical_pc.gtf.gz", + "protein_coding_gtf" : "gs://gatk-sv-resources-public/hg38/v0/sv-resources/resources/v1/MANE.GRCh38.v0.95.select_ensembl_genomic.gtf", "reference_build" : "hg38", "reference_dict" : "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", "reference_fasta" : "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", diff --git a/wdl/AnnotateChromosome.wdl b/wdl/AnnotateChromosome.wdl deleted file mode 100644 index a5839c9ef..000000000 --- a/wdl/AnnotateChromosome.wdl +++ /dev/null @@ -1,206 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -# After VCF and dictionary are scattered per chromosome, apply annotation to scattered per-chromosome VCF -workflow AnnotateChromosome { - - input { - - String prefix - File vcf - File protein_coding_gtf - File linc_rna_gtf - File promoter_bed - File noncoding_bed - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_annotate_intervals - RuntimeAttr? 
runtime_attr_merge_annotations - } - - String coding_flag = "--gencode" - String noncoding_flag = "--noncoding" - - call AnnotateIntervals as AnnotateProteinCoding { - input: - vcf = vcf, - intervals_file = protein_coding_gtf, - intervals_flag = coding_flag, - prefix = prefix, - intervals_set = "protein_coding", - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_annotate_intervals - } - - call AnnotateIntervals as AnnotateLincRNA { - input: - vcf = vcf, - intervals_file = linc_rna_gtf, - intervals_flag = coding_flag, - prefix = prefix, - intervals_set = "lincRNA", - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_annotate_intervals - } - - call AnnotateIntervals as AnnotatePromoters { - input: - vcf = vcf, - intervals_file = promoter_bed, - intervals_flag = noncoding_flag, - prefix = prefix, - intervals_set = "promoters", - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_annotate_intervals - } - - call AnnotateIntervals as annotate_noncoding_elements { - input: - vcf = vcf, - intervals_file = noncoding_bed, - intervals_flag = noncoding_flag, - prefix = prefix, - intervals_set = "noncoding", - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_annotate_intervals - } - - - - call MergeAnnotations { - input: - vcf = vcf, - annotated_vcfs = [AnnotateProteinCoding.annotated_vcf, AnnotateLincRNA.annotated_vcf, AnnotatePromoters.annotated_vcf, - annotate_noncoding_elements.annotated_vcf], - prefix = prefix, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_merge_annotations - } - - output { - File annotated_vcf = MergeAnnotations.annotated_vcf - File annotated_vcf_idx = MergeAnnotations.annotated_vcf_idx - } -} - -# Apply annoattion -task AnnotateIntervals { - - input { - - File vcf - File intervals_file # gtf or bed file - String intervals_flag # "--gencode" or "--noncoding" - String prefix - String intervals_set - 
String sv_pipeline_docker - - RuntimeAttr? runtime_attr_override - } - - output { - File annotated_vcf = "${prefix}.${intervals_set}.vcf.gz" - } - - command <<< - - set -euo pipefail - - svtk annotate \ - ~{intervals_flag} ~{intervals_file} \ - ~{vcf} \ - ~{prefix}.~{intervals_set}.vcf - - orig=$( zcat ~{vcf} | cut -f1 | grep -cv "^#" || true ) - new=$( cut -f1 ~{prefix}.~{intervals_set}.vcf | grep -cv "^#" || true ) - - if [ "$new" -ne "$orig" ]; then - echo "ANNOTATED VCF DOES NOT HAVE THE SAME NUMBER OF RECORDS AS INPUT VCF ($new vs $orig)" - exit 1 - fi - - bgzip -f ~{prefix}.~{intervals_set}.vcf - - >>> - - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - docker: sv_pipeline_docker - } -} - -# Merge annotations from different dictionaries (genocode, lincRNA, promoters, potentially more in the future) -task MergeAnnotations { - - input { - - File vcf - Array[File] annotated_vcfs - String prefix - String sv_pipeline_docker - - RuntimeAttr? 
runtime_attr_override - } - - output { - File annotated_vcf = "${prefix}.annotated.vcf.gz" - File annotated_vcf_idx = "${prefix}.annotated.vcf.gz.tbi" - } - - command <<< - - set -euo pipefail - - /opt/sv-pipeline/05_annotation/scripts/merge_annotations.py \ - ~{vcf} \ - ~{sep=" " annotated_vcfs} \ - ~{prefix}.annotated.vcf - - bgzip ~{prefix}.annotated.vcf - tabix ~{prefix}.annotated.vcf.gz - - orig=$( zcat ~{vcf} | cut -f1 | grep -cv "^#" || true) - new=$( zcat ~{prefix}.annotated.vcf.gz | cut -f1 | grep -cv "^#" || true) - if [ "$new" -ne "$orig" ]; then - echo "ANNOTATED VCF DOES NOT HAVE THE SAME NUMBER OF RECORDS AS INPUT VCF ($new vs $orig)" - exit 1 - fi - - >>> - - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 8, - disk_gb: 250, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - docker: sv_pipeline_docker - } -} diff --git a/wdl/AnnotateFunctionalConsequences.wdl b/wdl/AnnotateFunctionalConsequences.wdl new file mode 100644 index 000000000..d8e1d158c --- /dev/null +++ b/wdl/AnnotateFunctionalConsequences.wdl @@ -0,0 +1,99 @@ +version 1.0 + +import "Structs.wdl" + +workflow AnnotateFunctionalConsequences { + input { + File vcf + File vcf_index + String prefix + + File protein_coding_gtf + File? noncoding_bed + Int? promoter_window + Int? max_breakend_as_cnv_length + String? 
additional_args + + String gatk_docker + RuntimeAttr? runtime_attr_svannotate + } + + call SVAnnotate { + input: + vcf = vcf, + vcf_index = vcf_index, + prefix = prefix, + protein_coding_gtf = protein_coding_gtf, + noncoding_bed = noncoding_bed, + promoter_window = promoter_window, + max_breakend_as_cnv_length = max_breakend_as_cnv_length, + additional_args = additional_args, + gatk_docker = gatk_docker, + runtime_attr_override = runtime_attr_svannotate + } + + output { + File annotated_vcf = SVAnnotate.annotated_vcf + File annotated_vcf_index = SVAnnotate.annotated_vcf_index + } +} + +task SVAnnotate { + input { + File vcf + File vcf_index + String prefix + + File protein_coding_gtf + File? noncoding_bed + Int? promoter_window + Int? max_breakend_as_cnv_length + String? additional_args + + String gatk_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 10, + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + Float mem_gb = select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + Int java_mem_mb = ceil(mem_gb * 1000 * 0.7) + + String outfile = "~{prefix}.annotated.vcf.gz" + + output { + File annotated_vcf = "~{outfile}" + File annotated_vcf_index = "~{outfile}.tbi" + } + command <<< + + set -euo pipefail + + gatk --java-options "-Xmx~{java_mem_mb}m" SVAnnotate \ + -V ~{vcf} \ + -O ~{outfile} \ + --protein-coding-gtf ~{protein_coding_gtf} \ + ~{"--non-coding-bed " + noncoding_bed} \ + ~{"--promoter-window-length " + promoter_window} \ + ~{"--max-breakend-as-cnv-length " + max_breakend_as_cnv_length} \ + ~{additional_args} + + >>> + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + 
bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: gatk_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} \ No newline at end of file diff --git a/wdl/AnnotateVcf.wdl b/wdl/AnnotateVcf.wdl index 3fbd71ee7..b95e5c55c 100644 --- a/wdl/AnnotateVcf.wdl +++ b/wdl/AnnotateVcf.wdl @@ -1,6 +1,6 @@ version 1.0 -import "ScatterAnnotateVcfByChrom.wdl" as ann +import "AnnotateFunctionalConsequences.wdl" as func import "PruneAndAddVafs.wdl" as pav import "AnnotateExternalAF.wdl" as eaf @@ -13,9 +13,10 @@ workflow AnnotateVcf { String prefix File protein_coding_gtf - File linc_rna_gtf - File promoter_bed - File noncoding_bed + File? noncoding_bed + Int? promoter_window + Int? max_breakend_as_cnv_length + String? svannotate_additional_args Int max_shards_per_chrom_step1 Int min_records_per_shard_step1 @@ -31,10 +32,9 @@ workflow AnnotateVcf { String sv_base_mini_docker String sv_pipeline_docker + String gatk_docker - RuntimeAttr? runtime_attr_annotate_intervals - RuntimeAttr? runtime_attr_merge_annotations - RuntimeAttr? runtime_attr_subset_vcf + RuntimeAttr? runtime_attr_svannotate RuntimeAttr? runtime_attr_concat_vcfs RuntimeAttr? runtime_attr_prune_vcf RuntimeAttr? runtime_attr_shard_vcf @@ -49,28 +49,24 @@ workflow AnnotateVcf { RuntimeAttr? 
runtime_attr_select_matched_svs } - call ann.ScatterAnnotateVcfByChrom as ScatterAnnotateVcfByChrom { + call func.AnnotateFunctionalConsequences { input: - vcf = vcf, - vcf_idx = vcf_idx, - prefix = prefix, - contig_list = contig_list, + vcf = vcf, + vcf_index = vcf_idx, + prefix = prefix, protein_coding_gtf = protein_coding_gtf, - linc_rna_gtf = linc_rna_gtf, - promoter_bed = promoter_bed, - noncoding_bed = noncoding_bed, - sv_base_mini_docker = sv_base_mini_docker, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_annotate_intervals = runtime_attr_annotate_intervals, - runtime_attr_merge_annotations = runtime_attr_merge_annotations, - runtime_attr_subset_vcf = runtime_attr_subset_vcf, - runtime_attr_concat_vcfs = runtime_attr_concat_vcfs + noncoding_bed = noncoding_bed, + promoter_window = promoter_window, + max_breakend_as_cnv_length = max_breakend_as_cnv_length, + additional_args = svannotate_additional_args, + gatk_docker = gatk_docker, + runtime_attr_svannotate = runtime_attr_svannotate } call pav.PruneAndAddVafs as PruneAndAddVafs { input: - vcf = ScatterAnnotateVcfByChrom.annotated_vcf, - vcf_idx = ScatterAnnotateVcfByChrom.annotated_vcf_idx, + vcf = AnnotateFunctionalConsequences.annotated_vcf, + vcf_idx = AnnotateFunctionalConsequences.annotated_vcf_index, prefix = prefix, sample_pop_assignments = sample_pop_assignments, prune_list = prune_list, diff --git a/wdl/GATKSVPipelineSingleSample.wdl b/wdl/GATKSVPipelineSingleSample.wdl index 2e74ce741..2061b5375 100644 --- a/wdl/GATKSVPipelineSingleSample.wdl +++ b/wdl/GATKSVPipelineSingleSample.wdl @@ -554,9 +554,9 @@ workflow GATKSVPipelineSingleSample { ############################################################ File protein_coding_gtf - File linc_rna_gtf - File promoter_bed File noncoding_bed + Int? promoter_window + Int? 
max_breakend_as_cnv_length Int annotation_sv_per_shard Int annotation_max_shards_per_chrom_step1 Int annotation_min_records_per_shard_step1 @@ -565,6 +565,8 @@ workflow GATKSVPipelineSingleSample { String? external_af_ref_bed_prefix # name of external AF bed file call set Array[String]? external_af_population # populations to annotate external AFs (required if ref_bed set, use "ALL" for all) + RuntimeAttr? runtime_attr_svannotate + ############################################################ ## Single sample filtering ############################################################ @@ -1362,9 +1364,9 @@ workflow GATKSVPipelineSingleSample { prefix = batch, contig_list = primary_contigs_list, protein_coding_gtf = protein_coding_gtf, - linc_rna_gtf = linc_rna_gtf, - promoter_bed = promoter_bed, noncoding_bed = noncoding_bed, + promoter_window = promoter_window, + max_breakend_as_cnv_length = max_breakend_as_cnv_length, ref_bed = external_af_ref_bed, ref_prefix = external_af_ref_bed_prefix, population = external_af_population, @@ -1372,7 +1374,9 @@ workflow GATKSVPipelineSingleSample { max_shards_per_chrom_step1 = annotation_max_shards_per_chrom_step1, min_records_per_shard_step1 = annotation_min_records_per_shard_step1, sv_base_mini_docker = sv_base_mini_docker, - sv_pipeline_docker = sv_pipeline_docker + sv_pipeline_docker = sv_pipeline_docker, + gatk_docker = gatk_docker, + runtime_attr_svannotate = runtime_attr_svannotate } call SingleSampleFiltering.VcfToBed as VcfToBed { diff --git a/wdl/GenerateFunctionalAnnotationResources.wdl b/wdl/GenerateFunctionalAnnotationResources.wdl deleted file mode 100644 index b39d1048d..000000000 --- a/wdl/GenerateFunctionalAnnotationResources.wdl +++ /dev/null @@ -1,72 +0,0 @@ -version 1.0 - -import "PrepareGencode.wdl" as pg -# import "PrepareNoncoding.wdl" as pn - -# Workflow for preprocessing for functional annotation -workflow GenerateFunctionalAnnotationResources { - input { - - ### args for PrepareGencode - File 
gencode_annotation_gtf # Gencode annotation GTF - File gencode_pc_translations_fa # Gencode protein-coding translation fasta - File gencode_pc_transcripts_fa # Gencode protein-coding transcript fasta - File gencode_transcript_source # Gencode transcript source metadata - Int promoter_window # Window upstream of TSS to consider as promoter region - - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_get_canonical_transcripts - RuntimeAttr? runtime_attr_make_canonical_gtf - RuntimeAttr? runtime_attr_make_promoters - RuntimeAttr? runtime_attr_subset_gtf - -# ### args for PrepareNoncoding -# File noncoding_bed_list -# -# RuntimeAttr? runtime_attr_clean_noncoding_bed -# RuntimeAttr? runtime_attr_make_noncoding_bed - } - - call pg.PrepareGencode as PrepareGencode { - input: - - gencode_annotation_gtf = gencode_annotation_gtf, - gencode_pc_translations_fa = gencode_pc_translations_fa, - gencode_pc_transcripts_fa = gencode_pc_transcripts_fa, - gencode_transcript_source = gencode_transcript_source, - promoter_window = promoter_window, - - sv_base_mini_docker = sv_base_mini_docker, - sv_pipeline_docker = sv_pipeline_docker, - - runtime_attr_get_canonical_transcripts = runtime_attr_get_canonical_transcripts, - runtime_attr_make_canonical_gtf = runtime_attr_make_canonical_gtf, - runtime_attr_make_promoters = runtime_attr_make_promoters, - runtime_attr_subset_gtf = runtime_attr_subset_gtf - } - -# call pn.PrepareNoncoding as PrepareNoncoding { -# input: -# -# noncoding_bed_list = noncoding_bed_list, -# -# sv_base_mini_docker = sv_base_mini_docker, -# -# runtime_attr_clean_noncoding_bed = runtime_attr_clean_noncoding_bed, -# runtime_attr_make_noncoding_bed = runtime_attr_make_noncoding_bed -# } - - output { - - File canonical_gtf = PrepareGencode.canonical_gtf - File canonical_promoters = PrepareGencode.canonical_promoters - File antisense_gtf = PrepareGencode.antisense_gtf - File lincRNA_gtf = PrepareGencode.lincRNA_gtf - File 
processed_transcript_gtf = PrepareGencode.processed_transcript_gtf - File pseudogene_gtf = PrepareGencode.pseudogene_gtf - - # File noncoding_bed = PrepareNoncoding.noncoding_bed - } -} diff --git a/wdl/PrepareGencode.wdl b/wdl/PrepareGencode.wdl deleted file mode 100644 index 3bf1f6be3..000000000 --- a/wdl/PrepareGencode.wdl +++ /dev/null @@ -1,309 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -# Prepare input GTF according to other input dictionaries and output GTF for annotation sub-module -workflow PrepareGencode { - - input { - - File gencode_annotation_gtf # Gencode annotation GTF - File gencode_pc_translations_fa # Gencode protein-coding translation fasta - File gencode_pc_transcripts_fa # Gencode protein-coding transcript fasta - File gencode_transcript_source # Gencode transcript source metadata - Int promoter_window # Window upstream of TSS to consider as promoter region - - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_get_canonical_transcripts - RuntimeAttr? runtime_attr_make_canonical_gtf - RuntimeAttr? runtime_attr_make_promoters - RuntimeAttr? 
runtime_attr_subset_gtf - } - - call GetCanonicalTranscripts { - input: - - gencode_annotation_gtf = gencode_annotation_gtf, - gencode_pc_translations_fa = gencode_pc_translations_fa, - gencode_pc_transcripts_fa = gencode_pc_transcripts_fa, - gencode_transcript_source = gencode_transcript_source, - - sv_pipeline_docker = sv_pipeline_docker, - - runtime_attr_override = runtime_attr_get_canonical_transcripts - } - - call MakeCanonicalGtf { - input: - - gencode_annotation_gtf = gencode_annotation_gtf, - canonical_transcripts = GetCanonicalTranscripts.canon_tx, - - sv_pipeline_docker = sv_pipeline_docker, - - runtime_attr_override = runtime_attr_make_canonical_gtf - } - - call MakePromoters { - input: - - gtf = MakeCanonicalGtf.canon_gtf, - window = promoter_window, - - sv_pipeline_docker = sv_pipeline_docker, - - runtime_attr_override = runtime_attr_make_promoters - } - - call SubsetGtf as SubsetAntisense { - input: - - annotation_gtf = gencode_annotation_gtf, - subset = "antisense", - - sv_base_mini_docker = sv_base_mini_docker, - - runtime_attr_override = runtime_attr_subset_gtf - } - - call SubsetGtf as SubsetLincRNA { - input: - - annotation_gtf = gencode_annotation_gtf, - subset = "lincRNA", - - sv_base_mini_docker = sv_base_mini_docker, - - runtime_attr_override = runtime_attr_subset_gtf - } - - call SubsetGtf as SubsetProcessedTranscript { - input: - - annotation_gtf = gencode_annotation_gtf, - subset = "processed_transcript", - - sv_base_mini_docker = sv_base_mini_docker, - - runtime_attr_override = runtime_attr_subset_gtf - } - - call SubsetGtf as SubsetPseudogene { - input: - - annotation_gtf = gencode_annotation_gtf, - subset = "pseudogene", - - sv_base_mini_docker = sv_base_mini_docker, - - runtime_attr_override = runtime_attr_subset_gtf - } - - output { - File canonical_gtf = MakeCanonicalGtf.canon_gtf - File canonical_promoters = MakePromoters.promoter_bed - File antisense_gtf = SubsetAntisense.gtf - File lincRNA_gtf = SubsetLincRNA.gtf - File 
processed_transcript_gtf = SubsetProcessedTranscript.gtf - File pseudogene_gtf = SubsetPseudogene.gtf - } -} - -task SubsetGtf { - - input { - - File annotation_gtf - String subset - - String sv_base_mini_docker - - RuntimeAttr? runtime_attr_override - } - - output { - File gtf = "gencode.${subset}.gtf.gz" - } - - command <<< - - set -euo pipefail - - zcat ~{annotation_gtf} \ - | grep -e "gene_type \"~{subset}\"" - \ - | sort -k1,1V -k4,4n \ - | bgzip -c \ - > gencode.~{subset}.gtf.gz - - >>> - - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - # docker: "gatksv/sv-base-mini:v0.1", - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - docker: "gatksv/sv-base-mini:v0.1" #select_first([runtime_attr.docker, default_attr.docker]) - } -} - -task GetCanonicalTranscripts { - - input { - - File gencode_annotation_gtf - File gencode_pc_translations_fa - File gencode_pc_transcripts_fa - File gencode_transcript_source - - String sv_pipeline_docker - - RuntimeAttr? 
runtime_attr_override - } - - output { - File canon_tx = "gencode.canonical_transcripts.txt" - } - - command <<< - - set -euo pipefail - /opt/sv-pipeline/05_annotation/scripts/get_canonical_transcripts.py \ - ~{gencode_annotation_gtf} \ - ~{gencode_pc_translations_fa} \ - ~{gencode_pc_transcripts_fa} \ - ~{gencode_transcript_source} \ - gencode.canonical_transcripts.txt - - >>> - - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - # docker: "gatksv/sv-pipeline:v0.1", - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - docker: sv_pipeline_docker #select_first([runtime_attr.docker, default_attr.docker]) - } -} - -task MakeCanonicalGtf { - input { - File gencode_annotation_gtf - File canonical_transcripts - - String sv_pipeline_docker - - RuntimeAttr? 
runtime_attr_override - } - - output { - File canon_gtf = "gencode.canonical_pc.gtf.gz" - } - - command <<< - - set -euo pipefail - - cat \ - <(cut -f2 ~{canonical_transcripts} | sed -e '1d' | fgrep -w -f - <(zcat ~{gencode_annotation_gtf})) \ - <(cut -f1 ~{canonical_transcripts} | sed -e '1d' | fgrep -w -f - <(zcat ~{gencode_annotation_gtf}) | awk '($3=="gene")') \ - | sort -k10,10 \ - | /opt/sv-pipeline/05_annotation/scripts/filter_UTRs.py stdin stdout \ - | sort -k1,1V -k4,4n \ - | bgzip -c \ - > gencode.canonical_pc.gtf.gz - - >>> - - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - # docker: "gatksv/sv-pipeline:v0.1", - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - docker: sv_pipeline_docker #select_first([runtime_attr.docker, default_attr.docker]) - } -} - -task MakePromoters { - - input { - - File gtf - Int window - - String sv_pipeline_docker - - RuntimeAttr? 
runtime_attr_override - } - - output { - File promoter_bed = "gencode.canonical_pc.promoter.bed" - } - - command <<< - - /opt/sv-pipeline/05_annotation/scripts/make_promoters.sh ~{gtf} ~{window} > gencode.canonical_pc.promoter.bed - - >>> - - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - # docker: "gatksv/sv-pipeline:v0.1", - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - docker: sv_pipeline_docker #select_first([runtime_attr.docker, default_attr.docker]) - } -} diff --git a/wdl/PrepareNoncoding.wdl b/wdl/PrepareNoncoding.wdl deleted file mode 100644 index 273f66ef5..000000000 --- a/wdl/PrepareNoncoding.wdl +++ /dev/null @@ -1,137 +0,0 @@ -version 1.0 - -import "Structs.wdl" - -# Prepare input BED file list containing intervals of noncoding regions and output one BED file for use in annotation sub-module -workflow PrepareNoncoding { - - input { - - File noncoding_bed_list - - String sv_base_mini_docker - - RuntimeAttr? runtime_attr_clean_noncoding_bed - RuntimeAttr? 
runtime_attr_make_noncoding_bed - } - - Array[File] noncoding_beds = read_lines(noncoding_bed_list) - - scatter (bed in noncoding_beds) { - - call CleanNoncodingBed { - input: - - bed = bed, - - sv_base_mini_docker = sv_base_mini_docker, - - runtime_attr_override = runtime_attr_clean_noncoding_bed - } - } - - call MakeNoncodingBed { - input: - - beds = CleanNoncodingBed.cleaned_bed, - - runtime_attr_override = runtime_attr_make_noncoding_bed - } - - output { - File noncoding_bed = MakeNoncodingBed.noncoding_bed - } -} - -task CleanNoncodingBed { - - input { - - File bed - - String sv_base_mini_docker - - RuntimeAttr? runtime_attr_override - } - - String name = basename(bed, ".bed") - - output { - File cleaned_bed = "${name}.bed" - } - - command <<< - - set -euo pipefail - cat ~{bed} \ - | cut -f -3 \ - | sort -k1,1V -k2,2n \ - | bedtools merge -i stdin \ - | awk -v OFS="\t" '{print $0, "~{name}"}' \ - > ~{name}.bed - - >>> - - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - # docker: "gatksv/sv-base-mini:v0.1" - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - docker: sv_base_mini_docker # select_first([runtime_attr.docker, default_attr.docker]) - } -} - -task MakeNoncodingBed { - - input { - - Array[File] beds - - RuntimeAttr? 
runtime_attr_override - } - - output { - File noncoding_bed = "noncoding_elements.bed" - } - - command <<< - - sort -k1,1V -k2,2n -m ~{sep=" " beds} > noncoding_elements.bed - - >>> - - ######################### - # note here we did not make docker an input because it is really a small task (only sort), I actually suspect alpine is enough - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 10, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - # docker: "ubuntu:18.10" - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - docker: "ubuntu:18.10" # select_first([runtime_attr.docker, default_attr.docker]) - } -} diff --git a/wdl/ScatterAnnotateVcfByChrom.wdl b/wdl/ScatterAnnotateVcfByChrom.wdl deleted file mode 100644 index 00aba2b1c..000000000 --- a/wdl/ScatterAnnotateVcfByChrom.wdl +++ /dev/null @@ -1,140 +0,0 @@ -# Workflow to parallelize VCF annotation by chromosome - -version 1.0 - -import "TasksMakeCohortVcf.wdl" as MiniTasks -import "AnnotateChromosome.wdl" as annotate_by_chrom - -# Scatter VCF and apply prepared annotations -workflow ScatterAnnotateVcfByChrom { - - input { - - File vcf - String prefix - File vcf_idx - File contig_list - File protein_coding_gtf - File linc_rna_gtf - File promoter_bed - File noncoding_bed - - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_annotate_intervals - RuntimeAttr? runtime_attr_merge_annotations - RuntimeAttr? 
runtime_attr_subset_vcf - RuntimeAttr? runtime_attr_concat_vcfs - } - - Array[Array[String]] contigs = read_tsv(contig_list) - - # Annotate, scattered by chromosome - scatter (contig in contigs) { - # Remote tabix each chromosome - call SubsetVcf { - input: - vcf = vcf, - vcf_idx = vcf_idx, - contig = contig[0], - prefix = "${prefix}.${contig[0]}", - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_subset_vcf - } - - # Annotate per chromosome - call annotate_by_chrom.AnnotateChromosome as AnnotateChromosome { - input: - vcf = SubsetVcf.subsetted_vcf, - prefix = "${prefix}.${contig[0]}", - protein_coding_gtf = protein_coding_gtf, - linc_rna_gtf = linc_rna_gtf, - promoter_bed = promoter_bed, - noncoding_bed = noncoding_bed, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_annotate_intervals = runtime_attr_annotate_intervals, - runtime_attr_merge_annotations = runtime_attr_merge_annotations - } - } - - # Merge integrated vcfs across chromosomes - call MiniTasks.ConcatVcfs as ConcatVcfs { - input: - vcfs = AnnotateChromosome.annotated_vcf, - vcfs_idx = AnnotateChromosome.annotated_vcf_idx, - outfile_prefix = "${prefix}.annotated", - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_concat_vcfs - } - - output { - File annotated_vcf = ConcatVcfs.concat_vcf - File annotated_vcf_idx = ConcatVcfs.concat_vcf_idx - } -} - -# Scatter VCF by chromosome -task SubsetVcf { - - input { - - File vcf - File vcf_idx - String contig - String prefix - - String sv_pipeline_docker - - RuntimeAttr? 
runtime_attr_override - } - - parameter_meta { - vcf: { - localization_optional: true - } - vcf_idx: { - localization_optional: true - } - } - - output { - File subsetted_vcf = "${prefix}.${contig}.vcf.gz" - File subsetted_vcf_idx = "${prefix}.${contig}.vcf.gz.tbi" - } - - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - Float mem_gb = select_first([runtime_attr.mem_gb, default_attr.mem_gb]) - Int java_mem_mb = ceil(mem_gb * 1000 * 0.8) - - command <<< - - set -euo pipefail - - java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} SelectVariants \ - -V "~{vcf}" \ - -L "~{contig}" \ - -O ~{prefix}.~{contig}.vcf.gz - - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: mem_gb + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - docker: sv_pipeline_docker - } -}