-
Notifications
You must be signed in to change notification settings - Fork 72
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Draft a WDL for GangSTR. * Simplify joining array items by `,`. * Install GangSTR via Bioconda. * Add a short doc about the workflow & a comment. * Update output, remove a comment & unused struct. * remove unused sets & pinned TRTools version & image clean up. * Use the current latest version of samtools. * Draft splitting the WDL in two: GangSTR & GangSTRScatter. * Bug fixes in GangSTR.wdl * Bug fixes in GangSTRScatter.wdl. * add str_ prefix to docker variables in GangSTR WDLs. * Refactor for clarity & add args documentation. * Add user-defined prefix for GangSTR output.
- Loading branch information
Showing
3 changed files
with
216 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
## Workflow to run GangSTR (https://github.com/gymreklab/GangSTR), a tool | ||
## for computing a genome-wide profile of short tandem repeats (STRs) from | ||
## short reads. | ||
version 1.0 | ||
|
||
import "Structs.wdl" | ||
|
||
# Runs GangSTR on a single bam/cram file.
#
# Optional inputs are resolved to concrete values here (index paths and the
# output prefix) and then handed to the CallGangSTR task, whose three output
# files are re-exported unchanged.
workflow GangSTR {

  input {
    File bam_or_cram
    File? bam_or_cram_index
    File reference_fasta
    File? reference_fasta_index
    File target_tr_loci_regions_bed
    String? output_prefix
    String str_docker
    RuntimeAttr? runtime_attr
  }

  parameter_meta {
    bam_or_cram: "Set the path to a sorted and indexed bam or cram file generated by an indel-sensitive aligner to be used as input for GangSTR."
    bam_or_cram_index: "[Optional] Set the path to the index file of the `bam_or_cram` input."
    reference_fasta: "Sets the path to the reference in fasta format."
    reference_fasta_index: "[Optional] Sets the path to the index of reference file."
    target_tr_loci_regions_bed: "Reference set of regions to genotype represented in bed-like format; see GangSTR documentation for the file structure at: https://github.com/gymreklab/GangSTR#tr-regions---regions"
    output_prefix: "[Optional] Set a string to be used as a prefix to the output files. Defaults to the bam_or_cram filename."
    str_docker: "Sets the STR docker image."
    runtime_attr: "[Optional] Override the default runtime attributes for the GangSTR workflow."
  }

  # The input is treated as a bam when its file name ends with ".bam":
  # stripping that suffix and appending it again only reproduces the original
  # name if the suffix was present. Anything else is handled as a cram.
  Boolean is_bam =
    (basename(bam_or_cram, ".bam") + ".bam") == basename(bam_or_cram)

  # When no index is supplied it is looked up next to the data file, under the
  # data file's full path plus ".bai" (bam) or ".crai" (cram).
  String index_suffix = if is_bam then ".bai" else ".crai"
  File bam_or_cram_index_ = select_first([
    bam_or_cram_index, bam_or_cram + index_suffix])

  # Same convention for the reference: "<reference_fasta>.fai".
  File reference_fasta_index_ = select_first([
    reference_fasta_index, reference_fasta + ".fai"])

  # Default prefix is the input file name without its ".bam"/".cram" suffix.
  String default_output_prefix =
    if is_bam then
      basename(bam_or_cram, ".bam")
    else
      basename(bam_or_cram, ".cram")
  String output_prefix_ = select_first([output_prefix, default_output_prefix])

  call CallGangSTR {
    input:
      bam_or_cram = bam_or_cram,
      bam_or_cram_index = bam_or_cram_index_,
      reference_fasta = reference_fasta,
      reference_fasta_index = reference_fasta_index_,
      target_tr_loci_regions_bed = target_tr_loci_regions_bed,
      output_prefix = output_prefix_,
      str_docker = str_docker,
      runtime_attr_override = runtime_attr
  }

  output {
    File output_vcf = CallGangSTR.output_vcf
    File sample_stats = CallGangSTR.sample_stats
    File insdata = CallGangSTR.insdata
  }
}
|
||
# Runs the GangSTR executable on one bam/cram file inside the `str_docker`
# image and exposes the three files it writes under `output_prefix`.
#
# The two index files are declared as inputs so that they are localized
# together with the data they index; they are not passed on the command line.
task CallGangSTR {
  input {
    File bam_or_cram
    File bam_or_cram_index
    File reference_fasta
    File reference_fasta_index
    File target_tr_loci_regions_bed
    String output_prefix
    String str_docker
    RuntimeAttr? runtime_attr_override
  }

  # Default resources. Disk is 10 GiB on top of the rounded-up combined size
  # of the bam/cram, the reference and the reference index.
  RuntimeAttr runtime_attr_str_profile_default = object {
    cpu_cores: 1,
    mem_gb: 4,
    boot_disk_gb: 10,
    preemptible_tries: 3,
    max_retries: 1,
    disk_gb: 10 + ceil(size([
      bam_or_cram,
      reference_fasta,
      reference_fasta_index], "GiB"))
  }
  RuntimeAttr runtime_attr = select_first([
    runtime_attr_override,
    runtime_attr_str_profile_default])

  command <<<
    set -euxo pipefail

    GangSTR \
      --bam ~{bam_or_cram} \
      --ref ~{reference_fasta} \
      --regions ~{target_tr_loci_regions_bed} \
      --out ~{output_prefix}
  >>>

  # Each attribute falls back to the default individually. Selecting the
  # override struct as a whole is not enough when the caller sets only some of
  # its members: the remaining ones would be undefined here.
  # NOTE(review): RuntimeAttr is declared in Structs.wdl, which is not visible
  # from this file; this assumes its members are optional - confirm. If they
  # are all required, the extra select_first calls are harmless no-ops.
  runtime {
    docker: str_docker
    cpu: select_first([
      runtime_attr.cpu_cores,
      runtime_attr_str_profile_default.cpu_cores])
    memory: select_first([
      runtime_attr.mem_gb,
      runtime_attr_str_profile_default.mem_gb]) + " GiB"
    disks: "local-disk " + select_first([
      runtime_attr.disk_gb,
      runtime_attr_str_profile_default.disk_gb]) + " HDD"
    bootDiskSizeGb: select_first([
      runtime_attr.boot_disk_gb,
      runtime_attr_str_profile_default.boot_disk_gb])
    preemptible: select_first([
      runtime_attr.preemptible_tries,
      runtime_attr_str_profile_default.preemptible_tries])
    maxRetries: select_first([
      runtime_attr.max_retries,
      runtime_attr_str_profile_default.max_retries])
  }

  # Paths are relative to the task's working directory, where GangSTR is
  # expected to write "<output_prefix>.*".
  output {
    File output_vcf = "${output_prefix}.vcf"
    File sample_stats = "${output_prefix}.samplestats.tab"
    File insdata = "${output_prefix}.insdata.tab"
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
version 1.0 | ||
|
||
import "Structs.wdl" | ||
import "GangSTR.wdl" as GangSTR | ||
|
||
# Runs the GangSTR workflow once per input bam/cram file and gathers the
# per-sample outputs into arrays (same order as `bams_or_crams`).
workflow GangSTRScatter {

  input {
    Array[File] bams_or_crams
    Array[File]? bams_or_crams_indexes
    File reference_fasta
    File? reference_fasta_index
    File target_tr_loci_regions_bed
    String str_docker
    RuntimeAttr? runtime_attr
  }

  parameter_meta {
    bams_or_crams: "Bam or cram files to genotype; GangSTR is run once per file."
    bams_or_crams_indexes: "[Optional] Index files of `bams_or_crams`, in the same order. Defaults to each file's path with `.bai` (bam) or `.crai` (cram) appended."
    reference_fasta: "Sets the path to the reference in fasta format."
    reference_fasta_index: "[Optional] Sets the path to the index of reference file. Defaults to the `reference_fasta` path with `.fai` appended."
    target_tr_loci_regions_bed: "Reference set of regions to genotype represented in bed-like format; see GangSTR documentation for the file structure at: https://github.com/gymreklab/GangSTR#tr-regions---regions"
    str_docker: "Sets the STR docker image."
    runtime_attr: "[Optional] Override the default runtime attributes; passed to every GangSTR call."
  }

  # Identical for every shard, so it is resolved once outside the scatter.
  File reference_fasta_index_ = select_first([
    reference_fasta_index, reference_fasta + ".fai"])

  scatter (i in range(length(bams_or_crams))) {
    File bam_or_cram_ = bams_or_crams[i]

    # A file counts as a bam when its name ends with ".bam"; otherwise it is
    # handled as a cram.
    Boolean is_bam =
      (basename(bam_or_cram_, ".bam") + ".bam") == basename(bam_or_cram_)
    String index_suffix = if is_bam then ".bai" else ".crai"

    # Indexes are matched to data files by position. When given,
    # `bams_or_crams_indexes` must therefore have one entry per input file.
    File bam_or_cram_index_ =
      if defined(bams_or_crams_indexes) then
        select_first([bams_or_crams_indexes])[i]
      else
        bam_or_cram_ + index_suffix

    call GangSTR.GangSTR as gangSTR {
      input:
        bam_or_cram = bam_or_cram_,
        bam_or_cram_index = bam_or_cram_index_,
        reference_fasta = reference_fasta,
        reference_fasta_index = reference_fasta_index_,
        target_tr_loci_regions_bed = target_tr_loci_regions_bed,
        str_docker = str_docker,
        runtime_attr = runtime_attr
    }
  }

  output {
    Array[File] output_vcfs = gangSTR.output_vcf
    Array[File] samples_stats = gangSTR.sample_stats
    Array[File] insdatas = gangSTR.insdata
  }
}