Skip to content

Commit

Permalink
Merge pull request #19 from avilab/callvariants
Browse files Browse the repository at this point in the history
Callvariants
  • Loading branch information
tpall authored Jun 11, 2021
2 parents 4d82e33 + ceb767c commit b976795
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 189 deletions.
203 changes: 15 additions & 188 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ rule filter:
log:
"logs/{sample}/{run}/filter.log",
params:
extra="k=31 ref=artifacts,phix ordered cardinality",
extra="k=31 ref=artifacts,phix ordered cardinality pigz=32 unpigz zl=8",
resources:
runtime=120,
mem_mb=4000,
Expand All @@ -115,7 +115,7 @@ rule correct1:
output:
out=temp("results/{sample}/{run}/ecco.fq"),
params:
extra="ecco mix vstrict ordered",
extra="ecco mix vstrict ordered pigz=32 unpigz zl=8",
log:
"logs/{sample}/{run}/correct1.log",
resources:
Expand All @@ -132,7 +132,7 @@ rule correct2:
output:
out="results/{sample}/{run}/ecct.fq",
params:
extra="mode=correct k=50 ordered",
extra="mode=correct k=50 ordered pigz=32 unpigz zl=8",
log:
"logs/{sample}/{run}/correct2.log",
resources:
Expand Down Expand Up @@ -165,7 +165,7 @@ rule refgenome:
"minimal"
params:
extra=(
lambda wildcards: f"append maxindel=200 strictmaxindel usemodulo slow k=12 nodisk RGPL={PLATFORM} RGID={wildcards.sample} RGSM={wildcards.sample}"
lambda wildcards: f"append maxindel=200 usemodulo slow k=12 nodisk RGPL={PLATFORM} RGID={wildcards.sample} RGSM={wildcards.sample} pigz=16 unpigz zl=8"
),
resources:
runtime=120,
Expand All @@ -175,196 +175,23 @@ rule refgenome:
f"{WRAPPER_PREFIX}/v0.6/bbtools/bbwrap"


rule samtools_fixmate:
    """
    Run the samtools fixmate wrapper on the alignment written by rule refgenome.
    """
    input:
        rules.refgenome.output.out,
    output:
        temp("results/{sample}/fixmate.bam"),
    threads: 4
    resources:
        mem_mb=4000,
        runtime=120,
    params:
        extra="",
    wrapper:
        "0.68.0/bio/samtools/fixmate"


rule samtools_sort:
    """
    Sort the fixmate BAM with the samtools sort wrapper.

    samtools sort's -m option is a per-thread memory limit, so the job's
    mem_mb request is divided by the thread count instead of being granted
    to every sorting thread.
    """
    input:
        rules.samtools_fixmate.output[0],
    output:
        temp("results/{sample}/sorted.bam"),
    log:
        "logs/{sample}/samtools_sort.log",
    params:
        # -m is applied per thread; split mem_mb so the total stays within
        # the amount requested under `resources`. max(1, ...) guards the
        # division if the scheduler ever hands the rule fewer threads.
        extra=lambda wildcards, resources, threads: f"-m {resources.mem_mb // max(1, threads)}M",
        tmp_dir="/tmp/",
    resources:
        mem_mb=4000,
        # Runtime grows with each retry attempt.
        runtime=lambda wildcards, attempt: attempt * 240,
    threads: 4
    wrapper:
        "0.68.0/bio/samtools/sort"


rule mark_duplicates:
    """
    Run the Picard MarkDuplicates wrapper on the sorted BAM.

    Duplicates are removed (REMOVE_DUPLICATES='true') and a metrics file is
    written next to the deduplicated BAM.
    """
    input:
        rules.samtools_sort.output[0],
    output:
        bam=temp("results/{sample}/dedup.bam"),
        metrics="results/{sample}/dedup.txt",
    threads: 4
    resources:
        mem_mb=4000,
        runtime=120,
    log:
        "logs/{sample}/dedup.log",
    params:
        "CREATE_INDEX='true' USE_JDK_DEFLATER='true' USE_JDK_INFLATER='true' REMOVE_DUPLICATES='true' ASSUME_SORTED='true' DUPLICATE_SCORING_STRATEGY='SUM_OF_BASE_QUALITIES' OPTICAL_DUPLICATE_PIXEL_DISTANCE='100' VALIDATION_STRINGENCY='LENIENT' QUIET='true' VERBOSITY='ERROR'",
    wrapper:
        "0.68.0/bio/picard/markduplicates"


rule indelqual1:
    """
    Add indel qualities to the deduplicated BAM via the lofreq indelqual wrapper.
    """
    input:
        bam=rules.mark_duplicates.output.bam,
        ref="resources/refseq/NC_045512.2/sequences.fa",
    output:
        temp("results/{sample}/indelqual1.bam"),
    threads: 4
    resources:
        mem_mb=4000,
        runtime=120,
    params:
        extra="--verbose",
    log:
        "logs/{sample}/indelqual1.log",
    wrapper:
        f"{WRAPPER_PREFIX}/v0.6/lofreq/indelqual"


rule lofreq1:
"""
Variant calling.
"""
rule callvariants:
input:
input=rules.refgenome.output.out,
ref="resources/refseq/NC_045512.2/sequences.fa",
bam=rules.indelqual1.output[0],
output:
temp("results/{sample}/lofreq1.vcf"),
log:
"logs/{sample}/lofreq1.log",
vcf="results/{sample}/vars.vcf",
out="results/{sample}/vars.txt",
params:
extra="--call-indels",
resources:
runtime=120,
mem_mb=lambda wildcards, input: 4000 + 40 * (input.bam.size // 1000000),
threads: 4
wrapper:
f"{WRAPPER_PREFIX}/v0.6/lofreq/call"


rule indexfeaturefile:
"""
Index VCF file.
"""
input:
rules.lofreq1.output[0],
output:
temp("results/{sample}/lofreq1.vcf.idx"),
extra="minallelefraction=0.05 strandedcov",
log:
"logs/{sample}/indexfeaturefile.log",
params:
extra="",
"logs/{sample}/callvariants.log",
resources:
runtime=120,
mem_mb=4000,
threads: 1
wrapper:
f"{WRAPPER_PREFIX}/v0.6.1/gatk/indexfeaturefile"


rule gatk_baserecalibrator:
    """
    Build a recalibration table with the GATK BaseRecalibrator wrapper,
    passing the lofreq1 VCF (and its feature index) as the known-sites input.
    """
    input:
        bam=rules.indelqual1.output[0],
        ref="resources/refseq/NC_045512.2/sequences.fa",
        dict="resources/refseq/NC_045512.2/sequences.dict",
        known=rules.lofreq1.output[0],
        feature_index=rules.indexfeaturefile.output[0],
    output:
        recal_table=temp("results/{sample}/recal_table.grp"),
    resources:
        # Base request plus 40 MB per (decimal) megabyte of input BAM.
        mem_mb=lambda wildcards, input: 4000 + 40 * (input.bam.size // 1000000),
        runtime=120,
    log:
        "logs/{sample}/baserecalibrator.log",
    wrapper:
        "0.68.0/bio/gatk/baserecalibrator"


rule applybqsr:
    """
    Apply the base-quality recalibration table to the BAM with the GATK
    ApplyBQSR wrapper.
    """
    input:
        ref="resources/refseq/NC_045512.2/sequences.fa",
        bam=rules.indelqual1.output[0],
        # Reference the producing rule's output instead of repeating its path,
        # so the two rules cannot drift apart.
        recal_table=rules.gatk_baserecalibrator.output.recal_table,
    output:
        bam=temp("results/{sample}/recalibrated.bam"),
    log:
        "logs/{sample}/applybqsr.log",
    resources:
        runtime=120,
        # Base request plus 40 MB per (decimal) megabyte of input BAM.
        mem_mb=lambda wildcards, input: 4000 + 40 * (input.bam.size // 1000000),
    wrapper:
        "0.68.0/bio/gatk/applybqsr"


rule indelqual:
    """
    Add indel qualities to the recalibrated BAM via the lofreq indelqual wrapper.
    """
    input:
        bam=rules.applybqsr.output.bam,
        ref="resources/refseq/NC_045512.2/sequences.fa",
    output:
        temp("results/{sample}/indelqual.bam"),
    threads: 4
    resources:
        # Base request plus 40 MB per (decimal) megabyte of input BAM.
        mem_mb=lambda wildcards, input: 4000 + 40 * (input.bam.size // 1000000),
        runtime=120,
    params:
        extra="--verbose",
    log:
        "logs/{sample}/indelqual.log",
    wrapper:
        f"{WRAPPER_PREFIX}/v0.6/lofreq/indelqual"


rule lofreq:
    """
    Call variants (indels included, via --call-indels) with the lofreq call
    wrapper on the indel-quality-annotated BAM.
    """
    input:
        bam=rules.indelqual.output[0],
        ref="resources/refseq/NC_045512.2/sequences.fa",
    output:
        "results/{sample}/lofreq.vcf",
    threads: 4
    resources:
        # Base request plus 40 MB per (decimal) megabyte of input BAM.
        mem_mb=lambda wildcards, input: 4000 + 40 * (input.bam.size // 1000000),
        runtime=120,
    params:
        extra="--call-indels",
    log:
        "logs/{sample}/lofreq.log",
    wrapper:
        f"{WRAPPER_PREFIX}/v0.6/lofreq/call"
f"{WRAPPER_PREFIX}/v0.8.0/bbtools/callvariants"


rule pileup:
Expand Down Expand Up @@ -393,7 +220,7 @@ rule vcffilter:
Filter variants based on allele frequency.
"""
input:
rules.lofreq.output[0],
rules.callvariants.output.vcf,
output:
"results/{sample}/filtered.vcf",
log:
Expand Down Expand Up @@ -438,8 +265,7 @@ rule snpeff:
Functional annotation of variants.
"""
input:
calls="results/{sample}/lofreq.vcf",
db="resources/refseq/NC_045512.2",
calls="results/{sample}/vars.vcf",
output:
calls="results/{sample}/snpeff.vcf", # annotated calls (vcf, bcf, or vcf.gz)
stats="results/{sample}/snpeff.html", # summary statistics (in HTML), optional
Expand All @@ -448,12 +274,13 @@ rule snpeff:
log:
"logs/{sample}/snpeff.log",
params:
db="resources/refseq/NC_045512.2",
extra="-configOption 'NC_045512.2'.genome='NC_045512.2' -configOption 'NC_045512.2'.codonTable='Standard' -formatEff -classic -no-downstream -no-intergenic -no-intron -no-upstream -no-utr",
resources:
runtime=120,
mem_mb=4000,
wrapper:
f"{WRAPPER_PREFIX}/v0.7.2/snpeff"
f"{WRAPPER_PREFIX}/master/snpeff"


rule snpsift:
Expand Down
21 changes: 21 additions & 0 deletions workflow/envs/pangolin/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Conda environment for running pangolin.
name: pangolin
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - biopython=1.74
  - iqtree<2
  - mafft
  - minimap2
  - pip=19.3.1
  - python=3.6
  - snakemake-minimal=5.13
  # Requirements installed by pip; they must be nested under the `pip` key,
  # otherwise they are read as conda packages.
  - pip:
      - pandas==1.0.1
      - pytools==2020.1
      - dendropy>=4.4.0
      # NOTE(review): only `lineages` is pinned to a ref; datafunk, pangoLEARN
      # and pangolin install whatever the default branch holds at build time.
      - git+https://github.com/cov-ert/datafunk.git
      - git+https://github.com/cov-lineages/pangoLEARN.git
      - git+https://github.com/cov-lineages/lineages.git@2020-05-19-2
      - git+https://github.com/cov-lineages/pangolin.git
2 changes: 1 addition & 1 deletion workflow/schemas/samples.schema.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
$schema: "http://json-schema.org/draft-07/schema#"
$schema: "http://json-schema.org/draft-04/schema#"
description: an entry in the sample sheet
properties:
sample_name:
Expand Down

0 comments on commit b976795

Please sign in to comment.