Skip to content

Commit

Permalink
Merge pull request #7 from RIVM-bioinformatics/add_filter
Browse files Browse the repository at this point in the history
Add filter for variants not supported by both strands
  • Loading branch information
boasvdp authored Jan 25, 2024
2 parents 4fb0c74 + 4a59f6a commit c43bfa7
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 33 deletions.
1 change: 1 addition & 0 deletions .github/workflows/juno_mapping_conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
- name: Install Conda environment with Micromamba
uses: mamba-org/setup-micromamba@v1
with:
generate-run-shell: false # see https://github.com/mamba-org/setup-micromamba/issues/130
cache-downloads: true
environment-file: envs/juno_mapping.yaml
- name: Cache minikraken
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/juno_mapping_singularity.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
- name: Install Conda environment with Micromamba
uses: mamba-org/setup-micromamba@v1
with:
generate-run-shell: false # see https://github.com/mamba-org/setup-micromamba/issues/130
cache-downloads: true
environment-file: envs/juno_mapping.yaml
- name: Cache minikraken
Expand Down
4 changes: 2 additions & 2 deletions envs/juno_mapping.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ dependencies:
- git=2.40.*
- mamba=1.3.*
- pandas=1.5.*
- snakemake=7.18.*
- snakemake=7.32.0
- pip=23.*
- python=3.11.*
- pyvcf
- pip:
- "--editable=git+https://github.com/RIVM-bioinformatics/juno-library.git@v2.0.0#egg=juno_library"
- "--editable=git+https://github.com/RIVM-bioinformatics/juno-library.git@v2.1.2#egg=juno_library"
21 changes: 16 additions & 5 deletions juno_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
Date: 25-05-2023
"""

from pathlib import Path
import pathlib
import yaml
import argparse
import pathlib
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Optional, Union

import yaml
from juno_library import Pipeline
from typing import Optional, Callable, Union, Any
from version import __package_name__, __version__, __description__

from version import __description__, __package_name__, __version__


def main() -> None:
Expand Down Expand Up @@ -150,6 +152,13 @@ def _add_args_to_parser(self) -> None:
default=0.8,
help="Minimum allele frequency to filter variants on.",
)
self.add_argument(
"--min-reads-per-strand",
type=check_number_within_range(minimum=0, maximum=9999),
metavar="INT",
default=1,
help="Minimum number of reads per strand to call a variant.",
)

def _parse_args(self) -> argparse.Namespace:
args = super()._parse_args()
Expand All @@ -168,6 +177,7 @@ def _parse_args(self) -> argparse.Namespace:
self.species: str = args.species
self.minimum_depth_variant: int = args.minimum_depth_variant
self.minimum_allele_frequency: float = args.minimum_allele_frequency
self.min_reads_per_strand: int = args.min_reads_per_strand

return args

Expand Down Expand Up @@ -249,6 +259,7 @@ def setup(self) -> None:
"species": str(self.species),
"minimum_depth": int(self.minimum_depth_variant),
"minimum_allele_frequency": float(self.minimum_allele_frequency),
"min_reads_per_strand": int(self.min_reads_per_strand),
}


Expand Down
55 changes: 29 additions & 26 deletions workflow/rules/variant_filtering.smk
Original file line number Diff line number Diff line change
@@ -1,59 +1,62 @@
rule filter_af:
rule FilterMutectCalls:
input:
vcf=OUT + "/variants_raw/FMC/{sample}.vcf",
vcf=OUT + "/variants_raw/raw/{sample}.vcf",
stats=OUT + "/variants_raw/raw/{sample}.vcf.stats",
ref=OUT + "/reference/reference.fasta",
output:
vcf=OUT + "/variants_raw/FMC_af/{sample}.vcf",
vcf=OUT + "/variants_raw/FMC/{sample}.vcf",
message:
"Marking minority variants for {wildcards.sample}"
"Marking low confidence variants for {wildcards.sample}, based on FilterMutectCalls microbial mode"
container:
"docker://staphb/bcftools:1.16"
"docker://broadinstitute/gatk:4.3.0.0"
conda:
"../envs/bcftools.yaml"
"../envs/gatk_picard.yaml"
params:
min_af=config["minimum_allele_frequency"],
min_reads_per_strand=config["min_reads_per_strand"],
log:
OUT + "/log/filter_af/{sample}.log",
OUT + "/log/FilterMutectCalls/{sample}.log",
threads: config["threads"]["filter_variants"]
resources:
mem_gb=config["mem_gb"]["filter_variants"],
shell:
"""
bcftools filter \
--exclude \"FORMAT/AF < {params.min_af}\" \
--soft-filter "min_af_{params.min_af}" \
{input.vcf} \
1>{output.vcf} \
2>{log}
gatk FilterMutectCalls -V {input.vcf} \
-R {input.ref} \
-O {output.vcf} \
--stats {input.stats} \
--min-reads-per-strand {params.min_reads_per_strand} \
--microbial-mode 2>&1>{log}
"""


rule FilterMutectCalls:
rule filter_af:
input:
vcf=OUT + "/variants_raw/raw/{sample}.vcf",
vcf=OUT + "/variants_raw/FMC/{sample}.vcf",
stats=OUT + "/variants_raw/raw/{sample}.vcf.stats",
ref=OUT + "/reference/reference.fasta",
output:
vcf=OUT + "/variants_raw/FMC/{sample}.vcf",
vcf=OUT + "/variants_raw/FMC_af/{sample}.vcf",
message:
"Marking low confidence variants for {wildcards.sample}, based on FilterMutectCalls microbial mode"
"Marking minority variants for {wildcards.sample}"
container:
"docker://broadinstitute/gatk:4.3.0.0"
"docker://staphb/bcftools:1.16"
conda:
"../envs/gatk_picard.yaml"
"../envs/bcftools.yaml"
params:
min_af=config["minimum_allele_frequency"],
log:
OUT + "/log/FilterMutectCalls/{sample}.log",
OUT + "/log/filter_af/{sample}.log",
threads: config["threads"]["filter_variants"]
resources:
mem_gb=config["mem_gb"]["filter_variants"],
shell:
"""
gatk FilterMutectCalls -V {input.vcf} \
-R {input.ref} \
-O {output.vcf} \
--stats {input.stats} \
--microbial-mode 2>&1>{log}
bcftools filter \
--exclude \"FORMAT/AF < {params.min_af}\" \
--soft-filter "min_af_{params.min_af}" \
{input.vcf} \
1>{output.vcf} \
2>{log}
"""


Expand Down

0 comments on commit c43bfa7

Please sign in to comment.