Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add filter for variants not supported by both strands #7

Merged
merged 6 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/juno_mapping_conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
- name: Install Conda environment with Micromamba
uses: mamba-org/setup-micromamba@v1
with:
generate-run-shell: false # see https://github.com/mamba-org/setup-micromamba/issues/130
cache-downloads: true
environment-file: envs/juno_mapping.yaml
- name: Cache minikraken
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/juno_mapping_singularity.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
- name: Install Conda environment with Micromamba
uses: mamba-org/setup-micromamba@v1
with:
generate-run-shell: false # see https://github.com/mamba-org/setup-micromamba/issues/130
cache-downloads: true
environment-file: envs/juno_mapping.yaml
- name: Cache minikraken
Expand Down
4 changes: 2 additions & 2 deletions envs/juno_mapping.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ dependencies:
- git=2.40.*
- mamba=1.3.*
- pandas=1.5.*
- snakemake=7.18.*
- snakemake=7.32.0
- pip=23.*
- python=3.11.*
- pyvcf
- pip:
- "--editable=git+https://github.com/RIVM-bioinformatics/juno-library.git@v2.0.0#egg=juno_library"
- "--editable=git+https://github.com/RIVM-bioinformatics/juno-library.git@v2.1.2#egg=juno_library"
21 changes: 16 additions & 5 deletions juno_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
Date: 25-05-2023
"""

from pathlib import Path
import pathlib
import yaml
import argparse
import pathlib
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Optional, Union

import yaml
from juno_library import Pipeline
from typing import Optional, Callable, Union, Any
from version import __package_name__, __version__, __description__

from version import __description__, __package_name__, __version__


def main() -> None:
Expand Down Expand Up @@ -150,6 +152,13 @@ def _add_args_to_parser(self) -> None:
default=0.8,
help="Minimum allele frequency to filter variants on.",
)
self.add_argument(
"--min-reads-per-strand",
type=check_number_within_range(minimum=0, maximum=9999),
metavar="INT",
default=1,
help="Minimum number of reads per strand to call a variant.",
)

def _parse_args(self) -> argparse.Namespace:
args = super()._parse_args()
Expand All @@ -168,6 +177,7 @@ def _parse_args(self) -> argparse.Namespace:
self.species: str = args.species
self.minimum_depth_variant: int = args.minimum_depth_variant
self.minimum_allele_frequency: float = args.minimum_allele_frequency
self.min_reads_per_strand: int = args.min_reads_per_strand

return args

Expand Down Expand Up @@ -249,6 +259,7 @@ def setup(self) -> None:
"species": str(self.species),
"minimum_depth": int(self.minimum_depth_variant),
"minimum_allele_frequency": float(self.minimum_allele_frequency),
"min_reads_per_strand": int(self.min_reads_per_strand),
}


Expand Down
55 changes: 29 additions & 26 deletions workflow/rules/variant_filtering.smk
Original file line number Diff line number Diff line change
@@ -1,59 +1,62 @@
rule filter_af:
rule FilterMutectCalls:
input:
vcf=OUT + "/variants_raw/FMC/{sample}.vcf",
vcf=OUT + "/variants_raw/raw/{sample}.vcf",
stats=OUT + "/variants_raw/raw/{sample}.vcf.stats",
ref=OUT + "/reference/reference.fasta",
output:
vcf=OUT + "/variants_raw/FMC_af/{sample}.vcf",
vcf=OUT + "/variants_raw/FMC/{sample}.vcf",
message:
"Marking minority variants for {wildcards.sample}"
"Marking low confidence variants for {wildcards.sample}, based on FilterMutectCalls microbial mode"
container:
"docker://staphb/bcftools:1.16"
"docker://broadinstitute/gatk:4.3.0.0"
conda:
"../envs/bcftools.yaml"
"../envs/gatk_picard.yaml"
params:
min_af=config["minimum_allele_frequency"],
min_reads_per_strand=config["min_reads_per_strand"],
log:
OUT + "/log/filter_af/{sample}.log",
OUT + "/log/FilterMutectCalls/{sample}.log",
threads: config["threads"]["filter_variants"]
resources:
mem_gb=config["mem_gb"]["filter_variants"],
shell:
"""
bcftools filter \
--exclude \"FORMAT/AF < {params.min_af}\" \
--soft-filter "min_af_{params.min_af}" \
{input.vcf} \
1>{output.vcf} \
2>{log}
gatk FilterMutectCalls -V {input.vcf} \
-R {input.ref} \
-O {output.vcf} \
--stats {input.stats} \
--min-reads-per-strand {params.min_reads_per_strand} \
--microbial-mode 2>&1>{log}
"""


rule FilterMutectCalls:
rule filter_af:
input:
vcf=OUT + "/variants_raw/raw/{sample}.vcf",
vcf=OUT + "/variants_raw/FMC/{sample}.vcf",
stats=OUT + "/variants_raw/raw/{sample}.vcf.stats",
ref=OUT + "/reference/reference.fasta",
output:
vcf=OUT + "/variants_raw/FMC/{sample}.vcf",
vcf=OUT + "/variants_raw/FMC_af/{sample}.vcf",
message:
"Marking low confidence variants for {wildcards.sample}, based on FilterMutectCalls microbial mode"
"Marking minority variants for {wildcards.sample}"
container:
"docker://broadinstitute/gatk:4.3.0.0"
"docker://staphb/bcftools:1.16"
conda:
"../envs/gatk_picard.yaml"
"../envs/bcftools.yaml"
params:
min_af=config["minimum_allele_frequency"],
log:
OUT + "/log/FilterMutectCalls/{sample}.log",
OUT + "/log/filter_af/{sample}.log",
threads: config["threads"]["filter_variants"]
resources:
mem_gb=config["mem_gb"]["filter_variants"],
shell:
"""
gatk FilterMutectCalls -V {input.vcf} \
-R {input.ref} \
-O {output.vcf} \
--stats {input.stats} \
--microbial-mode 2>&1>{log}
bcftools filter \
--exclude \"FORMAT/AF < {params.min_af}\" \
--soft-filter "min_af_{params.min_af}" \
{input.vcf} \
1>{output.vcf} \
2>{log}
"""


Expand Down
Loading