From 4bc016df495efbfb6038a363c42563a79e3c83cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Thu, 9 Jan 2025 14:47:44 +0100 Subject: [PATCH 1/2] feat: first steps to add vaf calculation for strelka --- .gitignore | 3 ++- workflow/envs/cyvcf.yaml | 5 +++++ workflow/rules/common.smk | 2 ++ workflow/rules/eval.smk | 13 +++++++++++++ workflow/scripts/calc-vaf.py | 36 ++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 workflow/envs/cyvcf.yaml create mode 100644 workflow/scripts/calc-vaf.py diff --git a/.gitignore b/.gitignore index 3d2ebb6..1cdcbf7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ resources/** logs logs/** data/** -report/** \ No newline at end of file +report/** +test/** \ No newline at end of file diff --git a/workflow/envs/cyvcf.yaml b/workflow/envs/cyvcf.yaml new file mode 100644 index 0000000..bf54b6e --- /dev/null +++ b/workflow/envs/cyvcf.yaml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - cyvcf2 \ No newline at end of file diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 12188a9..e4ddb27 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -455,6 +455,8 @@ def get_vaf_status(wildcards): vaf_benchmark = benchmarks[wildcards.benchmark].get("vaf-field") if vaf_benchmark is None: return False + if vaf_benchmark == "tbc": + return True else: callsets = get_benchmark_callsets(wildcards.benchmark) vaf_callsets = [ diff --git a/workflow/rules/eval.smk b/workflow/rules/eval.smk index 724f53a..624ff73 100644 --- a/workflow/rules/eval.smk +++ b/workflow/rules/eval.smk @@ -91,6 +91,19 @@ rule remove_non_pass: "v3.3.6/bio/bcftools/view" +rule calculate_vaf: + input: + "results/filtered-variants/{callset}.bcf" + output: + "results/calculate-vaf/{callset}.added-vaf.bcf" + log: + "logs/calculate-vaf/" + conda: + "../envs/cyvcf.yaml" + script: + "../scripts/calc-vaf.py" + + rule intersect_calls_with_target_regions: input: bcf="results/filtered-variants/{callset}.bcf", diff --git a/workflow/scripts/calc-vaf.py b/workflow/scripts/calc-vaf.py new file mode 100644 index 0000000..4df4474 --- /dev/null +++ b/workflow/scripts/calc-vaf.py @@ -0,0 +1,36 @@ +from cyvcf2 import VCF + +#vcf = "/Users/famke/01-pm4onco/osf-download/pipeline-results-of-imgag-data/qbic/strelka/tumor_5perc_vs_normal_5perc.strelka.somatic_snvs_VEP.ann.vcf" #snakemake.input +#indel = "/Users/famke/01-pm4onco/osf-download/pipeline-results-of-imgag-data/qbic/strelka/tumor_5perc_vs_normal_5perc.strelka.somatic_indels_VEP.ann.vcf.gz" + +# +# SNVS +# + +def get_snv_allele_freq(vcf): + for variant in VCF(vcf): + refCounts = variant.format(variant.REF + "U") + altCounts = variant.format(variant.ALT[0] + "U") + + # TODO: check which value is the correct one from the matrix (this leads to many zero VAF) + tier1RefCounts = refCounts[0, 0] + tier1AltCounts = altCounts[0, 0] + + vaf = tier1AltCounts / (tier1AltCounts + tier1RefCounts) + + print(vaf) + + + +# +# INDELs +# + +def get_indel_allele_freq(vcf): + for variant in VCF(vcf): + tier1RefCounts = variant.format("TAR")[0,0] + tier1AltCounts = variant.format("TIR")[0,0] + + vaf = tier1AltCounts / (tier1AltCounts + tier1RefCounts) + + print(vaf) From e78b133d94103ae2b6fa40c49eb20676f7146a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Thu, 9 Jan 2025 14:51:20 +0100 Subject: [PATCH 2/2] fix: snakefmt --- workflow/rules/eval.smk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflow/rules/eval.smk b/workflow/rules/eval.smk index 624ff73..a82c62d 100644 --- a/workflow/rules/eval.smk +++ b/workflow/rules/eval.smk @@ -93,11 +93,11 @@ rule remove_non_pass: rule calculate_vaf: input: - "results/filtered-variants/{callset}.bcf" + "results/filtered-variants/{callset}.bcf", output: - "results/calculate-vaf/{callset}.added-vaf.bcf" + "results/calculate-vaf/{callset}.added-vaf.bcf", log: - "logs/calculate-vaf/" + "logs/calculate-vaf/", conda: "../envs/cyvcf.yaml" script: