Skip to content

Commit

Permalink
fix decomp BND
Browse files Browse the repository at this point in the history
  • Loading branch information
ACEnglish committed Jan 9, 2025
1 parent 3da688f commit d24c58b
Show file tree
Hide file tree
Showing 26 changed files with 122 additions and 80 deletions.
7 changes: 5 additions & 2 deletions repo_utils/answer_key/bench/bench_bnd/candidate.refine.bed
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ chr16 1025231 1025252
chr16 49626994 49627100
chr16 89536459 89536480
chr17 31964205 32004138
chr17 32670386 32670407
chr17 77850848 77850938
chr17 83120464 83120485
chr17 83127668 83127689
Expand All @@ -46,12 +47,14 @@ chr18 70465189 70465210
chr18 78390326 78390347
chr19 395626 395835
chr19 3477781 3477868
chr19 11196125 11218966
chr19 29439299 29439424
chr19 38981201 38983536
chr19 44991097 44991118
chr19 53185919 53185940
chr19 55068010 55068465
chr2 9737589 9737610
chr2 80358794 80358815
chr2 80401493 80401514
chr2 138335007 138335083
chr2 157917136 157917157
chr2 238119351 238119427
Expand Down Expand Up @@ -86,7 +89,7 @@ chr6 157972782 157972814
chr7 4824638 4824659
chr7 7802502 7802523
chr7 23502979 23503000
chr7 68922678 68950323
chr7 68922678 68922699
chr7 75988853 75988924
chr7 144186929 144186950
chr7 158151440 158151461
Expand Down
Binary file modified repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz.tbi
Binary file not shown.
49 changes: 26 additions & 23 deletions repo_utils/answer_key/bench/bench_bnd/log.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
2025-01-08 19:03:15,280 [INFO] Truvari v5.0.0
2025-01-08 19:03:15,280 [INFO] Command /Users/english/code/truvari/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -p 0 -o test_results/bench_bnd/ --no-decompose
2025-01-08 19:03:15,280 [INFO] Params:
2025-01-08 20:16:32,128 [INFO] Truvari v5.0.0
2025-01-08 20:16:32,129 [INFO] Command /Users/english/code/truvari/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -p 0 -o test_results/bench_bnd/ --no-decompose
2025-01-08 20:16:32,129 [INFO] Params:
{
"base": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.base.vcf.gz",
"comp": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.comp.vcf.gz",
Expand Down Expand Up @@ -35,33 +35,36 @@
"short_circuit": false,
"skip_gt": false
}
2025-01-08 19:03:15,294 [WARNING] 193 contigs present in comparison VCF header are not in baseline VCF.
2025-01-08 19:03:15,364 [INFO] Zipped 418 variants Counter({'comp': 243, 'base': 175})
2025-01-08 19:03:15,365 [INFO] 185 chunks of 418 variants Counter({'comp': 190, '__filtered': 115, 'base': 113})
2025-01-08 19:03:15,412 [INFO] Stats: {
"TP-base": 85,
"TP-comp": 85,
"FP": 81,
"FN": 28,
"precision": 0.5120481927710844,
"recall": 0.7522123893805309,
"f1": 0.6093189964157706,
"base cnt": 113,
"comp cnt": 166,
"TP-comp_TP-gt": 68,
"TP-comp_FP-gt": 17,
"TP-base_TP-gt": 68,
"TP-base_FP-gt": 17,
2025-01-08 20:16:32,143 [WARNING] 193 contigs present in comparison VCF header are not in baseline VCF.
2025-01-08 20:16:32,220 [INFO] Zipped 439 variants Counter({'comp': 243, 'base': 196})
2025-01-08 20:16:32,220 [INFO] 202 chunks of 439 variants Counter({'comp': 190, 'base': 135, '__filtered': 114})
2025-01-08 20:16:32,268 [INFO] Stats: {
"TP-base": 80,
"TP-comp": 80,
"FP": 88,
"FN": 55,
"precision": 0.47619047619047616,
"recall": 0.5925925925925926,
"f1": 0.528052805280528,
"base cnt": 135,
"comp cnt": 168,
"TP-comp_TP-gt": 64,
"TP-comp_FP-gt": 16,
"TP-base_TP-gt": 64,
"TP-base_FP-gt": 16,
"gt_concordance": 0.8,
"gt_matrix": {
"(0, 1)": {
"(0, 1)": 65,
"(0, 0)": 16,
"(0, 1)": 59,
"(0, 0)": 15,
"(1, 1)": 1
},
"(1, 0)": {
"(0, 1)": 3
},
"(0, 0, 1)": {
"(0, 1)": 2
}
}
}
2025-01-08 19:03:15,412 [INFO] Finished bench
2025-01-08 20:16:32,268 [INFO] Finished bench
33 changes: 18 additions & 15 deletions repo_utils/answer_key/bench/bench_bnd/summary.json
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
{
"TP-base": 85,
"TP-comp": 85,
"FP": 81,
"FN": 28,
"precision": 0.5120481927710844,
"recall": 0.7522123893805309,
"f1": 0.6093189964157706,
"base cnt": 113,
"comp cnt": 166,
"TP-comp_TP-gt": 68,
"TP-comp_FP-gt": 17,
"TP-base_TP-gt": 68,
"TP-base_FP-gt": 17,
"TP-base": 80,
"TP-comp": 80,
"FP": 88,
"FN": 55,
"precision": 0.47619047619047616,
"recall": 0.5925925925925926,
"f1": 0.528052805280528,
"base cnt": 135,
"comp cnt": 168,
"TP-comp_TP-gt": 64,
"TP-comp_FP-gt": 16,
"TP-base_TP-gt": 64,
"TP-base_FP-gt": 16,
"gt_concordance": 0.8,
"gt_matrix": {
"(0, 1)": {
"(0, 1)": 65,
"(0, 0)": 16,
"(0, 1)": 59,
"(0, 0)": 15,
"(1, 1)": 1
},
"(1, 0)": {
"(0, 1)": 3
},
"(0, 0, 1)": {
"(0, 1)": 2
}
}
}
Binary file modified repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz.tbi
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -8343,7 +8343,7 @@ chr17 31964205 32004139
chr17 32092175 32092252
chr17 32109708 32110047
chr17 32276768 32276789
chr17 32305433 32677822
chr17 32305434 32677819
chr17 32948020 32948041
chr17 33357020 33357041
chr17 33464495 33464582
Expand Down Expand Up @@ -9510,7 +9510,7 @@ chr19 9970026 9970047
chr19 10377655 10377676
chr19 10714997 10715018
chr19 10758333 10758354
chr19 11196126 11219734
chr19 11196126 11219071
chr19 11613135 11613156
chr19 11811887 11811991
chr19 12365158 12365179
Expand Down Expand Up @@ -16472,7 +16472,7 @@ chr7 68462829 68462850
chr7 68535131 68535152
chr7 68664068 68664222
chr7 68761501 68761522
chr7 68922678 68950323
chr7 68949177 68949198
chr7 68959651 68959672
chr7 68997073 68997413
chr7 69027642 69027663
Expand Down
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/fn.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/fp.vcf.gz.tbi
Binary file not shown.
55 changes: 35 additions & 20 deletions repo_utils/answer_key/bench/bench_bnd_decomp/log.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
2025-01-08 18:56:46,711 [INFO] Truvari v5.0.0
2025-01-08 18:56:46,711 [INFO] Command /Users/english/code/truvari/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp2.vcf.gz --sizemax 1000000000 -p 0 --pick multi -o test_results/bench_bnd_decomp/
2025-01-08 18:56:46,711 [INFO] Params:
2025-01-08 20:17:40,376 [INFO] Truvari v5.0.0
2025-01-08 20:17:40,377 [INFO] Command /Users/english/code/truvari/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp2.vcf.gz --sizemax 1000000000 -p 0 --pick multi -o test_results/bench_bnd_decomp/
2025-01-08 20:17:40,377 [INFO] Params:
{
"base": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.base.vcf.gz",
"comp": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.comp2.vcf.gz",
Expand Down Expand Up @@ -35,32 +35,47 @@
"short_circuit": false,
"skip_gt": false
}
2025-01-08 18:56:46,741 [WARNING] 193 contigs present in comparison VCF header are not in baseline VCF.
2025-01-08 18:56:52,141 [INFO] Zipped 30077 variants Counter({'comp': 29902, 'base': 175})
2025-01-08 18:56:52,141 [INFO] 20652 chunks of 30077 variants Counter({'comp': 29902, 'base': 170, '__filtered': 5})
2025-01-08 18:56:55,373 [INFO] Stats: {
"TP-base": 137,
"TP-comp": 129,
"FP": 28401,
"FN": 33,
"precision": 0.00452155625657203,
"recall": 0.8058823529411765,
"f1": 0.0089926574974431,
"base cnt": 170,
2025-01-08 20:17:40,408 [WARNING] 193 contigs present in comparison VCF header are not in baseline VCF.
2025-01-08 20:17:45,964 [INFO] Zipped 30098 variants Counter({'comp': 29902, 'base': 196})
2025-01-08 20:17:45,965 [INFO] 20659 chunks of 30098 variants Counter({'comp': 29902, 'base': 186, '__filtered': 10})
2025-01-08 20:17:49,303 [INFO] Stats: {
"TP-base": 144,
"TP-comp": 126,
"FP": 28404,
"FN": 42,
"precision": 0.004416403785488959,
"recall": 0.7741935483870968,
"f1": 0.008782706432809682,
"base cnt": 186,
"comp cnt": 28530,
"TP-comp_TP-gt": 1,
"TP-comp_FP-gt": 128,
"TP-comp_FP-gt": 125,
"TP-base_TP-gt": 1,
"TP-base_FP-gt": 136,
"gt_concordance": 0.007751937984496124,
"TP-base_FP-gt": 143,
"gt_concordance": 0.007936507936507936,
"gt_matrix": {
"(0, 1)": {
"(0, 0)": 134,
"(0, 0)": 132,
"(0, 1)": 1
},
"(0, 0, 1)": {
"(0, 0)": 3
},
"(0, 1, 0)": {
"(0, 0)": 2
},
"(0, 0, 0, 1)": {
"(0, 0)": 1
},
"(0, 0, 1, 0)": {
"(0, 0)": 1
},
"(0, 1, 1)": {
"(0, 0)": 2
},
"(1, 0)": {
"(0, 0)": 2
}
}
}
2025-01-08 18:56:55,373 [INFO] Finished bench
2025-01-08 20:17:49,303 [INFO] Finished bench
39 changes: 27 additions & 12 deletions repo_utils/answer_key/bench/bench_bnd_decomp/summary.json
Original file line number Diff line number Diff line change
@@ -1,23 +1,38 @@
{
"TP-base": 137,
"TP-comp": 129,
"FP": 28401,
"FN": 33,
"precision": 0.00452155625657203,
"recall": 0.8058823529411765,
"f1": 0.0089926574974431,
"base cnt": 170,
"TP-base": 144,
"TP-comp": 126,
"FP": 28404,
"FN": 42,
"precision": 0.004416403785488959,
"recall": 0.7741935483870968,
"f1": 0.008782706432809682,
"base cnt": 186,
"comp cnt": 28530,
"TP-comp_TP-gt": 1,
"TP-comp_FP-gt": 128,
"TP-comp_FP-gt": 125,
"TP-base_TP-gt": 1,
"TP-base_FP-gt": 136,
"gt_concordance": 0.007751937984496124,
"TP-base_FP-gt": 143,
"gt_concordance": 0.007936507936507936,
"gt_matrix": {
"(0, 1)": {
"(0, 0)": 134,
"(0, 0)": 132,
"(0, 1)": 1
},
"(0, 0, 1)": {
"(0, 0)": 3
},
"(0, 1, 0)": {
"(0, 0)": 2
},
"(0, 0, 0, 1)": {
"(0, 0)": 1
},
"(0, 0, 1, 0)": {
"(0, 0)": 1
},
"(0, 1, 1)": {
"(0, 0)": 2
},
"(1, 0)": {
"(0, 0)": 2
}
Expand Down
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/tp-base.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/tp-base.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/tp-comp.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd_decomp/tp-comp.vcf.gz.tbi
Binary file not shown.
2 changes: 1 addition & 1 deletion repo_utils/sub_tests/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ run bench_gtcomp_edgecase1 $truv bench -b $INDIR/variants/gtcomp_problem1_base.v
if [ $bench_gtcomp_edgecase1 ]; then
bench_assert _gtcomp_edgecase1
fi
run bench_badparams $truv bench -b nofile.vcf -c nofile.aga -f notref.fa -o $OD --refdist 0 --extend
run bench_badparams $truv bench -b nofile.vcf -c nofile.aga -f notref.fa -o $OD --refdist 0 --extend 1
if [ $bench_badparams ]; then
assert_exit_code 100
fi
Expand Down
Binary file modified repo_utils/test_files/variants/bnd.base.vcf.gz
Binary file not shown.
Binary file modified repo_utils/test_files/variants/bnd.base.vcf.gz.tbi
Binary file not shown.
11 changes: 7 additions & 4 deletions truvari/variant_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def bnd_position(self):
"""
Extracts the chromosome and position from a BND ALT string.
Breakend (BND) ALT strings indicate structural variant breakpoints that span across chromosomes or positions.
Breakend (BND) ALT strings indicate structural variant breakpoints that span across chromosomes or positions.
This method parses the ALT string to extract the target chromosome and position of the breakpoint.
:return: A tuple containing the chromosome (as a string) and the position (as an integer).
Expand Down Expand Up @@ -298,11 +298,13 @@ def decompose(self):
raise ValueError("Can only decompose symbolic variants")
if self.decomp_repr:
return self.decomp_repr

svtype = self.var_type()
chrom = self.chrom
pos = self.pos
end = self.end
ret = []

if svtype == truvari.SV.INV:
record1 = self.copy()
record1.alts = (f"[{chrom}:{end}[N",)
Expand Down Expand Up @@ -338,15 +340,16 @@ def decompose(self):

elif svtype == truvari.SV.DUP:
record1 = self.copy()
record1.alts = (f"N]chr{chrom}:{end}]",)
record1.info["SVTYPE"] = "BND"
record1.alts = (f"]{chrom}:{end}]N",)
record1.info["SVTYPE"] = "BND"

record2 = self.copy()
record2.pos = end
record2.alts = (f"[chr{chrom}:{pos}[N",)
record2.alts = (f"N[{chrom}:{pos}[",)
record2.info["SVTYPE"] = "BND"

ret = [record1, record2]

self.decomp_repr = ret
return ret

Expand Down

0 comments on commit d24c58b

Please sign in to comment.