diff --git a/imgs/coverage.svg b/imgs/coverage.svg index a9915353..ea71b13a 100644 --- a/imgs/coverage.svg +++ b/imgs/coverage.svg @@ -17,7 +17,7 @@ coverage - 89% - 89% + 90% + 90% diff --git a/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz b/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz index a3fc44a6..1dcb017f 100644 Binary files a/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz and b/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz differ diff --git a/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz.tbi b/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz.tbi index ca63a5c6..819c3953 100644 Binary files a/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz.tbi and b/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/bench/bench_bnd/log.txt b/repo_utils/answer_key/bench/bench_bnd/log.txt index c18cd930..258b41b8 100644 --- a/repo_utils/answer_key/bench/bench_bnd/log.txt +++ b/repo_utils/answer_key/bench/bench_bnd/log.txt @@ -1,10 +1,10 @@ -2025-01-05 01:05:59,220 [INFO] Truvari v5.0.0 -2025-01-05 01:05:59,220 [INFO] Command /Users/english/py/bin/truvari bench -p 0 -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -o test_results/bnd_test -2025-01-05 01:05:59,220 [INFO] Params: +2025-01-05 07:08:37,421 [INFO] Truvari v5.0.0 +2025-01-05 07:08:37,422 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -p 0 -o test_results/bench_bnd/ +2025-01-05 07:08:37,423 [INFO] Params: { - "base": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.base.vcf.gz", - "comp": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.comp.vcf.gz", - "output": "test_results/bnd_test", + "base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz", + "comp": "/data/repo_utils/test_files/variants/bnd.comp.vcf.gz", + "output": "test_results/bench_bnd/", "includebed": null, "extend": 0, "debug": false, @@ -29,33 +29,33 @@ "check_monref": true, "check_multi": true } -2025-01-05 01:05:59,235 [WARNING] Excluding 193 contigs present in comparison calls header but not baseline calls. -2025-01-05 01:05:59,283 [INFO] Zipped 418 variants Counter({'comp': 243, 'base': 175}) -2025-01-05 01:05:59,283 [INFO] 185 chunks of 418 variants Counter({'comp': 190, '__filtered': 115, 'base': 113}) -2025-01-05 01:05:59,338 [INFO] Stats: { - "TP-base": 89, - "TP-comp": 89, +2025-01-05 07:08:37,460 [WARNING] Excluding 193 contigs present in comparison calls header but not baseline calls. +2025-01-05 07:08:37,552 [INFO] Zipped 418 variants Counter({'comp': 243, 'base': 175}) +2025-01-05 07:08:37,553 [INFO] 185 chunks of 418 variants Counter({'comp': 190, '__filtered': 115, 'base': 113}) +2025-01-05 07:08:37,638 [INFO] Stats: { + "TP-base": 85, + "TP-comp": 85, "FP": 81, - "FN": 24, - "precision": 0.5235294117647059, - "recall": 0.7876106194690266, - "f1": 0.6289752650176679, + "FN": 28, + "precision": 0.5120481927710844, + "recall": 0.7522123893805309, + "f1": 0.6093189964157706, "base cnt": 113, - "comp cnt": 170, - "TP-comp_TP-gt": 54, - "TP-comp_FP-gt": 35, - "TP-base_TP-gt": 54, - "TP-base_FP-gt": 35, - "gt_concordance": 0.6067415730337079, + "comp cnt": 166, + "TP-comp_TP-gt": 68, + "TP-comp_FP-gt": 17, + "TP-base_TP-gt": 68, + "TP-base_FP-gt": 17, + "gt_concordance": 0.8, "gt_matrix": { - "None": { - "None": 18 - }, "(0, 1)": { - "(0, 1)": 54, + "(0, 1)": 65, "(0, 0)": 16, "(1, 1)": 1 + }, + "(1, 0)": { + "(0, 1)": 3 } } } -2025-01-05 01:05:59,338 [INFO] Finished bench +2025-01-05 07:08:37,638 [INFO] Finished bench diff --git a/repo_utils/answer_key/bench/bench_bnd/params.json b/repo_utils/answer_key/bench/bench_bnd/params.json index 2f51059e..cdafda38 100644 --- a/repo_utils/answer_key/bench/bench_bnd/params.json +++ b/repo_utils/answer_key/bench/bench_bnd/params.json @@ -1 +1 @@ -{"base": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.comp.vcf.gz", "output": "test_results/bnd_test", "includebed": null, "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.0, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "no_roll": true, "chunksize": 1000, "bSample": "HG008-T", "cSample": "HG008_T_hiphase.haplotagged", "dup_to_ins": false, "bnddist": 100, "sizemin": 50, "sizefilt": 30, "sizemax": 50000, "passonly": false, "no_ref": false, "pick": "single", "check_monref": true, "check_multi": true} \ No newline at end of file +{"base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/data/repo_utils/test_files/variants/bnd.comp.vcf.gz", "output": "test_results/bench_bnd/", "includebed": null, "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.0, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "no_roll": true, "chunksize": 1000, "bSample": "HG008-T", "cSample": "HG008_T_hiphase.haplotagged", "dup_to_ins": false, "bnddist": 100, "sizemin": 50, "sizefilt": 30, "sizemax": 50000, "passonly": false, "no_ref": false, "pick": "single", "check_monref": true, "check_multi": true} \ No newline at end of file diff --git a/repo_utils/answer_key/bench/bench_bnd/summary.json b/repo_utils/answer_key/bench/bench_bnd/summary.json index b59c9830..f23027ac 100644 --- a/repo_utils/answer_key/bench/bench_bnd/summary.json +++ b/repo_utils/answer_key/bench/bench_bnd/summary.json @@ -1,26 +1,26 @@ { - "TP-base": 89, - "TP-comp": 89, + "TP-base": 85, + "TP-comp": 85, "FP": 81, - "FN": 24, - "precision": 0.5235294117647059, - "recall": 0.7876106194690266, - "f1": 0.6289752650176679, + "FN": 28, + "precision": 0.5120481927710844, + "recall": 0.7522123893805309, + "f1": 0.6093189964157706, "base cnt": 113, - "comp cnt": 170, - "TP-comp_TP-gt": 54, - "TP-comp_FP-gt": 35, - "TP-base_TP-gt": 54, - "TP-base_FP-gt": 35, - "gt_concordance": 0.6067415730337079, + "comp cnt": 166, + "TP-comp_TP-gt": 68, + "TP-comp_FP-gt": 17, + "TP-base_TP-gt": 68, + "TP-base_FP-gt": 17, + "gt_concordance": 0.8, "gt_matrix": { - "None": { - "None": 18 - }, "(0, 1)": { - "(0, 1)": 54, + "(0, 1)": 65, "(0, 0)": 16, "(1, 1)": 1 + }, + "(1, 0)": { + "(0, 1)": 3 } } } \ No newline at end of file diff --git a/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz b/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz index bd97c282..f9bba69e 100644 Binary files a/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz and b/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz differ diff --git a/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz.tbi b/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz.tbi index 403bcffc..a6ae62ba 100644 Binary files a/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz.tbi and b/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz.tbi differ diff --git a/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz b/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz index 31fe05cc..a7c2e365 100644 Binary files a/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz and b/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz differ diff --git a/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz.tbi b/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz.tbi index 3452be0f..b77233a1 100644 Binary files a/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz.tbi and b/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz.tbi differ diff --git a/repo_utils/sub_tests/bench.sh b/repo_utils/sub_tests/bench.sh index 98ceef54..97319f83 100644 --- a/repo_utils/sub_tests/bench.sh +++ b/repo_utils/sub_tests/bench.sh @@ -91,5 +91,5 @@ run test_bench_bnd $truv bench -b $INDIR/variants/bnd.base.vcf.gz \ -c $INDIR/variants/bnd.comp.vcf.gz \ -p 0 -o $OD/bench_bnd/ if [ $test_bench_bnd ]; then - bnech_assert _bnd + bench_assert _bnd fi diff --git a/truvari/bench.py b/truvari/bench.py index 6a56b174..065a7cf7 100644 --- a/truvari/bench.py +++ b/truvari/bench.py @@ -519,16 +519,15 @@ def compare_chunk(self, chunk): chunk_dict["base"], chunk_dict["comp"], chunk_id) self.check_refine_candidate(result) # Check BNDs separately - if self.matcher.params.bnddist != -1: + if self.matcher.params.bnddist != -1 and (chunk_dict['base_BND'] or chunk_dict['comp_BND']): result.extend(self.compare_calls(chunk_dict['base_BND'], chunk_dict['comp_BND'], chunk_id, True)) return result - def compare_calls(self, base_variants, comp_variants, chunk_id=0, bnds=False): + def compare_calls(self, base_variants, comp_variants, chunk_id=0, is_bnds=False): """ Builds MatchResults, returns them as a numpy matrix if there's at least one base and one comp variant. Otherwise, returns a list of the variants placed in MatchResults - sends to build_matrix if not bnd, else bnd_build_matrix """ # All FPs if len(base_variants) == 0: @@ -565,20 +564,20 @@ def compare_calls(self, base_variants, comp_variants, chunk_id=0, bnds=False): cnt += 1 pos.extend(truvari.entry_boundaries(i)) chrom = i.chrom - logging.warning( - "Skipping region %s:%d-%d with %d variants", chrom, min(*pos), max(*pos), cnt) + logging.warning("Skipping region %s:%d-%d with %d variants", + chrom, min(*pos), max(*pos), cnt) return [] - match_matrix = self.build_matrix( - base_variants, comp_variants, chunk_id, bnds) + match_matrix = self.build_matrix(base_variants, comp_variants, chunk_id, is_bnds=is_bnds) if isinstance(match_matrix, list): return match_matrix return PICKERS[self.matcher.params.pick](match_matrix) - def build_matrix(self, base_variants, comp_variants, chunk_id=0, skip_gt=False, bnds=False): + def build_matrix(self, base_variants, comp_variants, chunk_id=0, skip_gt=False, is_bnds=False): """ Builds MatchResults, returns them as a numpy matrix """ + matcher = self.matcher.build_match if not is_bnds else self.matcher.bnd_build_match if not base_variants or not comp_variants: raise RuntimeError( "Expected at least one base and one comp variant") @@ -586,9 +585,8 @@ def build_matrix(self, base_variants, comp_variants, chunk_id=0, skip_gt=False, for bid, b in enumerate(base_variants): base_matches = [] for cid, c in enumerate(comp_variants): - matcher = self.matcher.build_match if not bnds else self.matcher.bnd_build_match - mat = matcher( - b, c, [f"{chunk_id}.{bid}", f"{chunk_id}.{cid}"], skip_gt, self.short_circuit) + mat = matcher(b, c, [f"{chunk_id}.{bid}", f"{chunk_id}.{cid}"], + skip_gt, self.short_circuit) logging.debug("Made mat -> %s", mat) base_matches.append(mat) match_matrix.append(base_matches) diff --git a/truvari/matching.py b/truvari/matching.py index 500c0a7f..070b6be5 100644 --- a/truvari/matching.py +++ b/truvari/matching.py @@ -259,7 +259,7 @@ def build_match(self, base, comp, matid=None, skip_gt=False, short_circuit=False return ret - def bnd_build_match(self, base, comp, matid=None, **_kwargs): + def bnd_build_match(self, base, comp, matid=None, *_args, **_kwargs): """ Build a MatchResult for bnds """ @@ -271,7 +271,7 @@ def bnd_build_match(self, base, comp, matid=None, **_kwargs): ret.state = base.chrom == comp.chrom ret.st_dist = base.pos - comp.pos - ret.state &= abs(ret.st_dist) < self.matcher.params.bnddist + ret.state &= abs(ret.st_dist) < self.params.bnddist b_bnd = bnd_direction_strand(base.alts[0]) c_bnd = bnd_direction_strand(comp.alts[0]) ret.state &= b_bnd == c_bnd @@ -280,15 +280,14 @@ def bnd_build_match(self, base, comp, matid=None, **_kwargs): c_pos2 = bnd_position(comp.alts[0]) ret.ed_dist = b_pos2[1] - c_pos2[1] ret.state &= b_pos2[0] == c_pos2[0] - ret.state &= ret.ed_dist < self.matcher.params.bnddist + ret.state &= ret.ed_dist < self.params.bnddist self.compare_gts(ret, base, comp) # Score is percent of allowed distance needed to find this match ret.score = (1 - ((abs(ret.st_dist) + abs(ret.ed_dist)) / - 2) / self.matcher.params.bnddist) * 100 + 2) / self.params.bnddist) * 100 # I think I'm missing GT stuff here - logging.debug("BND match -> %s", ret) return ret ############################