diff --git a/imgs/coverage.svg b/imgs/coverage.svg
index a9915353..ea71b13a 100644
--- a/imgs/coverage.svg
+++ b/imgs/coverage.svg
@@ -17,7 +17,7 @@
coverage
- 89%
- 89%
+ 90%
+ 90%
diff --git a/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz b/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz
index a3fc44a6..1dcb017f 100644
Binary files a/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz and b/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz differ
diff --git a/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz.tbi b/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz.tbi
index ca63a5c6..819c3953 100644
Binary files a/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz.tbi and b/repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz.tbi differ
diff --git a/repo_utils/answer_key/bench/bench_bnd/log.txt b/repo_utils/answer_key/bench/bench_bnd/log.txt
index c18cd930..258b41b8 100644
--- a/repo_utils/answer_key/bench/bench_bnd/log.txt
+++ b/repo_utils/answer_key/bench/bench_bnd/log.txt
@@ -1,10 +1,10 @@
-2025-01-05 01:05:59,220 [INFO] Truvari v5.0.0
-2025-01-05 01:05:59,220 [INFO] Command /Users/english/py/bin/truvari bench -p 0 -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -o test_results/bnd_test
-2025-01-05 01:05:59,220 [INFO] Params:
+2025-01-05 07:08:37,421 [INFO] Truvari v5.0.0
+2025-01-05 07:08:37,422 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -p 0 -o test_results/bench_bnd/
+2025-01-05 07:08:37,423 [INFO] Params:
{
- "base": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.base.vcf.gz",
- "comp": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.comp.vcf.gz",
- "output": "test_results/bnd_test",
+ "base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz",
+ "comp": "/data/repo_utils/test_files/variants/bnd.comp.vcf.gz",
+ "output": "test_results/bench_bnd/",
"includebed": null,
"extend": 0,
"debug": false,
@@ -29,33 +29,33 @@
"check_monref": true,
"check_multi": true
}
-2025-01-05 01:05:59,235 [WARNING] Excluding 193 contigs present in comparison calls header but not baseline calls.
-2025-01-05 01:05:59,283 [INFO] Zipped 418 variants Counter({'comp': 243, 'base': 175})
-2025-01-05 01:05:59,283 [INFO] 185 chunks of 418 variants Counter({'comp': 190, '__filtered': 115, 'base': 113})
-2025-01-05 01:05:59,338 [INFO] Stats: {
- "TP-base": 89,
- "TP-comp": 89,
+2025-01-05 07:08:37,460 [WARNING] Excluding 193 contigs present in comparison calls header but not baseline calls.
+2025-01-05 07:08:37,552 [INFO] Zipped 418 variants Counter({'comp': 243, 'base': 175})
+2025-01-05 07:08:37,553 [INFO] 185 chunks of 418 variants Counter({'comp': 190, '__filtered': 115, 'base': 113})
+2025-01-05 07:08:37,638 [INFO] Stats: {
+ "TP-base": 85,
+ "TP-comp": 85,
"FP": 81,
- "FN": 24,
- "precision": 0.5235294117647059,
- "recall": 0.7876106194690266,
- "f1": 0.6289752650176679,
+ "FN": 28,
+ "precision": 0.5120481927710844,
+ "recall": 0.7522123893805309,
+ "f1": 0.6093189964157706,
"base cnt": 113,
- "comp cnt": 170,
- "TP-comp_TP-gt": 54,
- "TP-comp_FP-gt": 35,
- "TP-base_TP-gt": 54,
- "TP-base_FP-gt": 35,
- "gt_concordance": 0.6067415730337079,
+ "comp cnt": 166,
+ "TP-comp_TP-gt": 68,
+ "TP-comp_FP-gt": 17,
+ "TP-base_TP-gt": 68,
+ "TP-base_FP-gt": 17,
+ "gt_concordance": 0.8,
"gt_matrix": {
- "None": {
- "None": 18
- },
"(0, 1)": {
- "(0, 1)": 54,
+ "(0, 1)": 65,
"(0, 0)": 16,
"(1, 1)": 1
+ },
+ "(1, 0)": {
+ "(0, 1)": 3
}
}
}
-2025-01-05 01:05:59,338 [INFO] Finished bench
+2025-01-05 07:08:37,638 [INFO] Finished bench
diff --git a/repo_utils/answer_key/bench/bench_bnd/params.json b/repo_utils/answer_key/bench/bench_bnd/params.json
index 2f51059e..cdafda38 100644
--- a/repo_utils/answer_key/bench/bench_bnd/params.json
+++ b/repo_utils/answer_key/bench/bench_bnd/params.json
@@ -1 +1 @@
-{"base": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.comp.vcf.gz", "output": "test_results/bnd_test", "includebed": null, "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.0, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "no_roll": true, "chunksize": 1000, "bSample": "HG008-T", "cSample": "HG008_T_hiphase.haplotagged", "dup_to_ins": false, "bnddist": 100, "sizemin": 50, "sizefilt": 30, "sizemax": 50000, "passonly": false, "no_ref": false, "pick": "single", "check_monref": true, "check_multi": true}
\ No newline at end of file
+{"base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/data/repo_utils/test_files/variants/bnd.comp.vcf.gz", "output": "test_results/bench_bnd/", "includebed": null, "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.0, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "no_roll": true, "chunksize": 1000, "bSample": "HG008-T", "cSample": "HG008_T_hiphase.haplotagged", "dup_to_ins": false, "bnddist": 100, "sizemin": 50, "sizefilt": 30, "sizemax": 50000, "passonly": false, "no_ref": false, "pick": "single", "check_monref": true, "check_multi": true}
\ No newline at end of file
diff --git a/repo_utils/answer_key/bench/bench_bnd/summary.json b/repo_utils/answer_key/bench/bench_bnd/summary.json
index b59c9830..f23027ac 100644
--- a/repo_utils/answer_key/bench/bench_bnd/summary.json
+++ b/repo_utils/answer_key/bench/bench_bnd/summary.json
@@ -1,26 +1,26 @@
{
- "TP-base": 89,
- "TP-comp": 89,
+ "TP-base": 85,
+ "TP-comp": 85,
"FP": 81,
- "FN": 24,
- "precision": 0.5235294117647059,
- "recall": 0.7876106194690266,
- "f1": 0.6289752650176679,
+ "FN": 28,
+ "precision": 0.5120481927710844,
+ "recall": 0.7522123893805309,
+ "f1": 0.6093189964157706,
"base cnt": 113,
- "comp cnt": 170,
- "TP-comp_TP-gt": 54,
- "TP-comp_FP-gt": 35,
- "TP-base_TP-gt": 54,
- "TP-base_FP-gt": 35,
- "gt_concordance": 0.6067415730337079,
+ "comp cnt": 166,
+ "TP-comp_TP-gt": 68,
+ "TP-comp_FP-gt": 17,
+ "TP-base_TP-gt": 68,
+ "TP-base_FP-gt": 17,
+ "gt_concordance": 0.8,
"gt_matrix": {
- "None": {
- "None": 18
- },
"(0, 1)": {
- "(0, 1)": 54,
+ "(0, 1)": 65,
"(0, 0)": 16,
"(1, 1)": 1
+ },
+ "(1, 0)": {
+ "(0, 1)": 3
}
}
}
\ No newline at end of file
diff --git a/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz b/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz
index bd97c282..f9bba69e 100644
Binary files a/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz and b/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz differ
diff --git a/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz.tbi b/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz.tbi
index 403bcffc..a6ae62ba 100644
Binary files a/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz.tbi and b/repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz.tbi differ
diff --git a/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz b/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz
index 31fe05cc..a7c2e365 100644
Binary files a/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz and b/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz differ
diff --git a/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz.tbi b/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz.tbi
index 3452be0f..b77233a1 100644
Binary files a/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz.tbi and b/repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz.tbi differ
diff --git a/repo_utils/sub_tests/bench.sh b/repo_utils/sub_tests/bench.sh
index 98ceef54..97319f83 100644
--- a/repo_utils/sub_tests/bench.sh
+++ b/repo_utils/sub_tests/bench.sh
@@ -91,5 +91,5 @@ run test_bench_bnd $truv bench -b $INDIR/variants/bnd.base.vcf.gz \
-c $INDIR/variants/bnd.comp.vcf.gz \
-p 0 -o $OD/bench_bnd/
if [ $test_bench_bnd ]; then
- bnech_assert _bnd
+ bench_assert _bnd
fi
diff --git a/truvari/bench.py b/truvari/bench.py
index 6a56b174..065a7cf7 100644
--- a/truvari/bench.py
+++ b/truvari/bench.py
@@ -519,16 +519,15 @@ def compare_chunk(self, chunk):
chunk_dict["base"], chunk_dict["comp"], chunk_id)
self.check_refine_candidate(result)
# Check BNDs separately
- if self.matcher.params.bnddist != -1:
+ if self.matcher.params.bnddist != -1 and (chunk_dict['base_BND'] or chunk_dict['comp_BND']):
result.extend(self.compare_calls(chunk_dict['base_BND'],
chunk_dict['comp_BND'], chunk_id, True))
return result
- def compare_calls(self, base_variants, comp_variants, chunk_id=0, bnds=False):
+ def compare_calls(self, base_variants, comp_variants, chunk_id=0, is_bnds=False):
"""
Builds MatchResults, returns them as a numpy matrix if there's at least one base and one comp variant.
Otherwise, returns a list of the variants placed in MatchResults
- sends to build_matrix if not bnd, else bnd_build_matrix
"""
# All FPs
if len(base_variants) == 0:
@@ -565,20 +564,20 @@ def compare_calls(self, base_variants, comp_variants, chunk_id=0, bnds=False):
cnt += 1
pos.extend(truvari.entry_boundaries(i))
chrom = i.chrom
- logging.warning(
- "Skipping region %s:%d-%d with %d variants", chrom, min(*pos), max(*pos), cnt)
+ logging.warning("Skipping region %s:%d-%d with %d variants",
+ chrom, min(*pos), max(*pos), cnt)
return []
- match_matrix = self.build_matrix(
- base_variants, comp_variants, chunk_id, bnds)
+ match_matrix = self.build_matrix(base_variants, comp_variants, chunk_id, is_bnds=is_bnds)
if isinstance(match_matrix, list):
return match_matrix
return PICKERS[self.matcher.params.pick](match_matrix)
- def build_matrix(self, base_variants, comp_variants, chunk_id=0, skip_gt=False, bnds=False):
+ def build_matrix(self, base_variants, comp_variants, chunk_id=0, skip_gt=False, is_bnds=False):
"""
Builds MatchResults, returns them as a numpy matrix
"""
+ matcher = self.matcher.build_match if not is_bnds else self.matcher.bnd_build_match
if not base_variants or not comp_variants:
raise RuntimeError(
"Expected at least one base and one comp variant")
@@ -586,9 +585,8 @@ def build_matrix(self, base_variants, comp_variants, chunk_id=0, skip_gt=False,
for bid, b in enumerate(base_variants):
base_matches = []
for cid, c in enumerate(comp_variants):
- matcher = self.matcher.build_match if not bnds else self.matcher.bnd_build_match
- mat = matcher(
- b, c, [f"{chunk_id}.{bid}", f"{chunk_id}.{cid}"], skip_gt, self.short_circuit)
+ mat = matcher(b, c, [f"{chunk_id}.{bid}", f"{chunk_id}.{cid}"],
+ skip_gt, self.short_circuit)
logging.debug("Made mat -> %s", mat)
base_matches.append(mat)
match_matrix.append(base_matches)
diff --git a/truvari/matching.py b/truvari/matching.py
index 500c0a7f..070b6be5 100644
--- a/truvari/matching.py
+++ b/truvari/matching.py
@@ -259,7 +259,7 @@ def build_match(self, base, comp, matid=None, skip_gt=False, short_circuit=False
return ret
- def bnd_build_match(self, base, comp, matid=None, **_kwargs):
+ def bnd_build_match(self, base, comp, matid=None, *_args, **_kwargs):
"""
Build a MatchResult for bnds
"""
@@ -271,7 +271,7 @@ def bnd_build_match(self, base, comp, matid=None, **_kwargs):
ret.state = base.chrom == comp.chrom
ret.st_dist = base.pos - comp.pos
- ret.state &= abs(ret.st_dist) < self.matcher.params.bnddist
+ ret.state &= abs(ret.st_dist) < self.params.bnddist
b_bnd = bnd_direction_strand(base.alts[0])
c_bnd = bnd_direction_strand(comp.alts[0])
ret.state &= b_bnd == c_bnd
@@ -280,15 +280,14 @@ def bnd_build_match(self, base, comp, matid=None, **_kwargs):
c_pos2 = bnd_position(comp.alts[0])
ret.ed_dist = b_pos2[1] - c_pos2[1]
ret.state &= b_pos2[0] == c_pos2[0]
- ret.state &= ret.ed_dist < self.matcher.params.bnddist
+ ret.state &= ret.ed_dist < self.params.bnddist
self.compare_gts(ret, base, comp)
# Score is percent of allowed distance needed to find this match
ret.score = (1 - ((abs(ret.st_dist) + abs(ret.ed_dist)) /
- 2) / self.matcher.params.bnddist) * 100
+ 2) / self.params.bnddist) * 100
# I think I'm missing GT stuff here
- logging.debug("BND match -> %s", ret)
return ret
############################