Skip to content

Commit

Permalink
prototype complete
Browse files Browse the repository at this point in the history
  • Loading branch information
ACEnglish committed Jan 5, 2025
1 parent 36b8747 commit 957e16a
Show file tree
Hide file tree
Showing 13 changed files with 60 additions and 63 deletions.
4 changes: 2 additions & 2 deletions imgs/coverage.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz.tbi
Binary file not shown.
54 changes: 27 additions & 27 deletions repo_utils/answer_key/bench/bench_bnd/log.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
2025-01-05 01:05:59,220 [INFO] Truvari v5.0.0
2025-01-05 01:05:59,220 [INFO] Command /Users/english/py/bin/truvari bench -p 0 -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -o test_results/bnd_test
2025-01-05 01:05:59,220 [INFO] Params:
2025-01-05 07:08:37,421 [INFO] Truvari v5.0.0
2025-01-05 07:08:37,422 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -p 0 -o test_results/bench_bnd/
2025-01-05 07:08:37,423 [INFO] Params:
{
"base": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.base.vcf.gz",
"comp": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.comp.vcf.gz",
"output": "test_results/bnd_test",
"base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz",
"comp": "/data/repo_utils/test_files/variants/bnd.comp.vcf.gz",
"output": "test_results/bench_bnd/",
"includebed": null,
"extend": 0,
"debug": false,
Expand All @@ -29,33 +29,33 @@
"check_monref": true,
"check_multi": true
}
2025-01-05 01:05:59,235 [WARNING] Excluding 193 contigs present in comparison calls header but not baseline calls.
2025-01-05 01:05:59,283 [INFO] Zipped 418 variants Counter({'comp': 243, 'base': 175})
2025-01-05 01:05:59,283 [INFO] 185 chunks of 418 variants Counter({'comp': 190, '__filtered': 115, 'base': 113})
2025-01-05 01:05:59,338 [INFO] Stats: {
"TP-base": 89,
"TP-comp": 89,
2025-01-05 07:08:37,460 [WARNING] Excluding 193 contigs present in comparison calls header but not baseline calls.
2025-01-05 07:08:37,552 [INFO] Zipped 418 variants Counter({'comp': 243, 'base': 175})
2025-01-05 07:08:37,553 [INFO] 185 chunks of 418 variants Counter({'comp': 190, '__filtered': 115, 'base': 113})
2025-01-05 07:08:37,638 [INFO] Stats: {
"TP-base": 85,
"TP-comp": 85,
"FP": 81,
"FN": 24,
"precision": 0.5235294117647059,
"recall": 0.7876106194690266,
"f1": 0.6289752650176679,
"FN": 28,
"precision": 0.5120481927710844,
"recall": 0.7522123893805309,
"f1": 0.6093189964157706,
"base cnt": 113,
"comp cnt": 170,
"TP-comp_TP-gt": 54,
"TP-comp_FP-gt": 35,
"TP-base_TP-gt": 54,
"TP-base_FP-gt": 35,
"gt_concordance": 0.6067415730337079,
"comp cnt": 166,
"TP-comp_TP-gt": 68,
"TP-comp_FP-gt": 17,
"TP-base_TP-gt": 68,
"TP-base_FP-gt": 17,
"gt_concordance": 0.8,
"gt_matrix": {
"None": {
"None": 18
},
"(0, 1)": {
"(0, 1)": 54,
"(0, 1)": 65,
"(0, 0)": 16,
"(1, 1)": 1
},
"(1, 0)": {
"(0, 1)": 3
}
}
}
2025-01-05 01:05:59,338 [INFO] Finished bench
2025-01-05 07:08:37,638 [INFO] Finished bench
2 changes: 1 addition & 1 deletion repo_utils/answer_key/bench/bench_bnd/params.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"base": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/Users/english/code/truvari/repo_utils/test_files/variants/bnd.comp.vcf.gz", "output": "test_results/bnd_test", "includebed": null, "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.0, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "no_roll": true, "chunksize": 1000, "bSample": "HG008-T", "cSample": "HG008_T_hiphase.haplotagged", "dup_to_ins": false, "bnddist": 100, "sizemin": 50, "sizefilt": 30, "sizemax": 50000, "passonly": false, "no_ref": false, "pick": "single", "check_monref": true, "check_multi": true}
{"base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/data/repo_utils/test_files/variants/bnd.comp.vcf.gz", "output": "test_results/bench_bnd/", "includebed": null, "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.0, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "no_roll": true, "chunksize": 1000, "bSample": "HG008-T", "cSample": "HG008_T_hiphase.haplotagged", "dup_to_ins": false, "bnddist": 100, "sizemin": 50, "sizefilt": 30, "sizemax": 50000, "passonly": false, "no_ref": false, "pick": "single", "check_monref": true, "check_multi": true}
32 changes: 16 additions & 16 deletions repo_utils/answer_key/bench/bench_bnd/summary.json
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
{
"TP-base": 89,
"TP-comp": 89,
"TP-base": 85,
"TP-comp": 85,
"FP": 81,
"FN": 24,
"precision": 0.5235294117647059,
"recall": 0.7876106194690266,
"f1": 0.6289752650176679,
"FN": 28,
"precision": 0.5120481927710844,
"recall": 0.7522123893805309,
"f1": 0.6093189964157706,
"base cnt": 113,
"comp cnt": 170,
"TP-comp_TP-gt": 54,
"TP-comp_FP-gt": 35,
"TP-base_TP-gt": 54,
"TP-base_FP-gt": 35,
"gt_concordance": 0.6067415730337079,
"comp cnt": 166,
"TP-comp_TP-gt": 68,
"TP-comp_FP-gt": 17,
"TP-base_TP-gt": 68,
"TP-base_FP-gt": 17,
"gt_concordance": 0.8,
"gt_matrix": {
"None": {
"None": 18
},
"(0, 1)": {
"(0, 1)": 54,
"(0, 1)": 65,
"(0, 0)": 16,
"(1, 1)": 1
},
"(1, 0)": {
"(0, 1)": 3
}
}
}
Binary file modified repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/tp-base.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench_bnd/tp-comp.vcf.gz.tbi
Binary file not shown.
2 changes: 1 addition & 1 deletion repo_utils/sub_tests/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,5 @@ run test_bench_bnd $truv bench -b $INDIR/variants/bnd.base.vcf.gz \
-c $INDIR/variants/bnd.comp.vcf.gz \
-p 0 -o $OD/bench_bnd/
if [ $test_bench_bnd ]; then
bnech_assert _bnd
bench_assert _bnd
fi
20 changes: 9 additions & 11 deletions truvari/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,16 +519,15 @@ def compare_chunk(self, chunk):
chunk_dict["base"], chunk_dict["comp"], chunk_id)
self.check_refine_candidate(result)
# Check BNDs separately
if self.matcher.params.bnddist != -1:
if self.matcher.params.bnddist != -1 and (chunk_dict['base_BND'] or chunk_dict['comp_BND']):
result.extend(self.compare_calls(chunk_dict['base_BND'],
chunk_dict['comp_BND'], chunk_id, True))
return result

def compare_calls(self, base_variants, comp_variants, chunk_id=0, bnds=False):
def compare_calls(self, base_variants, comp_variants, chunk_id=0, is_bnds=False):
"""
Builds MatchResults, returns them as a numpy matrix if there's at least one base and one comp variant.
Otherwise, returns a list of the variants placed in MatchResults
sends to build_matrix if not bnd, else bnd_build_matrix
"""
# All FPs
if len(base_variants) == 0:
Expand Down Expand Up @@ -565,30 +564,29 @@ def compare_calls(self, base_variants, comp_variants, chunk_id=0, bnds=False):
cnt += 1
pos.extend(truvari.entry_boundaries(i))
chrom = i.chrom
logging.warning(
"Skipping region %s:%d-%d with %d variants", chrom, min(*pos), max(*pos), cnt)
logging.warning("Skipping region %s:%d-%d with %d variants",
chrom, min(*pos), max(*pos), cnt)
return []

match_matrix = self.build_matrix(
base_variants, comp_variants, chunk_id, bnds)
match_matrix = self.build_matrix(base_variants, comp_variants, chunk_id, is_bnds=is_bnds)
if isinstance(match_matrix, list):
return match_matrix
return PICKERS[self.matcher.params.pick](match_matrix)

def build_matrix(self, base_variants, comp_variants, chunk_id=0, skip_gt=False, bnds=False):
def build_matrix(self, base_variants, comp_variants, chunk_id=0, skip_gt=False, is_bnds=False):
"""
Builds MatchResults, returns them as a numpy matrix
"""
matcher = self.matcher.build_match if not is_bnds else self.matcher.bnd_build_match
if not base_variants or not comp_variants:
raise RuntimeError(
"Expected at least one base and one comp variant")
match_matrix = []
for bid, b in enumerate(base_variants):
base_matches = []
for cid, c in enumerate(comp_variants):
matcher = self.matcher.build_match if not bnds else self.matcher.bnd_build_match
mat = matcher(
b, c, [f"{chunk_id}.{bid}", f"{chunk_id}.{cid}"], skip_gt, self.short_circuit)
mat = matcher(b, c, [f"{chunk_id}.{bid}", f"{chunk_id}.{cid}"],
skip_gt, self.short_circuit)
logging.debug("Made mat -> %s", mat)
base_matches.append(mat)
match_matrix.append(base_matches)
Expand Down
9 changes: 4 additions & 5 deletions truvari/matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def build_match(self, base, comp, matid=None, skip_gt=False, short_circuit=False

return ret

def bnd_build_match(self, base, comp, matid=None, **_kwargs):
def bnd_build_match(self, base, comp, matid=None, *_args, **_kwargs):
"""
Build a MatchResult for bnds
"""
Expand All @@ -271,7 +271,7 @@ def bnd_build_match(self, base, comp, matid=None, **_kwargs):
ret.state = base.chrom == comp.chrom

ret.st_dist = base.pos - comp.pos
ret.state &= abs(ret.st_dist) < self.matcher.params.bnddist
ret.state &= abs(ret.st_dist) < self.params.bnddist
b_bnd = bnd_direction_strand(base.alts[0])
c_bnd = bnd_direction_strand(comp.alts[0])
ret.state &= b_bnd == c_bnd
Expand All @@ -280,15 +280,14 @@ def bnd_build_match(self, base, comp, matid=None, **_kwargs):
c_pos2 = bnd_position(comp.alts[0])
ret.ed_dist = b_pos2[1] - c_pos2[1]
ret.state &= b_pos2[0] == c_pos2[0]
ret.state &= ret.ed_dist < self.matcher.params.bnddist
ret.state &= ret.ed_dist < self.params.bnddist

self.compare_gts(ret, base, comp)

# Score is percent of allowed distance needed to find this match
ret.score = (1 - ((abs(ret.st_dist) + abs(ret.ed_dist)) /
2) / self.matcher.params.bnddist) * 100
2) / self.params.bnddist) * 100
# I think I'm missing GT stuff here
logging.debug("BND match -> %s", ret)
return ret

############################
Expand Down

0 comments on commit 957e16a

Please sign in to comment.