Skip to content

Commit

Permalink
Merge branch 'feature/bndbench' into develop
Browse files Browse the repository at this point in the history
Now able to compare bnds
  • Loading branch information
ACEnglish committed Jan 5, 2025
2 parents feb65e5 + 957e16a commit feddaed
Show file tree
Hide file tree
Showing 20 changed files with 322 additions and 38 deletions.
4 changes: 2 additions & 2 deletions imgs/coverage.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
64 changes: 64 additions & 0 deletions repo_utils/answer_key/bench/bench_bnd/candidate.refine.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
chr1 181135 181156
chr1 6005049 6006670
chr1 224013761 224013782
chr10 1238267 1238288
chr10 39478703 39478724
chr10 132505641 132506422
chr11 964817 964838
chr11 44662925 44662946
chr11 48880455 48880476
chr11 48888289 48888310
chr11 68232223 68232244
chr11 80583401 80583422
chr11 105902622 105902643
chr12 2572354 2572375
chr12 10428997 10444441
chr12 26566549 26566649
chr12 106703356 106718437
chr12 132242311 132242332
chr13 56380894 56381040
chr13 114073358 114074122
chr14 102919538 102919632
chr15 74894096 74894117
chr16 1025231 1025252
chr16 49626994 49627100
chr16 89536459 89536480
chr17 31964205 32004138
chr17 77850848 77850938
chr17 83120464 83120485
chr17 83127668 83127689
chr18 70465189 70465210
chr18 78390326 78390347
chr19 395626 395835
chr19 3477781 3477868
chr19 29439299 29439424
chr19 38981201 38983536
chr19 55068010 55068465
chr2 9737589 9737610
chr2 138335007 138335083
chr2 157917136 157917157
chr2 238119351 238119427
chr20 16410045 16410066
chr20 38428231 38428252
chr20 62755268 62755289
chr20 63443033 63443054
chr22 31903282 31903303
chr22 45328219 45328240
chr22 47732364 47732476
chr22 49494347 49494368
chr3 112164423 112164444
chr3 184714589 184714610
chr4 1429347 1434609
chr4 8631479 8637158
chr5 71009525 71015235
chr5 176031036 176031057
chr7 23502979 23503000
chr7 68922678 68950323
chr7 75988853 75988924
chr7 158151440 158151461
chr8 48193098 48193205
chr8 77023596 77040442
chr8 143669349 143669370
chr9 104414084 104414105
chr9 136535716 136535737
chrX 125901244 125901387
Binary file added repo_utils/answer_key/bench/bench_bnd/fn.vcf.gz
Binary file not shown.
Binary file not shown.
Binary file added repo_utils/answer_key/bench/bench_bnd/fp.vcf.gz
Binary file not shown.
Binary file not shown.
61 changes: 61 additions & 0 deletions repo_utils/answer_key/bench/bench_bnd/log.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
2025-01-05 07:08:37,421 [INFO] Truvari v5.0.0
2025-01-05 07:08:37,422 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/bnd.base.vcf.gz -c repo_utils/test_files/variants/bnd.comp.vcf.gz -p 0 -o test_results/bench_bnd/
2025-01-05 07:08:37,423 [INFO] Params:
{
"base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz",
"comp": "/data/repo_utils/test_files/variants/bnd.comp.vcf.gz",
"output": "test_results/bench_bnd/",
"includebed": null,
"extend": 0,
"debug": false,
"reference": null,
"refdist": 500,
"pctseq": 0.0,
"pctsize": 0.7,
"pctovl": 0.0,
"typeignore": false,
"no_roll": true,
"chunksize": 1000,
"bSample": "HG008-T",
"cSample": "HG008_T_hiphase.haplotagged",
"dup_to_ins": false,
"bnddist": 100,
"sizemin": 50,
"sizefilt": 30,
"sizemax": 50000,
"passonly": false,
"no_ref": false,
"pick": "single",
"check_monref": true,
"check_multi": true
}
2025-01-05 07:08:37,460 [WARNING] Excluding 193 contigs present in comparison calls header but not baseline calls.
2025-01-05 07:08:37,552 [INFO] Zipped 418 variants Counter({'comp': 243, 'base': 175})
2025-01-05 07:08:37,553 [INFO] 185 chunks of 418 variants Counter({'comp': 190, '__filtered': 115, 'base': 113})
2025-01-05 07:08:37,638 [INFO] Stats: {
"TP-base": 85,
"TP-comp": 85,
"FP": 81,
"FN": 28,
"precision": 0.5120481927710844,
"recall": 0.7522123893805309,
"f1": 0.6093189964157706,
"base cnt": 113,
"comp cnt": 166,
"TP-comp_TP-gt": 68,
"TP-comp_FP-gt": 17,
"TP-base_TP-gt": 68,
"TP-base_FP-gt": 17,
"gt_concordance": 0.8,
"gt_matrix": {
"(0, 1)": {
"(0, 1)": 65,
"(0, 0)": 16,
"(1, 1)": 1
},
"(1, 0)": {
"(0, 1)": 3
}
}
}
2025-01-05 07:08:37,638 [INFO] Finished bench
1 change: 1 addition & 0 deletions repo_utils/answer_key/bench/bench_bnd/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"base": "/data/repo_utils/test_files/variants/bnd.base.vcf.gz", "comp": "/data/repo_utils/test_files/variants/bnd.comp.vcf.gz", "output": "test_results/bench_bnd/", "includebed": null, "extend": 0, "debug": false, "reference": null, "refdist": 500, "pctseq": 0.0, "pctsize": 0.7, "pctovl": 0.0, "typeignore": false, "no_roll": true, "chunksize": 1000, "bSample": "HG008-T", "cSample": "HG008_T_hiphase.haplotagged", "dup_to_ins": false, "bnddist": 100, "sizemin": 50, "sizefilt": 30, "sizemax": 50000, "passonly": false, "no_ref": false, "pick": "single", "check_monref": true, "check_multi": true}
26 changes: 26 additions & 0 deletions repo_utils/answer_key/bench/bench_bnd/summary.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"TP-base": 85,
"TP-comp": 85,
"FP": 81,
"FN": 28,
"precision": 0.5120481927710844,
"recall": 0.7522123893805309,
"f1": 0.6093189964157706,
"base cnt": 113,
"comp cnt": 166,
"TP-comp_TP-gt": 68,
"TP-comp_FP-gt": 17,
"TP-base_TP-gt": 68,
"TP-base_FP-gt": 17,
"gt_concordance": 0.8,
"gt_matrix": {
"(0, 1)": {
"(0, 1)": 65,
"(0, 0)": 16,
"(1, 1)": 1
},
"(1, 0)": {
"(0, 1)": 3
}
}
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
7 changes: 7 additions & 0 deletions repo_utils/sub_tests/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,10 @@ run test_bench_starallele $truv bench -b $INDIR/variants/star.base.vcf.gz \
if [ $test_bench_starallele ]; then
bench_assert _starallele
fi

run test_bench_bnd $truv bench -b $INDIR/variants/bnd.base.vcf.gz \
-c $INDIR/variants/bnd.comp.vcf.gz \
-p 0 -o $OD/bench_bnd/
if [ $test_bench_bnd ]; then
bench_assert _bnd
fi
Binary file added repo_utils/test_files/variants/bnd.base.vcf.gz
Binary file not shown.
Binary file not shown.
Binary file added repo_utils/test_files/variants/bnd.comp.vcf.gz
Binary file not shown.
Binary file not shown.
33 changes: 21 additions & 12 deletions truvari/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def parse_args(args):
help="Number of matches reported per-call (%(default)s)")
thresg.add_argument("--dup-to-ins", action="store_true",
help="Assume DUP svtypes are INS (%(default)s)")
thresg.add_argument("-B", "--bnddist", type=int, default=defaults.bnddist,
help="Maximum distance allowed between BNDs (%(default)s; -1=off)")
thresg.add_argument("-C", "--chunksize", type=truvari.restricted_int, default=defaults.chunksize,
help="Max reference distance to compare calls (%(default)s)")

Expand Down Expand Up @@ -497,7 +499,7 @@ def run(self):
if (self.extend
and (match.comp is not None)
and not match.state
and not truvari.entry_within_tree(match.comp, region_tree)):
and not truvari.entry_within_tree(match.comp, region_tree)):
match.comp = None
output.write_match(match)

Expand All @@ -516,9 +518,13 @@ def compare_chunk(self, chunk):
result = self.compare_calls(
chunk_dict["base"], chunk_dict["comp"], chunk_id)
self.check_refine_candidate(result)
# Check BNDs separately
if self.matcher.params.bnddist != -1 and (chunk_dict['base_BND'] or chunk_dict['comp_BND']):
result.extend(self.compare_calls(chunk_dict['base_BND'],
chunk_dict['comp_BND'], chunk_id, True))
return result

def compare_calls(self, base_variants, comp_variants, chunk_id=0):
def compare_calls(self, base_variants, comp_variants, chunk_id=0, is_bnds=False):
"""
Builds MatchResults, returns them as a numpy matrix if there's at least one base and one comp variant.
Otherwise, returns a list of the variants placed in MatchResults
Expand Down Expand Up @@ -558,29 +564,29 @@ def compare_calls(self, base_variants, comp_variants, chunk_id=0):
cnt += 1
pos.extend(truvari.entry_boundaries(i))
chrom = i.chrom
logging.warning("Skipping region %s:%d-%d with %d variants", chrom, min(*pos), max(*pos), cnt)
logging.warning("Skipping region %s:%d-%d with %d variants",
chrom, min(*pos), max(*pos), cnt)
return []

match_matrix = self.build_matrix(
base_variants, comp_variants, chunk_id)
match_matrix = self.build_matrix(base_variants, comp_variants, chunk_id, is_bnds=is_bnds)
if isinstance(match_matrix, list):
return match_matrix
return PICKERS[self.matcher.params.pick](match_matrix)

def build_matrix(self, base_variants, comp_variants, chunk_id=0, skip_gt=False):
def build_matrix(self, base_variants, comp_variants, chunk_id=0, skip_gt=False, is_bnds=False):
"""
Builds MatchResults, returns them as a numpy matrix
"""
matcher = self.matcher.build_match if not is_bnds else self.matcher.bnd_build_match
if not base_variants or not comp_variants:
raise RuntimeError(
"Expected at least one base and one comp variant")
match_matrix = []
for bid, b in enumerate(base_variants):
base_matches = []
for cid, c in enumerate(comp_variants):
mat = self.matcher.build_match(
b, c, [f"{chunk_id}.{bid}", f"{chunk_id}.{cid}"],
skip_gt, self.short_circuit)
mat = matcher(b, c, [f"{chunk_id}.{bid}", f"{chunk_id}.{cid}"],
skip_gt, self.short_circuit)
logging.debug("Made mat -> %s", mat)
base_matches.append(mat)
match_matrix.append(base_matches)
Expand All @@ -603,14 +609,17 @@ def check_refine_candidate(self, result):
chrom = match.comp.chrom
pos.extend(truvari.entry_boundaries(match.comp))
if has_unmatched and pos:
buf = 10 # min(10, self.matcher.params.chunksize) need to make sure the refine covers the region
# min(10, self.matcher.params.chunksize) need to make sure the refine covers the region
buf = 10
start = max(0, min(*pos) - buf)
self.refine_candidates.append(f"{chrom}\t{start}\t{max(*pos) + buf}")

self.refine_candidates.append(
f"{chrom}\t{start}\t{max(*pos) + buf}")

#################
# Match Pickers #
#################


def pick_multi_matches(match_matrix):
"""
Given a numpy array of MatchResults
Expand Down
Loading

0 comments on commit feddaed

Please sign in to comment.