Skip to content

Commit

Permalink
API refactor
Browse files Browse the repository at this point in the history
No longer using pysam.VariantFile and pysam.VariantRecord, but truvari.VariantFile
and truvari.VariantRecord. These are simply wrappers around the pysam
objects that expose advanced functionality to facilitate comparison of entries.
  • Loading branch information
ACEnglish committed Jan 6, 2025
1 parent 4562a5a commit a08f392
Show file tree
Hide file tree
Showing 60 changed files with 946 additions and 984 deletions.
Binary file modified repo_utils/answer_key/bench/bench12/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench12/fn.vcf.gz.tbi
Binary file not shown.
14 changes: 7 additions & 7 deletions repo_utils/answer_key/bench/bench12/log.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
2025-01-06 03:03:52,435 [INFO] Truvari v5.0.0
2025-01-06 03:03:52,435 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/input1.vcf.gz -c repo_utils/test_files/variants/input2.vcf.gz -f repo_utils/test_files/references/reference.fa --dup-to-ins -o test_results/bench12/
2025-01-06 03:03:52,436 [INFO] Params:
2025-01-06 19:48:51,838 [INFO] Truvari v5.0.0
2025-01-06 19:48:51,838 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/input1.vcf.gz -c repo_utils/test_files/variants/input2.vcf.gz -f repo_utils/test_files/references/reference.fa --dup-to-ins -o test_results/bench12/
2025-01-06 19:48:51,839 [INFO] Params:
{
"base": "/data/repo_utils/test_files/variants/input1.vcf.gz",
"comp": "/data/repo_utils/test_files/variants/input2.vcf.gz",
Expand Down Expand Up @@ -29,9 +29,9 @@
"check_monref": true,
"check_multi": true
}
2025-01-06 03:03:52,680 [INFO] Zipped 3936 variants Counter({'base': 2153, 'comp': 1783})
2025-01-06 03:03:52,681 [INFO] 12 chunks of 3936 variants Counter({'__filtered': 3904, 'base': 18, 'comp': 14})
2025-01-06 03:03:52,717 [INFO] Stats: {
2025-01-06 19:48:52,267 [INFO] Zipped 3936 variants Counter({'base': 2153, 'comp': 1783})
2025-01-06 19:48:52,268 [INFO] 12 chunks of 3936 variants Counter({'__filtered': 3904, 'base': 18, 'comp': 14})
2025-01-06 19:48:52,304 [INFO] Stats: {
"TP-base": 8,
"TP-comp": 8,
"FP": 3,
Expand Down Expand Up @@ -62,4 +62,4 @@
}
}
}
2025-01-06 03:03:52,718 [INFO] Finished bench
2025-01-06 19:48:52,304 [INFO] Finished bench
Binary file modified repo_utils/answer_key/bench/bench12/tp-base.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench12/tp-base.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench12_gtcomp/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench12_gtcomp/fn.vcf.gz.tbi
Binary file not shown.
14 changes: 7 additions & 7 deletions repo_utils/answer_key/bench/bench12_gtcomp/log.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
2025-01-06 03:04:01,383 [INFO] Truvari v5.0.0
2025-01-06 03:04:01,383 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/input1.vcf.gz -c repo_utils/test_files/variants/input2.vcf.gz -f repo_utils/test_files/references/reference.fa --dup-to-ins -o test_results/bench12_gtcomp/ --pick ac
2025-01-06 03:04:01,384 [INFO] Params:
2025-01-06 19:49:00,627 [INFO] Truvari v5.0.0
2025-01-06 19:49:00,628 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/input1.vcf.gz -c repo_utils/test_files/variants/input2.vcf.gz -f repo_utils/test_files/references/reference.fa --dup-to-ins -o test_results/bench12_gtcomp/ --pick ac
2025-01-06 19:49:00,628 [INFO] Params:
{
"base": "/data/repo_utils/test_files/variants/input1.vcf.gz",
"comp": "/data/repo_utils/test_files/variants/input2.vcf.gz",
Expand Down Expand Up @@ -29,9 +29,9 @@
"check_monref": true,
"check_multi": true
}
2025-01-06 03:04:01,649 [INFO] Zipped 3936 variants Counter({'base': 2153, 'comp': 1783})
2025-01-06 03:04:01,650 [INFO] 12 chunks of 3936 variants Counter({'__filtered': 3904, 'base': 18, 'comp': 14})
2025-01-06 03:04:01,689 [INFO] Stats: {
2025-01-06 19:49:01,097 [INFO] Zipped 3936 variants Counter({'base': 2153, 'comp': 1783})
2025-01-06 19:49:01,097 [INFO] 12 chunks of 3936 variants Counter({'__filtered': 3904, 'base': 18, 'comp': 14})
2025-01-06 19:49:01,141 [INFO] Stats: {
"TP-base": 9,
"TP-comp": 8,
"FP": 3,
Expand Down Expand Up @@ -62,4 +62,4 @@
}
}
}
2025-01-06 03:04:01,690 [INFO] Finished bench
2025-01-06 19:49:01,141 [INFO] Finished bench
Binary file modified repo_utils/answer_key/bench/bench12_gtcomp/tp-base.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench12_gtcomp/tp-base.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench13/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench13/fn.vcf.gz.tbi
Binary file not shown.
14 changes: 7 additions & 7 deletions repo_utils/answer_key/bench/bench13/log.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
2025-01-06 03:03:53,974 [INFO] Truvari v5.0.0
2025-01-06 03:03:53,975 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/input1.vcf.gz -c repo_utils/test_files/variants/input3.vcf.gz -f repo_utils/test_files/references/reference.fa --dup-to-ins -o test_results/bench13/
2025-01-06 03:03:53,975 [INFO] Params:
2025-01-06 19:46:06,885 [INFO] Truvari v5.0.0
2025-01-06 19:46:06,886 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/input1.vcf.gz -c repo_utils/test_files/variants/input3.vcf.gz -f repo_utils/test_files/references/reference.fa --dup-to-ins -o test_results/bench13/
2025-01-06 19:46:06,887 [INFO] Params:
{
"base": "/data/repo_utils/test_files/variants/input1.vcf.gz",
"comp": "/data/repo_utils/test_files/variants/input3.vcf.gz",
Expand Down Expand Up @@ -29,9 +29,9 @@
"check_monref": true,
"check_multi": true
}
2025-01-06 03:03:54,559 [INFO] Zipped 4218 variants Counter({'base': 2153, 'comp': 2065})
2025-01-06 03:03:54,560 [INFO] 14 chunks of 4218 variants Counter({'__filtered': 4179, 'comp': 21, 'base': 18})
2025-01-06 03:03:54,606 [INFO] Stats: {
2025-01-06 19:46:07,352 [INFO] Zipped 4218 variants Counter({'base': 2153, 'comp': 2065})
2025-01-06 19:46:07,353 [INFO] 14 chunks of 4218 variants Counter({'__filtered': 4179, 'comp': 21, 'base': 18})
2025-01-06 19:46:07,392 [INFO] Stats: {
"TP-base": 10,
"TP-comp": 10,
"FP": 7,
Expand Down Expand Up @@ -61,4 +61,4 @@
}
}
}
2025-01-06 03:03:54,607 [INFO] Finished bench
2025-01-06 19:46:07,392 [INFO] Finished bench
Binary file modified repo_utils/answer_key/bench/bench13/tp-base.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench13/tp-base.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench13_extend/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench13_extend/fn.vcf.gz.tbi
Binary file not shown.
22 changes: 11 additions & 11 deletions repo_utils/answer_key/bench/bench13_extend/log.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
2025-01-06 03:03:58,792 [INFO] Truvari v5.0.0
2025-01-06 03:03:58,793 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/input1.vcf.gz -c repo_utils/test_files/variants/input3.vcf.gz -f repo_utils/test_files/references/reference.fa --dup-to-ins -o test_results/bench13_extend/ --includebed repo_utils/test_files/beds/include.bed --extend 500
2025-01-06 03:03:58,794 [INFO] Params:
2025-01-06 19:48:57,885 [INFO] Truvari v5.0.0
2025-01-06 19:48:57,886 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/input1.vcf.gz -c repo_utils/test_files/variants/input3.vcf.gz -f repo_utils/test_files/references/reference.fa --dup-to-ins -o test_results/bench13_extend/ --includebed repo_utils/test_files/beds/include.bed --extend 500
2025-01-06 19:48:57,886 [INFO] Params:
{
"base": "/data/repo_utils/test_files/variants/input1.vcf.gz",
"comp": "/data/repo_utils/test_files/variants/input3.vcf.gz",
Expand Down Expand Up @@ -29,13 +29,13 @@
"check_monref": true,
"check_multi": true
}
2025-01-06 03:03:58,809 [INFO] Including 11 bed regions
2025-01-06 03:03:58,810 [INFO] Found 1 chromosomes with overlapping regions
2025-01-06 03:03:58,810 [INFO] Extending the regions by 500 bases
2025-01-06 03:03:58,812 [INFO] Found 1 chromosomes with overlapping regions
2025-01-06 03:03:58,942 [INFO] Zipped 2052 variants Counter({'base': 1132, 'comp': 920})
2025-01-06 03:03:58,942 [INFO] 10 chunks of 2052 variants Counter({'__filtered': 2025, 'comp': 15, 'base': 12})
2025-01-06 03:03:58,979 [INFO] Stats: {
2025-01-06 19:48:57,901 [INFO] Including 11 bed regions
2025-01-06 19:48:57,902 [INFO] Found 1 chromosomes with overlapping regions
2025-01-06 19:48:57,902 [INFO] Extending the regions by 500 bases
2025-01-06 19:48:57,903 [INFO] Found 1 chromosomes with overlapping regions
2025-01-06 19:48:58,130 [INFO] Zipped 2052 variants Counter({'base': 1132, 'comp': 920})
2025-01-06 19:48:58,131 [INFO] 10 chunks of 2052 variants Counter({'__filtered': 2025, 'comp': 15, 'base': 12})
2025-01-06 19:48:58,167 [INFO] Stats: {
"TP-base": 6,
"TP-comp": 6,
"FP": 4,
Expand Down Expand Up @@ -64,4 +64,4 @@
}
}
}
2025-01-06 03:03:58,979 [INFO] Finished bench
2025-01-06 19:48:58,168 [INFO] Finished bench
Binary file modified repo_utils/answer_key/bench/bench13_extend/tp-base.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench13_extend/tp-base.vcf.gz.tbi
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench13_includebed/fn.vcf.gz
Binary file not shown.
Binary file modified repo_utils/answer_key/bench/bench13_includebed/fn.vcf.gz.tbi
Binary file not shown.
18 changes: 9 additions & 9 deletions repo_utils/answer_key/bench/bench13_includebed/log.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
2025-01-06 03:03:57,421 [INFO] Truvari v5.0.0
2025-01-06 03:03:57,421 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/input1.vcf.gz -c repo_utils/test_files/variants/input3.vcf.gz -f repo_utils/test_files/references/reference.fa --dup-to-ins -o test_results/bench13_includebed/ --includebed repo_utils/test_files/beds/include.bed
2025-01-06 03:03:57,422 [INFO] Params:
2025-01-06 19:48:56,555 [INFO] Truvari v5.0.0
2025-01-06 19:48:56,556 [INFO] Command /data/truvari/__main__.py bench -b repo_utils/test_files/variants/input1.vcf.gz -c repo_utils/test_files/variants/input3.vcf.gz -f repo_utils/test_files/references/reference.fa --dup-to-ins -o test_results/bench13_includebed/ --includebed repo_utils/test_files/beds/include.bed
2025-01-06 19:48:56,556 [INFO] Params:
{
"base": "/data/repo_utils/test_files/variants/input1.vcf.gz",
"comp": "/data/repo_utils/test_files/variants/input3.vcf.gz",
Expand Down Expand Up @@ -29,11 +29,11 @@
"check_monref": true,
"check_multi": true
}
2025-01-06 03:03:57,439 [INFO] Including 11 bed regions
2025-01-06 03:03:57,440 [INFO] Found 1 chromosomes with overlapping regions
2025-01-06 03:03:57,568 [INFO] Zipped 2032 variants Counter({'base': 1132, 'comp': 900})
2025-01-06 03:03:57,568 [INFO] 9 chunks of 2032 variants Counter({'__filtered': 2009, 'base': 12, 'comp': 11})
2025-01-06 03:03:57,606 [INFO] Stats: {
2025-01-06 19:48:56,571 [INFO] Including 11 bed regions
2025-01-06 19:48:56,572 [INFO] Found 1 chromosomes with overlapping regions
2025-01-06 19:48:56,791 [INFO] Zipped 2032 variants Counter({'base': 1132, 'comp': 900})
2025-01-06 19:48:56,792 [INFO] 9 chunks of 2032 variants Counter({'__filtered': 2009, 'base': 12, 'comp': 11})
2025-01-06 19:48:56,828 [INFO] Stats: {
"TP-base": 5,
"TP-comp": 5,
"FP": 4,
Expand All @@ -59,4 +59,4 @@
}
}
}
2025-01-06 03:03:57,607 [INFO] Finished bench
2025-01-06 19:48:56,829 [INFO] Finished bench
Binary file modified repo_utils/answer_key/bench/bench13_includebed/tp-base.vcf.gz
Binary file not shown.
Binary file not shown.
4 changes: 2 additions & 2 deletions repo_utils/answer_key/collapse/input1_collapsed.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -1177,9 +1177,9 @@ chr20 612157 . T G 60 . QNAME=HG002-S9-H1-000001F;QSTART=591666;QSTRAND=+ GT:PL:
chr20 612432 . T C 60 . QNAME=HG002-S9-H1-000001F;QSTART=591941;QSTRAND=+ GT:PL:DP 1|1:9,5,8:19,42
chr20 612664 . T C 60 . QNAME=HG002-S9-H1-000001F;QSTART=592173;QSTRAND=+ GT:PL:DP 1|1:8,9,7:8,15
chr20 612674 . A G 60 . QNAME=HG002-S9-H1-000001F;QSTART=592183;QSTRAND=+ GT:PL:DP 1|0:8,3,2:5,28
chr20 613783 . T <DEL> 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=DEL;END=613837;SVLEN=-54 GT:PL:DP 1/1:6,8,8:40,41
chr20 613783 . T <DUP> 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=DUP;END=613837;SVLEN=54 GT:PL:DP 1/1:6,8,8:40,41
chr20 613783 . TGTGTGCTGAGTCCAGCTCAAGTCCCTTGGTTCCCACTGCTGCTAAGCATGCACG CGTGCATGCTTAGCAGCAGTGGGAACCAAGGGACTTGAGCTGGACTCAGCACACA 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=INV;SVLEN=54 GT:PL:DP 1/1:6,8,8:40,41
chr20 613783 . TGTGTGCTGAGTCCAGCTCAAGTCCCTTGGTTCCCACTGCTGCTAAGCATGCACG T 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=DEL;SVLEN=-54 GT:PL:DP 1/1:6,8,8:40,41
chr20 613783 . T <INV> 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=INV;END=613837;SVLEN=54 GT:PL:DP 1/1:6,8,8:40,41
chr20 614408 . C T 60 . QNAME=HG002-S9-H2-000001F;QSTART=568827;QSTRAND=+ GT:PL:DP 0|1:10,8,7:15,40
chr20 614449 . ATCTC A 60 . QNAME=HG002-S9-H1-000001F;QSTART=593905;QSTRAND=+ GT:PL:DP 1|0:6,2,2:41,8
chr20 614473 . C A 60 . QNAME=HG002-S9-H1-000001F;QSTART=593924;QSTRAND=+ GT:PL:DP 1|0:3,8,4:8,28
Expand Down
4 changes: 2 additions & 2 deletions repo_utils/answer_key/collapse/input1_median_collapsed.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -1180,9 +1180,9 @@ chr20 612157 . T G 60 . QNAME=HG002-S9-H1-000001F;QSTART=591666;QSTRAND=+ GT:PL:
chr20 612432 . T C 60 . QNAME=HG002-S9-H1-000001F;QSTART=591941;QSTRAND=+ GT:PL:DP 1|1:9,5,8:19,42
chr20 612664 . T C 60 . QNAME=HG002-S9-H1-000001F;QSTART=592173;QSTRAND=+ GT:PL:DP 1|1:8,9,7:8,15
chr20 612674 . A G 60 . QNAME=HG002-S9-H1-000001F;QSTART=592183;QSTRAND=+ GT:PL:DP 1|0:8,3,2:5,28
chr20 613783 . T <DEL> 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=DEL;END=613837;SVLEN=-54 GT:PL:DP 1/1:6,8,8:40,41
chr20 613783 . T <DUP> 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=DUP;END=613837;SVLEN=54 GT:PL:DP 1/1:6,8,8:40,41
chr20 613783 . TGTGTGCTGAGTCCAGCTCAAGTCCCTTGGTTCCCACTGCTGCTAAGCATGCACG CGTGCATGCTTAGCAGCAGTGGGAACCAAGGGACTTGAGCTGGACTCAGCACACA 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=INV;SVLEN=54 GT:PL:DP 1/1:6,8,8:40,41
chr20 613783 . TGTGTGCTGAGTCCAGCTCAAGTCCCTTGGTTCCCACTGCTGCTAAGCATGCACG T 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=DEL;SVLEN=-54 GT:PL:DP 1/1:6,8,8:40,41
chr20 613783 . T <INV> 60 . QNAME=HG002-S9-H1-000001F;QSTART=593293;QSTRAND=+;SVTYPE=INV;END=613837;SVLEN=54 GT:PL:DP 1/1:6,8,8:40,41
chr20 614408 . C T 60 . QNAME=HG002-S9-H2-000001F;QSTART=568827;QSTRAND=+ GT:PL:DP 0|1:10,8,7:15,40
chr20 614449 . ATCTC A 60 . QNAME=HG002-S9-H1-000001F;QSTART=593905;QSTRAND=+ GT:PL:DP 1|0:6,2,2:41,8
chr20 614473 . C A 60 . QNAME=HG002-S9-H1-000001F;QSTART=593924;QSTRAND=+ GT:PL:DP 1|0:3,8,4:8,28
Expand Down
13 changes: 6 additions & 7 deletions repo_utils/run_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""
import os
import sys
import pysam
from collections import defaultdict
from intervaltree import IntervalTree

Expand All @@ -22,10 +21,10 @@
region_start = 10
region_end = 20

vcf = pysam.VariantFile(vcf_fn)
vcf = truvari.VariantFile(vcf_fn)
for entry in vcf:
state = entry.info['include'] == 'in'
assert state == truvari.entry_within(entry, region_start, region_end), f"Bad Boundary {str(entry)}"
assert state == entry.within(region_start, region_end), f"Bad Boundary {str(entry)}"

# removed
#regions = truvari.RegionVCFIterator(vcf, includebed=bed_fn)
Expand All @@ -50,7 +49,7 @@
data = line.strip().split()
tree[data[0]].addi(int(data[1]), int(data[2]) + 1)

vcf = pysam.VariantFile(vcf_fn)
vcf = truvari.VariantFile(vcf_fn)
for entry in truvari.region_filter(vcf, tree, True, False):
assert entry.info['include'] == 'in', f"Bad in {str(entry)}"

Expand All @@ -59,23 +58,23 @@
assert entry.info['include'] == 'out', f"Bad out {str(entry)}"


vcf = pysam.VariantFile(vcf_fn)
vcf = truvari.VariantFile(vcf_fn)
for entry in truvari.region_filter_stream(vcf, tree, True, False):
assert entry.info['include'] == 'in', f"Bad in {str(entry)}"

vcf.reset()
for entry in truvari.region_filter_stream(vcf, tree, False, False):
assert entry.info['include'] == 'out', f"Bad out {str(entry)}"

vcf = pysam.VariantFile(vcf_fn)
vcf = truvari.VariantFile(vcf_fn)
for entry in truvari.region_filter_fetch(vcf, tree, False):
assert entry.info['include'] == 'in', f"Bad in {str(entry)}"


"""
Filtering logic
"""
vcf = pysam.VariantFile("repo_utils/test_files/variants/filter.vcf")
vcf = truvari.VariantFile("repo_utils/test_files/variants/filter.vcf")
matcher = truvari.Matcher()
matcher.params.sizemin = 0
matcher.params.sizefilt = 0
Expand Down
47 changes: 13 additions & 34 deletions truvari/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
"""
Truvari - SV comparison and annotation toolkit
See `help()` of specific functions / objects for details
VariantRecord methods:
Objects:
:meth:`entry_is_present`
:meth:`entry_reciprocal_overlap`
:meth:`entry_same_variant_type`
:meth:`entry_seq_similarity`
:meth:`entry_size`
:meth:`entry_size_similarity`
:meth:`entry_to_hash`
:meth:`entry_to_key`
:meth:`entry_within`
:class:`Bench`
:class:`BenchOutput`
:class:`GT`
:class:`RegionVCFIterator`
:class:`LogFileStderr`
:class:`MatchResult`
:class:`Matcher`
:class:`StatsBox`
:class:`SV`
:class:`VariantRecord`
Extra methods:
Expand Down Expand Up @@ -61,18 +61,6 @@
:meth:`setup_logging`
:meth:`vcf_to_df`
Objects:
:class:`Bench`
:class:`BenchOutput`
:class:`GT`
:class:`RegionVCFIterator`
:class:`LogFileStderr`
:class:`MatchResult`
:class:`Matcher`
:class:`StatsBox`
:class:`SV`
Data:
:data:`truvari.HEADERMAT`
Expand Down Expand Up @@ -101,16 +89,6 @@
from truvari.comparisons import (
best_seqsim,
coords_within,
entry_is_present,
entry_reciprocal_overlap,
entry_same_variant_type,
entry_seq_similarity,
entry_size,
entry_size_similarity,
entry_to_hash,
entry_to_key,
entry_within_tree,
entry_within,
overlap_percent,
overlaps,
reciprocal_overlap,
Expand Down Expand Up @@ -168,7 +146,8 @@
vcf_ranges,
)

from truvari.variant_record import (
from truvari.variants import (
VariantFile,
VariantRecord,
)

Expand Down
4 changes: 2 additions & 2 deletions truvari/annotations/af_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def allele_freq_annos(entry, samples=None):
Calculate allele annotations for a VCF Entry
:param `entry`: Entry with samples to parse
:type `entry`: :class:`pysam.VariantRecord`
:type `entry`: :class:`truvari.VariantRecord`
:param `samples`: Subset of samples from the entry over which to calculate annos
:type `samples`: list of strings, optional
Expand All @@ -155,7 +155,7 @@ def allele_freq_annos(entry, samples=None):
Example
>>> import truvari
>>> import pysam
>>> v = pysam.VariantFile('repo_utils/test_files/variants/multi.vcf.gz')
>>> v = truvari.VariantFile('repo_utils/test_files/variants/multi.vcf.gz')
>>> truvari.allele_freq_annos(next(v))
{'AF': 0.5, 'MAF': 0.5, 'ExcHet': 1.0, 'HWE': 1.0, 'MAC': 1, 'AC': [1, 1], 'AN': 2, 'N_HEMI': 0, 'N_HOMREF': 0, 'N_HET': 1, 'N_HOMALT': 0, 'N_MISS': 2}
"""
Expand Down
5 changes: 2 additions & 3 deletions truvari/annotations/bpovl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging
import argparse

import pysam
import joblib
import pandas as pd

Expand Down Expand Up @@ -62,7 +61,7 @@ def bpovl_main(cmdargs):
Main method
"""
args = parse_args(cmdargs)
in_vcf = pysam.VariantFile(args.input)
in_vcf = truvari.VariantFile(args.input)
anno_tree, anno_cnt = truvari.build_anno_tree(args.anno, *args.anno_psets)
logging.info("Loaded %d annotations", anno_cnt)

Expand All @@ -79,7 +78,7 @@ def _transform():
if entry.size() < args.sizemin:
continue

key = truvari.entry_to_hash(entry)
key = entry.to_hash()
for anno_idx in anno_tree[entry.chrom].at(start):
has_hit = True
yield [key, 'start_bnd', anno_idx.data]
Expand Down
5 changes: 2 additions & 3 deletions truvari/annotations/chunks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
"""
import sys
import argparse
import pysam

import truvari
from truvari.collapse import tree_size_chunker, tree_dist_chunker
Expand Down Expand Up @@ -41,15 +40,15 @@ def get_bounds(cnk):
mend = 0
for i in cnk:
mstart = min(mstart, i.start)
mend = max(mend, i.stop)
mend = max(mend, i.end)
return mstart, mend

def chunks_main(args):
"""
Main
"""
args = parse_args(args)
v = pysam.VariantFile(args.input)
v = truvari.VariantFile(args.input)
m = truvari.Matcher()
m.params.pctseq = 0
m.params.sizemin = args.sizemin
Expand Down
Loading

0 comments on commit a08f392

Please sign in to comment.