Skip to content

Commit

Permalink
code clean
Browse files Browse the repository at this point in the history
VariantParams better reflects its use. Split apart variants.py as the
code was getting way too long
  • Loading branch information
ACEnglish committed Jan 7, 2025
1 parent efaa5c5 commit c42ff8a
Show file tree
Hide file tree
Showing 23 changed files with 508 additions and 475 deletions.
2 changes: 1 addition & 1 deletion docs/api/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# -- Project information -----------------------------------------------------

project = 'Truvari'
copyright = '2023, Adam English'
copyright = '2025, Adam English'
author = 'Adam English'


Expand Down
11 changes: 6 additions & 5 deletions docs/api/truvari.examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ The `truvari.VariantRecord` simplifies comparing two VCF entries.
print("Entries' Size Similarity:", match.sizesim)
print("Is the match above thresholds:", match.state)
This returns a `truvari.MatchResult`. You can customize matching thresholds by providing a `truvari.Matcher` to the `truvari.VariantFile`.
This returns a `truvari.MatchResult`. You can customize matching thresholds by providing a `truvari.VariantParams` to the `truvari.VariantFile`.

.. code-block:: python
# Disable sequence and size similarity; enable reciprocal overlap
matcher = truvari.Matcher(seqsim=0, sizesim=0, recovl=0.5)
matcher = truvari.VariantParams(seqsim=0, sizesim=0, recovl=0.5)
vcf = truvari.VariantFile("input.vcf.gz", params=matcher)
entry1 = next(vcf)
entry2 = next(vcf)
Expand All @@ -46,11 +46,11 @@ This returns a `truvari.MatchResult`. You can customize matching thresholds by p
Filtering Variants
------------------

The `truvari.Matcher` provides parameters for filtering variants, such as minimum or maximum SV sizes.
The `truvari.VariantParams` provides parameters for filtering variants, such as minimum or maximum SV sizes.

.. code-block:: python
matcher = truvari.Matcher(sizemin=200, sizemax=500)
matcher = truvari.VariantParams(sizemin=200, sizemax=500)
vcf = truvari.VariantFile("input.vcf.gz", params=matcher)
# Retrieve all variant records within sizemin and sizemax
results = [entry for entry in vcf if not entry.size_filter()]
Expand All @@ -65,7 +65,8 @@ To subset a VCF to regions specified in a BED file, use:
.. code-block:: python
for entry in vcf.bed_fetch("regions.bed"):
print(entry.var_type(), entry.size())
print("Entry's variant type:", entry.var_type())
print("Entry's variant size:", entry.var_size())
If your regions of interest are stored in an in-memory object instead of a BED file, use the `.regions_fetch` method:

Expand Down
6 changes: 3 additions & 3 deletions docs/api/truvari.package.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ Variant Handling
.. autoclass:: VariantRecord
:members:

.. autoclass:: VariantParams
:members:

Extra Methods
-------------
.. autofunction:: bed_ranges
Expand Down Expand Up @@ -114,9 +117,6 @@ Objects
.. autoclass:: MatchResult
:members:

.. autoclass:: Matcher
:members:

.. autoclass:: SV
:members:

Expand Down
6 changes: 4 additions & 2 deletions repo_utils/run_doctests.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
comparisons,
msatovcf,
utils,
variants,
variant_file,
variant_record,
vcf2df,
)

Expand All @@ -27,7 +28,8 @@ def tester(module):
return ret.failed

fails = 0
fails += tester(variants)
fails += tester(variant_file)
fails += tester(variant_record)
fails += tester(comparisons)
fails += tester(utils)
fails += tester(vcf2df)
Expand Down
4 changes: 2 additions & 2 deletions repo_utils/run_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@
"""
Filtering logic
"""
matcher = truvari.Matcher(sizemin=0, sizefilt=0, passonly=True)
vcf = truvari.VariantFile("repo_utils/test_files/variants/filter.vcf", matcher=matcher)
p = truvari.VariantParams(sizemin=0, sizefilt=0, passonly=True)
vcf = truvari.VariantFile("repo_utils/test_files/variants/filter.vcf", params=p)
for entry in vcf:
try:
assert entry.filter_call(), f"Didn't filter {str(entry)}"
Expand Down
10 changes: 4 additions & 6 deletions truvari/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
:class:`GT`
:class:`LogFileStderr`
:class:`MatchResult`
:class:`Matcher`
:class:`StatsBox`
:class:`SV`
:class:`VariantFile`
:class:`VariantRecord`
:class:`VariantParams`
Extra methods:
Expand Down Expand Up @@ -89,7 +89,6 @@

from truvari.matching import (
MatchResult,
Matcher,
chunker,
file_zipper
)
Expand Down Expand Up @@ -132,10 +131,9 @@
vcf_ranges,
)

from truvari.variants import (
VariantFile,
VariantRecord,
)
from truvari.variant_file import VariantFile
from truvari.variant_params import VariantParams
from truvari.variant_record import VariantRecord

from truvari.vcf2df import (
GT,
Expand Down
2 changes: 1 addition & 1 deletion truvari/annotations/bpovl.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def _transform():
if span > args.spanmax:
continue

if entry.size() < args.sizemin:
if entry.var_size() < args.sizemin:
continue

key = entry.to_hash()
Expand Down
2 changes: 1 addition & 1 deletion truvari/annotations/chunks.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def chunks_main(args):
"""
args = parse_args(args)
v = truvari.VariantFile(args.input)
m = truvari.Matcher(args=args, pctseq=0)
m = truvari.VariantParams(args=args, pctseq=0)
if args.bed:
v = v.bed_fetch(args.bed)
c = truvari.chunker(m, ('base', v))
Expand Down
4 changes: 2 additions & 2 deletions truvari/annotations/hompct.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def get_pct(chrom, start, end):
tot = 0
homs = 0
for entry in v.fetch(chrom, max(0, start - args.buffer), min(v.header.contigs[chrom].length, end + args.buffer)):
if entry.size() > args.maxgt:
if entry.var_size() > args.maxgt:
continue
if truvari.get_gt(entry.samples[0]["GT"]).name == "HOM":
homs += 1
Expand All @@ -66,7 +66,7 @@ def get_pct(chrom, start, end):
out = truvari.VariantFile(args.output, 'w', header=header)
v2 = truvari.VariantFile(args.input)
for entry in v2:
if entry.size() >= args.minanno:
if entry.var_size() >= args.minanno:
entry.translate(header)
anno = get_pct(entry.chrom, *entry.boundaries())
if anno is not None:
Expand Down
2 changes: 1 addition & 1 deletion truvari/annotations/numneigh.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def run(self):
"""
last_pos = None
for entry in self.in_vcf:
size = entry.size()
size = entry.var_size()
if not last_pos:
last_pos = [entry.chrom, entry.start]

Expand Down
2 changes: 1 addition & 1 deletion truvari/annotations/remap.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def annotate_entry(self, entry):
"""
Annotates entries in the vcf and writes to new vcf
"""
if entry.size() >= self.min_length:
if entry.var_size() >= self.min_length:
entry.translate(self.n_header)
remap, hits = self.remap_entry(entry)
entry.info["REMAP"] = remap
Expand Down
4 changes: 2 additions & 2 deletions truvari/annotations/repmask.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def extract_seqs(self):
fh = truvari.VariantFile(self.in_vcf)
for pos, entry in enumerate(fh):
tot_cnt += 1
entry_size = entry.size()
entry_size = entry.var_size()
if self.min_length <= entry_size <= self.max_length:
cnt += 1
cntbp += entry_size
Expand Down Expand Up @@ -126,7 +126,7 @@ def annotate_entry(self, entry, hits):
"""
best_hit_pct = 0
best_hit = None
entry_size = entry.size()
entry_size = entry.var_size()
for hit in hits:
size_aln = abs(hit["RM_qstart"] - hit["RM_qend"]) + 1
pct = size_aln / entry_size # The TR that covers the most of the sequence
Expand Down
2 changes: 1 addition & 1 deletion truvari/annotations/svinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def add_svinfo(entry, min_size=0, n_header=None):
del entry.info['SVTYPE']
if "SVLEN" in entry.info:
del entry.info['SVLEN']
sz = entry.size()
sz = entry.var_size()
if sz < min_size:
return
if n_header:
Expand Down
6 changes: 3 additions & 3 deletions truvari/annotations/trf.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def annotate(self, entry, score_filter=True):
Figure out the hit and return
"""
svtype = entry.var_type()
sz = entry.size()
sz = entry.var_size()
repeat = []
if svtype == truvari.SV.DEL:
repeat = self.del_annotate(entry, sz, score_filter)
Expand Down Expand Up @@ -396,7 +396,7 @@ def process_ref_region(region, args):
continue

svtype = entry.var_type()
svlen = entry.size()
svlen = entry.var_size()
if svlen < args.min_length or svtype not in [truvari.SV.DEL, truvari.SV.INS]:
out.write(str(edit_entry(entry, None, new_header)))
continue
Expand Down Expand Up @@ -460,7 +460,7 @@ def process_tr_region(region, args):
if not (entry.start >= region["start"] and entry.stop < region["end"]):
continue
svtype = entry.var_type()
svlen = entry.size()
svlen = entry.var_size()
if svlen < args.min_length or svtype not in [truvari.SV.DEL, truvari.SV.INS]:
out.write(str(edit_entry(entry, None, new_header)))
continue
Expand Down
Loading

0 comments on commit c42ff8a

Please sign in to comment.