Skip to content

Commit

Permalink
expanding VariantParams documentation
Browse files Browse the repository at this point in the history
And cleaning it up
  • Loading branch information
ACEnglish committed Jan 7, 2025
1 parent c42ff8a commit 1c8b71f
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 73 deletions.
2 changes: 1 addition & 1 deletion docs/api/truvari.examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ This returns a `truvari.MatchResult`. You can customize matching thresholds by p
.. code-block:: python
# Disable sequence and size similarity; enable reciprocal overlap
matcher = truvari.VariantParams(seqsim=0, sizesim=0, recovl=0.5)
matcher = truvari.VariantParams(pctseq=0, pctsize=0, pctovl=0.5)
vcf = truvari.VariantFile("input.vcf.gz", matcher=matcher)
entry1 = next(vcf)
entry2 = next(vcf)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# ------------------------------------------------------------
# unittest
# unittests
# ------------------------------------------------------------
run unittest coverage run --concurrency=multiprocessing -p repo_utils/run_unittest.py
run unittests coverage run --concurrency=multiprocessing -p repo_utils/run_unittest.py
if [ $unittest ]; then
assert_exit_code 0
fi
2 changes: 1 addition & 1 deletion repo_utils/truvari_ssshtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ source $TESTSRC/sub_tests/phab.sh
source $TESTSRC/sub_tests/refine.sh
source $TESTSRC/sub_tests/segment.sh
source $TESTSRC/sub_tests/stratify.sh
source $TESTSRC/sub_tests/unittest.sh
source $TESTSRC/sub_tests/unittests.sh
source $TESTSRC/sub_tests/vcf2df.sh
source $TESTSRC/sub_tests/version.sh

Expand Down
2 changes: 1 addition & 1 deletion truvari/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def parse_args(args):
"""
Pull the command line parameters
"""
defaults = truvari.VariantParams.make_params()
defaults = truvari.VariantParams()
parser = argparse.ArgumentParser(prog="bench", description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("-b", "--base", type=str, required=True,
Expand Down
173 changes: 105 additions & 68 deletions truvari/variant_params.py
Original file line number Diff line number Diff line change
@@ -1,86 +1,123 @@
"""
Truvari main parameters
"""
import types


#pylint: disable=too-few-public-methods
class VariantParams():
"""
Holds variant parsing and matching parameters.
Example
>>> import truvari
>>> p = truvari.VariantParams(pctseq=0)
>>> v = truvari.VariantFile('repo_utils/test_files/variants/input1.vcf.gz', params=p)
>>> one = next(v); two = next(v)
>>> one.match(two)
<truvari.bench.MatchResult (False 2.381)>
Attributes
----------
.. list-table::
:header-rows: 1
* - Attribute
- Description
* - `refdist`
- Distance threshold for comparing positions in the reference genome. Default: 500.
* - `pctseq`
- Minimum percentage of sequence similarity required for a match. Default: 0.70 (70%).
* - `pctsize`
- Minimum percentage of size similarity required for a match. Default: 0.70 (70%).
* - `pctovl`
- Minimum percentage of reciprocal overlap required for comparing variants. Default: 0.0 (disabled).
* - `typeignore`
- Whether to ignore variant type mismatches during comparison. Default: `False`.
* - `no_roll`
- Whether to disable rolling of sequences for comparisons. Default: `False`.
* - `chunksize`
- Number of entries to process in each chunk. Default: 1000.
* - `bSample`
- Sample index for the "base" (a.k.a. self) variants during comparisons. Default: 0.
* - `cSample`
- Sample index for the "comparison" (a.k.a. other) variants during comparisons. Default: 0.
* - `dup_to_ins`
- Whether to treat duplications as insertions for some operations. Default: `False`.
* - `bnddist`
- Maximum allowed distance for breakend (BND) comparisons. Default: 100.
* - `sizemin`
- Minimum variant size to consider. Default: 50.
* - `sizefilt`
- Minimum size filter for comparison in the "comparison" dataset. Default: 30.
* - `sizemax`
- Maximum variant size to consider. Default: 50000.
* - `passonly`
- Whether to only consider variants with a "PASS" filter status. Default: `False`.
* - `no_ref`
- Whether to ignore reference homozygous variants in (a)ll, (b)ase, or (c)omp VCF. Default: `False` (off).
* - `pick`
- Strategy for picking matches by Bench (single, ac, multi). Default: `single`.
* - `ignore_monref`
- Whether to ignore monoallelic reference calls. Default: `True`.
* - `check_multi`
- Whether to check for and handle multi-allelic records. Default: `True`.
* - `check_monref`
- Whether to check for monoallelic reference calls. Default: `True`.
* - `no_single_bnd`
- Whether to exclude single-end breakends (BNDs) from comparisons. Default: `True`.
* - `write_resolved`
- Whether to write resolved REF/ALT sequences to output. Default: `False`.
* - `short_circuit`
- Whether to enable short-circuit logic for early exits in comparisons. Default: `False`.
* - `skip_gt`
- Whether to skip genotype comparisons. Default: `False`.
Look at `VariantParams.make_params()` for a list of all params and their defaults
"""

DEFAULTS = {
"reference": None,
"refdist": 500,
"pctseq": 0.70,
"pctsize": 0.70,
"pctovl": 0.0,
"typeignore": False,
"no_roll": False,
"chunksize": 1000,
"bSample": 0,
"cSample": 0,
"dup_to_ins": False,
"bnddist": 100,
"sizemin": 50,
"sizefilt": 30,
"sizemax": 50000,
"passonly": False,
"no_ref": False,
"pick": "single",
"ignore_monref": True,
"check_multi": True,
"check_monref": True,
"no_single_bnd": True,
"write_resolved": False,
"short_circuit": False,
"skip_gt": False,
}

def __init__(self, args=None, **kwargs):
"""
Initialize. args is a Namespace from argparse
Initialize VariantParams with defaults, args, and kwargs.
Parameters
----------
args : Namespace (optional)
An argparse.Namespace object to initialize parameters.
kwargs : dict
Additional parameters to override defaults.
"""
if args is not None:
params = self.make_params_from_args(args)
else:
params = self.make_params()
# Start with defaults
params = self.DEFAULTS.copy()

# Override with args if provided
if args:
for key in vars(args):
if key in params:
params[key] = getattr(args, key)

# Override parameters with those provided in kwargs
# Override with kwargs
for key, value in kwargs.items():
if hasattr(params, key):
setattr(params, key, value)
if key in params:
params[key] = value
else:
raise ValueError(f"Invalid parameter: {key}")

for key, value in params.__dict__.items():
setattr(self, key, value)

@staticmethod
def make_params():
"""
Makes a simple namespace of matching parameters. Holds defaults
"""
params = types.SimpleNamespace()
params.reference = None
params.refdist = 500
params.pctseq = 0.70
params.pctsize = 0.70
params.pctovl = 0.0
params.typeignore = False
params.no_roll = False
params.chunksize = 1000
params.bSample = 0
params.cSample = 0
params.dup_to_ins = False
params.bnddist = 100
params.sizemin = 50
params.sizefilt = 30
params.sizemax = 50000
params.passonly = False
params.no_ref = False
params.pick = 'single'
params.ignore_monref = True
params.check_multi = True
params.check_monref = True
params.no_single_bnd = True
params.write_resolved = False
params.short_circuit = False
params.skip_gt = False
return params

@staticmethod
def make_params_from_args(args):
"""
Makes a simple namespace of matching parameters.
Populates defaults from make_params, then updates with values from args.
"""
ret = VariantParams.make_params()

for key in vars(ret):
if hasattr(args, key):
setattr(ret, key, getattr(args, key))

return ret
# Set attributes
self.__dict__.update(params)

0 comments on commit 1c8b71f

Please sign in to comment.