-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
expanding VariantParams documentation
And cleaning it up
- Loading branch information
Showing
5 changed files
with
110 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 2 additions & 2 deletions
4
repo_utils/sub_tests/unittest.sh → repo_utils/sub_tests/unittests.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
# ------------------------------------------------------------ | ||
# unittest | ||
# unittests | ||
# ------------------------------------------------------------ | ||
run unittest coverage run --concurrency=multiprocessing -p repo_utils/run_unittest.py | ||
run unittests coverage run --concurrency=multiprocessing -p repo_utils/run_unittest.py | ||
if [ $unittest ]; then | ||
assert_exit_code 0 | ||
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,86 +1,123 @@ | ||
""" | ||
Truvari main parameters | ||
""" | ||
import types | ||
|
||
|
||
#pylint: disable=too-few-public-methods | ||
class VariantParams(): | ||
""" | ||
Holds variant parsing and matching parameters. | ||
Example | ||
>>> import truvari | ||
>>> p = truvari.VariantParams(pctseq=0) | ||
>>> v = truvari.VariantFile('repo_utils/test_files/variants/input1.vcf.gz', params=p) | ||
>>> one = next(v); two = next(v) | ||
>>> one.match(two) | ||
<truvari.bench.MatchResult (False 2.381)> | ||
Attributes | ||
---------- | ||
.. list-table:: | ||
:header-rows: 1 | ||
* - Attribute | ||
- Description | ||
* - `refdist` | ||
- Distance threshold for comparing positions in the reference genome. Default: 500. | ||
* - `pctseq` | ||
- Minimum percentage of sequence similarity required for a match. Default: 0.70 (70%). | ||
* - `pctsize` | ||
- Minimum percentage of size similarity required for a match. Default: 0.70 (70%). | ||
* - `pctovl` | ||
- Minimum percentage of reciprocal overlap required for comparing variants. Default: 0.0 (disabled). | ||
* - `typeignore` | ||
- Whether to ignore variant type mismatches during comparison. Default: `False`. | ||
* - `no_roll` | ||
- Whether to disable rolling of sequences for comparisons. Default: `False`. | ||
* - `chunksize` | ||
- Number of entries to process in each chunk. Default: 1000. | ||
* - `bSample` | ||
- Sample index for the "base" (a.k.a. self) variants during comparisons. Default: 0. | ||
* - `cSample` | ||
- Sample index for the "comparison" (a.k.a. other) variants during comparisons. Default: 0. | ||
* - `dup_to_ins` | ||
- Whether to treat duplications as insertions for some operations. Default: `False`. | ||
* - `bnddist` | ||
- Maximum allowed distance for breakend (BND) comparisons. Default: 100. | ||
* - `sizemin` | ||
- Minimum variant size to consider. Default: 50. | ||
* - `sizefilt` | ||
- Minimum size filter for comparison in the "comparison" dataset. Default: 30. | ||
* - `sizemax` | ||
- Maximum variant size to consider. Default: 50000. | ||
* - `passonly` | ||
- Whether to only consider variants with a "PASS" filter status. Default: `False`. | ||
* - `no_ref` | ||
- Whether to ignore reference homozygous variants in (a)ll, (b)ase, or (c)omp VCF. Default: `False` (off). | ||
* - `pick` | ||
- Strategy for picking matches by Bench (single, ac, multi). Default: `single`. | ||
* - `ignore_monref` | ||
- Whether to ignore monoallelic reference calls. Default: `True`. | ||
* - `check_multi` | ||
- Whether to check for and handle multi-allelic records. Default: `True`. | ||
* - `check_monref` | ||
- Whether to check for monoallelic reference calls. Default: `True`. | ||
* - `no_single_bnd` | ||
- Whether to exclude single-end breakends (BNDs) from comparisons. Default: `True`. | ||
* - `write_resolved` | ||
- Whether to write resolved REF/ALT sequences to output. Default: `False`. | ||
* - `short_circuit` | ||
- Whether to enable short-circuit logic for early exits in comparisons. Default: `False`. | ||
* - `skip_gt` | ||
- Whether to skip genotype comparisons. Default: `False`. | ||
Look at `VariantParams.make_params()` for a list of all params and their defaults | ||
""" | ||
|
||
DEFAULTS = { | ||
"reference": None, | ||
"refdist": 500, | ||
"pctseq": 0.70, | ||
"pctsize": 0.70, | ||
"pctovl": 0.0, | ||
"typeignore": False, | ||
"no_roll": False, | ||
"chunksize": 1000, | ||
"bSample": 0, | ||
"cSample": 0, | ||
"dup_to_ins": False, | ||
"bnddist": 100, | ||
"sizemin": 50, | ||
"sizefilt": 30, | ||
"sizemax": 50000, | ||
"passonly": False, | ||
"no_ref": False, | ||
"pick": "single", | ||
"ignore_monref": True, | ||
"check_multi": True, | ||
"check_monref": True, | ||
"no_single_bnd": True, | ||
"write_resolved": False, | ||
"short_circuit": False, | ||
"skip_gt": False, | ||
} | ||
|
||
def __init__(self, args=None, **kwargs): | ||
""" | ||
Initalize. args is a Namespace from argparse | ||
Initialize VariantParams with defaults, args, and kwargs. | ||
Parameters | ||
---------- | ||
args : Namespace (optional) | ||
An argparse.Namespace object to initialize parameters. | ||
kwargs : dict | ||
Additional parameters to override defaults. | ||
""" | ||
if args is not None: | ||
params = self.make_params_from_args(args) | ||
else: | ||
params = self.make_params() | ||
# Start with defaults | ||
params = self.DEFAULTS.copy() | ||
|
||
# Override with args if provided | ||
if args: | ||
for key in vars(args): | ||
if key in params: | ||
params[key] = getattr(args, key) | ||
|
||
# Override parameters with those provided in kwargs | ||
# Override with kwargs | ||
for key, value in kwargs.items(): | ||
if hasattr(params, key): | ||
setattr(params, key, value) | ||
if key in params: | ||
params[key] = value | ||
else: | ||
raise ValueError(f"Invalid parameter: {key}") | ||
|
||
for key, value in params.__dict__.items(): | ||
setattr(self, key, value) | ||
|
||
@staticmethod | ||
def make_params(): | ||
""" | ||
Makes a simple namespace of matching parameters. Holds defaults | ||
""" | ||
params = types.SimpleNamespace() | ||
params.reference = None | ||
params.refdist = 500 | ||
params.pctseq = 0.70 | ||
params.pctsize = 0.70 | ||
params.pctovl = 0.0 | ||
params.typeignore = False | ||
params.no_roll = False | ||
params.chunksize = 1000 | ||
params.bSample = 0 | ||
params.cSample = 0 | ||
params.dup_to_ins = False | ||
params.bnddist = 100 | ||
params.sizemin = 50 | ||
params.sizefilt = 30 | ||
params.sizemax = 50000 | ||
params.passonly = False | ||
params.no_ref = False | ||
params.pick = 'single' | ||
params.ignore_monref = True | ||
params.check_multi = True | ||
params.check_monref = True | ||
params.no_single_bnd = True | ||
params.write_resolved = False | ||
params.short_circuit = False | ||
params.skip_gt = False | ||
return params | ||
|
||
@staticmethod | ||
def make_params_from_args(args): | ||
""" | ||
Makes a simple namespace of matching parameters. | ||
Populates defaults from make_params, then updates with values from args. | ||
""" | ||
ret = VariantParams.make_params() | ||
|
||
for key in vars(ret): | ||
if hasattr(args, key): | ||
setattr(ret, key, getattr(args, key)) | ||
|
||
return ret | ||
# Set attributes | ||
self.__dict__.update(params) |