From 1c8b71f16b5f54d4f5a222c64e32f3efae387ac5 Mon Sep 17 00:00:00 2001 From: Adam English Date: Tue, 7 Jan 2025 01:29:26 -0500 Subject: [PATCH] expanding VariantParams documentation And cleaning it up --- docs/api/truvari.examples.rst | 2 +- .../sub_tests/{unittest.sh => unittests.sh} | 4 +- repo_utils/truvari_ssshtests.sh | 2 +- truvari/bench.py | 2 +- truvari/variant_params.py | 173 +++++++++++------- 5 files changed, 110 insertions(+), 73 deletions(-) rename repo_utils/sub_tests/{unittest.sh => unittests.sh} (57%) diff --git a/docs/api/truvari.examples.rst b/docs/api/truvari.examples.rst index bba2bd4f..647d4ddd 100644 --- a/docs/api/truvari.examples.rst +++ b/docs/api/truvari.examples.rst @@ -37,7 +37,7 @@ This returns a `truvari.MatchResult`. You can customize matching thresholds by p .. code-block:: python # Disable sequence and size similarity; enable reciprocal overlap - matcher = truvari.VariantParams(seqsim=0, sizesim=0, recovl=0.5) + matcher = truvari.VariantParams(seqsim=0, sizesim=0, pctovl=0.5) vcf = truvari.VariantFile("input.vcf.gz", matcher=matcher) entry1 = next(vcf) entry2 = next(vcf) diff --git a/repo_utils/sub_tests/unittest.sh b/repo_utils/sub_tests/unittests.sh similarity index 57% rename from repo_utils/sub_tests/unittest.sh rename to repo_utils/sub_tests/unittests.sh index 2f32e160..e7200a7d 100644 --- a/repo_utils/sub_tests/unittest.sh +++ b/repo_utils/sub_tests/unittests.sh @@ -1,7 +1,7 @@ # ------------------------------------------------------------ -# unittest +# unittests # ------------------------------------------------------------ -run unittest coverage run --concurrency=multiprocessing -p repo_utils/run_unittest.py +run unittests coverage run --concurrency=multiprocessing -p repo_utils/run_unittest.py if [ $unittest ]; then assert_exit_code 0 fi diff --git a/repo_utils/truvari_ssshtests.sh b/repo_utils/truvari_ssshtests.sh index 5ead8e82..46c05ce4 100644 --- a/repo_utils/truvari_ssshtests.sh +++ b/repo_utils/truvari_ssshtests.sh 
@@ -24,7 +24,7 @@ source $TESTSRC/sub_tests/phab.sh source $TESTSRC/sub_tests/refine.sh source $TESTSRC/sub_tests/segment.sh source $TESTSRC/sub_tests/stratify.sh -source $TESTSRC/sub_tests/unittest.sh +source $TESTSRC/sub_tests/unittests.sh source $TESTSRC/sub_tests/vcf2df.sh source $TESTSRC/sub_tests/version.sh diff --git a/truvari/bench.py b/truvari/bench.py index df780bd7..579fe5c7 100644 --- a/truvari/bench.py +++ b/truvari/bench.py @@ -25,7 +25,7 @@ def parse_args(args): """ Pull the command line parameters """ - defaults = truvari.VariantParams.make_params() + defaults = truvari.VariantParams() parser = argparse.ArgumentParser(prog="bench", description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument("-b", "--base", type=str, required=True, diff --git a/truvari/variant_params.py b/truvari/variant_params.py index 527f6e8f..b43bc9d9 100644 --- a/truvari/variant_params.py +++ b/truvari/variant_params.py @@ -1,86 +1,123 @@ """ Truvari main parameters """ -import types - - +#pylint: disable=too-few-public-methods class VariantParams(): """ Holds variant parsing and matching parameters. - Example - >>> import truvari - >>> p = truvari.VariantParams(pctseq=0) - >>> v = truvari.VariantFile('repo_utils/test_files/variants/input1.vcf.gz', params=p) - >>> one = next(v); two = next(v) - >>> one.match(two) - + Attributes + ---------- + .. list-table:: + :header-rows: 1 + + * - Attribute + - Description + * - `refdist` + - Distance threshold for comparing positions in the reference genome. Default: 500. + * - `pctseq` + - Minimum percentage of sequence similarity required for a match. Default: 0.70 (70%). + * - `pctsize` + - Minimum percentage of size similarity required for a match. Default: 0.70 (70%). + * - `pctovl` + - Minimum percentage of reciprocal overlap required for comparing variants. Default: 0.0 (disabled). + * - `typeignore` + - Whether to ignore variant type mismatches during comparison. Default: `False`. 
+ * - `no_roll` + - Whether to disable rolling of sequences for comparisons. Default: `False`. + * - `chunksize` + - Number of entries to process in each chunk. Default: 1000. + * - `bSample` + - Sample index for the "base" (a.k.a. self) variants during comparisons. Default: 0. + * - `cSample` + - Sample index for the "comparison" (a.k.a. other) variants during comparisons. Default: 0. + * - `dup_to_ins` + - Whether to treat duplications as insertions for some operations. Default: `False`. + * - `bnddist` + - Maximum allowed distance for breakend (BND) comparisons. Default: 100. + * - `sizemin` + - Minimum variant size to consider. Default: 50. + * - `sizefilt` + - Minimum size filter for comparison in the "comparison" dataset. Default: 30. + * - `sizemax` + - Maximum variant size to consider. Default: 50000. + * - `passonly` + - Whether to only consider variants with a "PASS" filter status. Default: `False`. + * - `no_ref` + - Whether to ignore reference homozygous variants in (a)ll, (b)ase, or (c)omp VCF. Default: `False` (off). + * - `pick` + - Strategy for picking matches by Bench (single, ac, multi). Default: `single`. + * - `ignore_monref` + - Whether to ignore monoallelic reference calls. Default: `True`. + * - `check_multi` + - Whether to check for and handle multi-allelic records. Default: `True`. + * - `check_monref` + - Whether to check for monoallelic reference calls. Default: `True`. + * - `no_single_bnd` + - Whether to exclude single-end breakends (BNDs) from comparisons. Default: `True`. + * - `write_resolved` + - Whether to write resolved REF/ALT sequences to output. Default: `False`. + * - `short_circuit` + - Whether to enable short-circuit logic for early exits in comparisons. Default: `False`. + * - `skip_gt` + - Whether to skip genotype comparisons. Default: `False`. 
- Look at `VariantParams.make_params()` for a list of all params and their defaults """ + DEFAULTS = { + "reference": None, + "refdist": 500, + "pctseq": 0.70, + "pctsize": 0.70, + "pctovl": 0.0, + "typeignore": False, + "no_roll": False, + "chunksize": 1000, + "bSample": 0, + "cSample": 0, + "dup_to_ins": False, + "bnddist": 100, + "sizemin": 50, + "sizefilt": 30, + "sizemax": 50000, + "passonly": False, + "no_ref": False, + "pick": "single", + "ignore_monref": True, + "check_multi": True, + "check_monref": True, + "no_single_bnd": True, + "write_resolved": False, + "short_circuit": False, + "skip_gt": False, + } + def __init__(self, args=None, **kwargs): """ - Initalize. args is a Namespace from argparse + Initialize VariantParams with defaults, args, and kwargs. + + Parameters + ---------- + args : Namespace (optional) + An argparse.Namespace object to initialize parameters. + kwargs : dict + Additional parameters to override defaults. """ - if args is not None: - params = self.make_params_from_args(args) - else: - params = self.make_params() + # Start with defaults + params = self.DEFAULTS.copy() + + # Override with args if provided + if args: + for key in vars(args): + if key in params: + params[key] = getattr(args, key) - # Override parameters with those provided in kwargs + # Override with kwargs for key, value in kwargs.items(): - if hasattr(params, key): - setattr(params, key, value) + if key in params: + params[key] = value else: raise ValueError(f"Invalid parameter: {key}") - for key, value in params.__dict__.items(): - setattr(self, key, value) - - @staticmethod - def make_params(): - """ - Makes a simple namespace of matching parameters. 
Holds defaults - """ - params = types.SimpleNamespace() - params.reference = None - params.refdist = 500 - params.pctseq = 0.70 - params.pctsize = 0.70 - params.pctovl = 0.0 - params.typeignore = False - params.no_roll = False - params.chunksize = 1000 - params.bSample = 0 - params.cSample = 0 - params.dup_to_ins = False - params.bnddist = 100 - params.sizemin = 50 - params.sizefilt = 30 - params.sizemax = 50000 - params.passonly = False - params.no_ref = False - params.pick = 'single' - params.ignore_monref = True - params.check_multi = True - params.check_monref = True - params.no_single_bnd = True - params.write_resolved = False - params.short_circuit = False - params.skip_gt = False - return params - - @staticmethod - def make_params_from_args(args): - """ - Makes a simple namespace of matching parameters. - Populates defaults from make_params, then updates with values from args. - """ - ret = VariantParams.make_params() - - for key in vars(ret): - if hasattr(args, key): - setattr(ret, key, getattr(args, key)) - - return ret + # Set attributes + self.__dict__.update(params)