diff --git a/imgs/coverage.svg b/imgs/coverage.svg index ea71b13a..6a91c8d4 100644 --- a/imgs/coverage.svg +++ b/imgs/coverage.svg @@ -17,7 +17,7 @@ coverage - 90% - 90% + 91% + 91% diff --git a/repo_utils/answer_key/collapse/input1_median_collapsed.vcf b/repo_utils/answer_key/collapse/input1_median_collapsed.vcf index a5af9093..f3b9734b 100644 --- a/repo_utils/answer_key/collapse/input1_median_collapsed.vcf +++ b/repo_utils/answer_key/collapse/input1_median_collapsed.vcf @@ -888,7 +888,7 @@ chr20 419860 . AGTGACCCTGCACCTGGCT A 60 . QNAME=HG002-S9-H2-000001F;QSTART=37411 chr20 420228 . A C 60 . QNAME=HG002-S9-H2-000001F;QSTART=374466;QSTRAND=+ GT:PL:DP 0|1:2,3,6:24,29 chr20 420465 . A AG 60 . QNAME=HG002-S9-H2-000001F;QSTART=374704;QSTRAND=+ GT:PL:DP 0|1:6,10,1:32,35 chr20 420561 . A T 60 . QNAME=HG002-S9-H1-000001F;QSTART=399939;QSTRAND=+ GT:PL:DP 1|1:5,6,7:36,12 -chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 61 . QNAME=HG002-S9-H2-000001F;QSTART=374905;QSTRAND=+;SVTYPE=INS;SVLEN=227;NumCollapsed=1;NumConsolidated=0;CollapseId=4.0;CollapseStart=420664;CollapseEnd=420665;CollapseSize=226 GT:PL 0/1:. +chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 61 . QNAME=HG002-S9-H2-000001F;QSTART=374905;QSTRAND=+;SVTYPE=INS;SVLEN=227;NumCollapsed=1;NumConsolidated=0;CollapseId=9.0;CollapseStart=420664;CollapseEnd=420665;CollapseSize=226 GT:PL 0/1:. chr20 421409 . G A 60 . QNAME=HG002-S9-H1-000001F;QSTART=401013;QSTRAND=+ GT:PL:DP 1|1:2,3,7:14,41 chr20 421527 . T C 60 . QNAME=HG002-S9-H2-000001F;QSTART=375993;QSTRAND=+ GT:PL:DP 0|1:3,8,4:49,42 chr20 422066 . A G 60 . QNAME=HG002-S9-H1-000001F;QSTART=401670;QSTRAND=+ GT:PL:DP 1|1:3,7,8:9,39 @@ -1260,7 +1260,7 @@ chr20 639104 . A AT 60 . QNAME=HG002-S9-H1-000001F;QSTART=618555;QSTRAND=+ GT:PL chr20 640046 . C T 60 . QNAME=HG002-S9-H1-000001F;QSTART=619497;QSTRAND=+ GT:PL:DP 1|0:10,10,9:11,48 chr20 640049 . C T 60 . QNAME=HG002-S9-H1-000001F;QSTART=619500;QSTRAND=+ GT:PL:DP 1|1:2,3,4:13,44 chr20 641878 . C G 60 . QNAME=HG002-S9-H1-000001F;QSTART=621329;QSTRAND=+ GT:PL:DP 1|0:8,1,7:7,13 -chr20 641913 . G GGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=621365;QSTRAND=+;SVTYPE=INS;SVLEN=66;NumCollapsed=1;NumConsolidated=0;CollapseId=6.0;CollapseStart=642120;CollapseEnd=642121;CollapseSize=66 GT:PL:DP 1/0:7,5,7:34,13 +chr20 641913 . G GGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=621365;QSTRAND=+;SVTYPE=INS;SVLEN=66 GT:PL:DP 1/0:7,5,7:34,13 chr20 641944 . GGA G 60 . QNAME=HG002-S9-H1-000001F;QSTART=621462;QSTRAND=+ GT:PL:DP 1|0:6,6,6:17,7 chr20 642012 . GGT G 60 . QNAME=HG002-S9-H1-000001F;QSTART=621528;QSTRAND=+ GT:PL:DP 1|0:5,7,1:6,23 chr20 642037 . T TG 60 . QNAME=HG002-S9-H1-000001F;QSTART=621551;QSTRAND=+ GT:PL:DP 1|0:4,10,10:14,47 @@ -1280,6 +1280,7 @@ chr20 642284 . A G 60 . QNAME=HG002-S9-H1-000001F;QSTART=621795;QSTRAND=+ GT:PL: chr20 642300 . G GCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGC 60 . QNAME=HG002-S9-H1-000001F;QSTART=621812;QSTRAND=+;SVTYPE=INS;SVLEN=408 GT:PL:DP 1/0:5,10,5:44,36 chr20 642300 . G GGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597225;QSTRAND=+ GT:PL:DP 0|1:4,10,8:25,7 chr20 642330 . G C 60 . QNAME=HG002-S9-H1-000001F;QSTART=622249;QSTRAND=+ GT:PL:DP 1|0:2,9,8:16,10 +chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66 GT:PL:DP 0/1:6,1,9:20,25 chr20 642362 . G GC 60 . QNAME=HG002-S9-H1-000001F;QSTART=622282;QSTRAND=+ GT:PL:DP 1|1:10,5,1:17,12 chr20 642391 . G GC 60 . QNAME=HG002-S9-H1-000001F;QSTART=622312;QSTRAND=+ GT:PL:DP 1|1:2,9,2:10,50 chr20 642420 . G GC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597415;QSTRAND=+ GT:PL:DP 0|1:7,1,2:41,16 diff --git a/repo_utils/answer_key/collapse/input1_median_removed.vcf b/repo_utils/answer_key/collapse/input1_median_removed.vcf index 2905ef00..cccc217d 100644 --- a/repo_utils/answer_key/collapse/input1_median_removed.vcf +++ b/repo_utils/answer_key/collapse/input1_median_removed.vcf @@ -47,5 +47,4 @@ ##INFO= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24385 -chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCATCCCCCGTCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=HG002-S9-H1-000001F;QSTART=400044;QSTRAND=+;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.9956;PctSizeSimilarity=0.9956;PctRecOverlap=1;SizeDiff=1;StartDistance=0;EndDistance=0;GTMatch=.;TruScore=99;MatchId=4.0 GT:PL:DP 1/0:4,8,6:32,9 -chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.9576;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-417;EndDistance=-417;GTMatch=.;TruScore=65;MatchId=6.0 GT:PL:DP 0/1:6,1,9:20,25 +chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCATCCCCCGTCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=HG002-S9-H1-000001F;QSTART=400044;QSTRAND=+;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.9956;PctSizeSimilarity=0.9956;PctRecOverlap=1;SizeDiff=1;StartDistance=0;EndDistance=0;GTMatch=.;TruScore=99;MatchId=9.0 GT:PL:DP 1/0:4,8,6:32,9 diff --git a/repo_utils/sub_tests/collapse.sh b/repo_utils/sub_tests/collapse.sh index 93220119..70c5d66b 100644 --- a/repo_utils/sub_tests/collapse.sh +++ b/repo_utils/sub_tests/collapse.sh @@ -74,6 +74,8 @@ if [ $test_collapse_badparams ]; then assert_exit_code 100 fi +# Lower collapse sub-chunk threshold +export COLLAP_SUB=1 run test_collapse_median $truv collapse -f $INDIR/references/reference.fa \ -i $INDIR/variants/input1.vcf.gz \ -o $OD/input1_median_collapsed.vcf \ @@ -82,6 +84,7 @@ run test_collapse_median $truv collapse -f $INDIR/references/reference.fa \ if [ $test_collapse_median ]; then collapse_assert 1_median fi +unset COLLAP_SUB run test_collapse_intragt $truv collapse -i $INDIR/variants/bcftools_merged.vcf.gz \ -o $OD/inputintragt_collapsed.vcf \ diff --git a/truvari/collapse.py b/truvari/collapse.py index 068054ee..7cd44c79 100644 --- a/truvari/collapse.py +++ b/truvari/collapse.py @@ -738,7 +738,7 @@ def append(self, data): """ Put data onto end of list """ - new_node = (data, None) + new_node = [data, None] if not self.head: self.head = new_node self.tail = new_node @@ -797,8 +797,9 @@ def tree_size_chunker(matcher, chunks): Needs to return the same thing as a chunker """ chunk_count = 0 + thresh = 1 if "COLLAP_SUB" in os.environ and os.environ["COLLAP_SUB"] == "1" else 100 for chunk, _ in chunks: - if len(chunk['base']) < 100: # fewer than 100 is fine + if len(chunk['base']) < thresh: # fewer than 100 is fine chunk_count += 1 yield chunk, chunk_count continue @@ -826,8 +827,9 @@ def tree_dist_chunker(matcher, chunks): This does nothing """ chunk_count = 0 + thresh = 1 if "COLLAP_SUB" in os.environ and os.environ["COLLAP_SUB"] == "1" else 100 for chunk, _ in chunks: - if len(chunk['base']) < 100: # fewer than 100 is fine + if len(chunk['base']) < thresh: # fewer than 100 is fine chunk_count += 1 yield chunk, chunk_count continue