Skip to content

Commit

Permalink
update func tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ACEnglish committed Sep 7, 2024
1 parent 82330d4 commit feda14b
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 9 deletions.
4 changes: 2 additions & 2 deletions imgs/coverage.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 3 additions & 2 deletions repo_utils/answer_key/collapse/input1_median_collapsed.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -888,7 +888,7 @@ chr20 419860 . AGTGACCCTGCACCTGGCT A 60 . QNAME=HG002-S9-H2-000001F;QSTART=37411
chr20 420228 . A C 60 . QNAME=HG002-S9-H2-000001F;QSTART=374466;QSTRAND=+ GT:PL:DP 0|1:2,3,6:24,29
chr20 420465 . A AG 60 . QNAME=HG002-S9-H2-000001F;QSTART=374704;QSTRAND=+ GT:PL:DP 0|1:6,10,1:32,35
chr20 420561 . A T 60 . QNAME=HG002-S9-H1-000001F;QSTART=399939;QSTRAND=+ GT:PL:DP 1|1:5,6,7:36,12
chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 61 . QNAME=HG002-S9-H2-000001F;QSTART=374905;QSTRAND=+;SVTYPE=INS;SVLEN=227;NumCollapsed=1;NumConsolidated=0;CollapseId=4.0;CollapseStart=420664;CollapseEnd=420665;CollapseSize=226 GT:PL 0/1:.
chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCCATCCCCCGTCCGCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 61 . QNAME=HG002-S9-H2-000001F;QSTART=374905;QSTRAND=+;SVTYPE=INS;SVLEN=227;NumCollapsed=1;NumConsolidated=0;CollapseId=9.0;CollapseStart=420664;CollapseEnd=420665;CollapseSize=226 GT:PL 0/1:.
chr20 421409 . G A 60 . QNAME=HG002-S9-H1-000001F;QSTART=401013;QSTRAND=+ GT:PL:DP 1|1:2,3,7:14,41
chr20 421527 . T C 60 . QNAME=HG002-S9-H2-000001F;QSTART=375993;QSTRAND=+ GT:PL:DP 0|1:3,8,4:49,42
chr20 422066 . A G 60 . QNAME=HG002-S9-H1-000001F;QSTART=401670;QSTRAND=+ GT:PL:DP 1|1:3,7,8:9,39
Expand Down Expand Up @@ -1260,7 +1260,7 @@ chr20 639104 . A AT 60 . QNAME=HG002-S9-H1-000001F;QSTART=618555;QSTRAND=+ GT:PL
chr20 640046 . C T 60 . QNAME=HG002-S9-H1-000001F;QSTART=619497;QSTRAND=+ GT:PL:DP 1|0:10,10,9:11,48
chr20 640049 . C T 60 . QNAME=HG002-S9-H1-000001F;QSTART=619500;QSTRAND=+ GT:PL:DP 1|1:2,3,4:13,44
chr20 641878 . C G 60 . QNAME=HG002-S9-H1-000001F;QSTART=621329;QSTRAND=+ GT:PL:DP 1|0:8,1,7:7,13
chr20 641913 . G GGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=621365;QSTRAND=+;SVTYPE=INS;SVLEN=66;NumCollapsed=1;NumConsolidated=0;CollapseId=6.0;CollapseStart=642120;CollapseEnd=642121;CollapseSize=66 GT:PL:DP 1/0:7,5,7:34,13
chr20 641913 . G GGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGA 60 . QNAME=HG002-S9-H1-000001F;QSTART=621365;QSTRAND=+;SVTYPE=INS;SVLEN=66 GT:PL:DP 1/0:7,5,7:34,13
chr20 641944 . GGA G 60 . QNAME=HG002-S9-H1-000001F;QSTART=621462;QSTRAND=+ GT:PL:DP 1|0:6,6,6:17,7
chr20 642012 . GGT G 60 . QNAME=HG002-S9-H1-000001F;QSTART=621528;QSTRAND=+ GT:PL:DP 1|0:5,7,1:6,23
chr20 642037 . T TG 60 . QNAME=HG002-S9-H1-000001F;QSTART=621551;QSTRAND=+ GT:PL:DP 1|0:4,10,10:14,47
Expand All @@ -1280,6 +1280,7 @@ chr20 642284 . A G 60 . QNAME=HG002-S9-H1-000001F;QSTART=621795;QSTRAND=+ GT:PL:
chr20 642300 . G GCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGCCCAGCGGGGGTGGAGTTGCCTGGGGGGGGGGC 60 . QNAME=HG002-S9-H1-000001F;QSTART=621812;QSTRAND=+;SVTYPE=INS;SVLEN=408 GT:PL:DP 1/0:5,10,5:44,36
chr20 642300 . G GGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597225;QSTRAND=+ GT:PL:DP 0|1:4,10,8:25,7
chr20 642330 . G C 60 . QNAME=HG002-S9-H1-000001F;QSTART=622249;QSTRAND=+ GT:PL:DP 1|0:2,9,8:16,10
chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66 GT:PL:DP 0/1:6,1,9:20,25
chr20 642362 . G GC 60 . QNAME=HG002-S9-H1-000001F;QSTART=622282;QSTRAND=+ GT:PL:DP 1|1:10,5,1:17,12
chr20 642391 . G GC 60 . QNAME=HG002-S9-H1-000001F;QSTART=622312;QSTRAND=+ GT:PL:DP 1|1:2,9,2:10,50
chr20 642420 . G GC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597415;QSTRAND=+ GT:PL:DP 0|1:7,1,2:41,16
Expand Down
3 changes: 1 addition & 2 deletions repo_utils/answer_key/collapse/input1_median_removed.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,4 @@
##INFO=<ID=MatchId,Number=.,Type=String,Description="Tuple of base and comparison call ids which were matched">
##INFO=<ID=Multi,Number=0,Type=Flag,Description="Call is false due to non-multimatching">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA24385
chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCATCCCCCGTCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=HG002-S9-H1-000001F;QSTART=400044;QSTRAND=+;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.9956;PctSizeSimilarity=0.9956;PctRecOverlap=1;SizeDiff=1;StartDistance=0;EndDistance=0;GTMatch=.;TruScore=99;MatchId=4.0 GT:PL:DP 1/0:4,8,6:32,9
chr20 642330 . G GGCCCAGCGGGGGTGGAGTTGCCTGTGGTGGGGGGCCCAGCGGGGGTGGAGTTGCCTGTGGGGGGGC 60 . QNAME=HG002-S9-H2-000001F;QSTART=597257;QSTRAND=+;SVTYPE=INS;SVLEN=66;PctSeqSimilarity=0.9576;PctSizeSimilarity=1;PctRecOverlap=0;SizeDiff=0;StartDistance=-417;EndDistance=-417;GTMatch=.;TruScore=65;MatchId=6.0 GT:PL:DP 0/1:6,1,9:20,25
chr20 420665 . G GCCCACCCCATCCCCCGTCCCCATCCCCCATCCCCCGTCCCCCGTCCCCATCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCATCCCCCGTCCCCCGTCCCCCATCCCATCCCCCACCCCCATCCCCCGTCCCCCGTCCCCATCCCCCATCCCCCATCCCCATCCCCCGTCCCCCGTCCCCCATCTCCTGTCCCCCGTCCCCCATCCCCCGTCCCCATCCCCCACC 60 . QNAME=HG002-S9-H1-000001F;QSTART=400044;QSTRAND=+;SVTYPE=INS;SVLEN=226;PctSeqSimilarity=0.9956;PctSizeSimilarity=0.9956;PctRecOverlap=1;SizeDiff=1;StartDistance=0;EndDistance=0;GTMatch=.;TruScore=99;MatchId=9.0 GT:PL:DP 1/0:4,8,6:32,9
3 changes: 3 additions & 0 deletions repo_utils/sub_tests/collapse.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ if [ $test_collapse_badparams ]; then
assert_exit_code 100
fi

# Lower collapse sub-chunk threshold
export COLLAP_SUB=1
run test_collapse_median $truv collapse -f $INDIR/references/reference.fa \
-i $INDIR/variants/input1.vcf.gz \
-o $OD/input1_median_collapsed.vcf \
Expand All @@ -82,6 +84,7 @@ run test_collapse_median $truv collapse -f $INDIR/references/reference.fa \
if [ $test_collapse_median ]; then
collapse_assert 1_median
fi
unset COLLAP_SUB

run test_collapse_intragt $truv collapse -i $INDIR/variants/bcftools_merged.vcf.gz \
-o $OD/inputintragt_collapsed.vcf \
Expand Down
8 changes: 5 additions & 3 deletions truvari/collapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ def append(self, data):
"""
Put data onto end of list
"""
new_node = (data, None)
new_node = [data, None]
if not self.head:
self.head = new_node
self.tail = new_node
Expand Down Expand Up @@ -797,8 +797,9 @@ def tree_size_chunker(matcher, chunks):
Needs to return the same thing as a chunker
"""
chunk_count = 0
thresh = 1 if "COLLAP_SUB" in os.environ and os.environ["COLLAP_SUB"] == "1" else 100
for chunk, _ in chunks:
if len(chunk['base']) < 100: # fewer than 100 is fine
if len(chunk['base']) < thresh: # fewer than thresh (100 unless COLLAP_SUB=1) is fine
chunk_count += 1
yield chunk, chunk_count
continue
Expand Down Expand Up @@ -826,8 +827,9 @@ def tree_dist_chunker(matcher, chunks):
This does nothing
"""
chunk_count = 0
thresh = 1 if "COLLAP_SUB" in os.environ and os.environ["COLLAP_SUB"] == "1" else 100
for chunk, _ in chunks:
if len(chunk['base']) < 100: # fewer than 100 is fine
if len(chunk['base']) < thresh: # fewer than thresh (100 unless COLLAP_SUB=1) is fine
chunk_count += 1
yield chunk, chunk_count
continue
Expand Down

0 comments on commit feda14b

Please sign in to comment.