-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
179 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
chr20 149012 149095 3 3 3 | ||
chr20 278929 279098 7 7 7 | ||
chr20 280210 280275 1 1 1 | ||
chr20 306267 306268 2 2 2 | ||
chr20 380877 380878 1 1 1 | ||
chr20 420664 420665 2 2 2 | ||
chr20 613782 613837 1 1 1 | ||
chr20 641905 642391 11 11 11 | ||
chr20 709758 709852 2 2 2 | ||
chr20 764441 764537 3 3 3 | ||
chr20 949515 949619 1 1 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
""" | ||
Count the number of variants in each chunk | ||
Column 3: total number of variants | ||
Column 4: comma-delimited variant counts of the sub-chunks after splitting by size | ||
Column 5: comma-delimited variant counts of the sub-chunks after splitting by size and then by distance | ||
""" | ||
import sys | ||
import argparse | ||
import pysam | ||
|
||
import truvari | ||
from truvari.collapse import tree_size_chunker, tree_dist_chunker | ||
|
||
def parse_args(args):
    """
    Build the command line parser for `chunks` and parse the given arguments

    The module docstring is used verbatim as the program description.
    Logging is set up (with the version shown) after parsing succeeds.

    :param args: list of command line argument strings
    :return: the parsed argparse namespace
    """
    cli = argparse.ArgumentParser(
        prog="chunks",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Positional input
    cli.add_argument("input", type=str, help="Input VCF")
    # Optional settings
    cli.add_argument("-o", "--output", type=str, default="/dev/stdout",
                     help="Output name")
    cli.add_argument("-c", "--chunksize", type=int, default=500,
                     help="Distance between variants to split chunks (%(default)s)")
    cli.add_argument("-s", "--sizemin", type=int, default=50,
                     help="Minimum SV length")
    cli.add_argument("-S", "--sizemax", type=int, default=50000,
                     help="Maximum SV length")
    parsed = cli.parse_args(args)
    truvari.setup_logging(show_version=True)
    return parsed
|
||
def get_bounds(cnk):
    """
    Find the smallest `start` and the largest `stop` among the entries of cnk

    An empty cnk yields the initial sentinels `(sys.maxsize, 0)`.

    :param cnk: iterable of objects exposing `start` and `stop`
    :return: tuple of (minimum start, maximum stop)
    """
    lowest, highest = sys.maxsize, 0
    for entry in cnk:
        if entry.start < lowest:
            lowest = entry.start
        if entry.stop > highest:
            highest = entry.stop
    return lowest, highest
|
||
def chunks_main(args):
    """
    Main entry point: write one tab-separated line per non-empty chunk

    Each line holds the chromosome of the chunk's first 'base' variant, the
    chunk bounds from get_bounds, the number of 'base' variants, then the
    comma-joined sub-chunk variant counts produced by tree_size_chunker and
    by tree_dist_chunker.

    :param args: list of command line argument strings, handed to parse_args
    """
    args = parse_args(args)

    m = truvari.Matcher()
    m.params.pctseq = 0
    m.params.sizemin = args.sizemin
    m.params.sizefilt = args.sizemin
    m.params.sizemax = args.sizemax
    m.params.chunksize = args.chunksize
    m.params.refdist = args.chunksize

    # The VCF handle is context-managed so it is closed once chunking ends
    # (previously it was opened and never closed). The chunker reads from it
    # lazily, so the whole loop has to stay inside the `with`.
    with pysam.VariantFile(args.input) as v, open(args.output, 'w') as fout:
        for chunk, _ in truvari.chunker(m, ('base', v)):
            base = chunk['base']
            if not base:
                continue
            s, e = get_bounds(base)
            chrom = base[0].chrom
            num = len(base)

            s_cnts = []
            d_cnts = []
            for i, _ in tree_size_chunker(m, [(chunk, 0)]):
                if i['base']:
                    s_cnts.append(len(i['base']))
                # NOTE(review): indentation was lost in the reviewed copy; the
                # distance pass is assumed to run for every size sub-chunk
                # (not only the non-empty ones) - confirm against upstream.
                for j, _ in tree_dist_chunker(m, [(i, 0)]):
                    d_cnts.append(len(j['base']))

            s_cnts = ",".join(map(str, s_cnts))
            d_cnts = ",".join(map(str, d_cnts))
            # Emit the complete row in one write
            fout.write(f"{chrom}\t{s}\t{e}\t{num}\t{s_cnts}\t{d_cnts}\n")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters