-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcount_significant_isoforms.py
90 lines (74 loc) · 3.23 KB
/
count_significant_isoforms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import argparse
import math
import rmats_long_utils
def parse_args():
parser = argparse.ArgumentParser(
description=('Count isoforms that meet the cutoff values'))
parser.add_argument('--diff-transcripts',
required=True,
help='The path to the differential transcript results')
parser.add_argument(
'--out-tsv',
required=True,
help='The path to write transcripts that meet the cutoff values')
parser.add_argument(
'--adj-pvalue',
type=float,
default=0.05,
help='The cutoff for adjusted p-value (default: %(default)s)')
parser.add_argument('--use-unadjusted-pvalue',
action='store_true',
help='Use pvalue instead of adj_pvalue for the cutoff')
parser.add_argument(
'--delta-proportion',
type=float,
default=0.1,
help='The cutoff for delta isoform proportion (default: %(default)s)')
return parser.parse_args()
def count_significant_isoforms(transcript_path, out_path, adj_pvalue,
use_unadjusted_pvalue, delta_proportion):
with open(transcript_path, 'rt') as in_handle:
with open(out_path, 'wt') as out_handle:
count_significant_isoforms_with_handles(in_handle, out_handle,
adj_pvalue,
use_unadjusted_pvalue,
delta_proportion)
def count_significant_isoforms_with_handles(in_handle, out_handle, adj_pvalue,
use_unadjusted_pvalue,
delta_proportion):
genes = set()
isoform_count = 0
pvalue_col_name = 'adj_pvalue'
if use_unadjusted_pvalue:
pvalue_col_name = 'pvalue'
delta_prop_col_name = 'delta_isoform_proportion'
for line_i, line in enumerate(in_handle):
columns = line.rstrip('\n').split('\t')
if line_i == 0:
headers = columns
out_handle.write(line)
continue
row = dict(zip(headers, columns))
gene = row['gene_id']
found_pvalue = rmats_long_utils.parse_float(row[pvalue_col_name])
found_prop = rmats_long_utils.parse_float(row[delta_prop_col_name])
if math.isnan(found_pvalue) or math.isnan(found_prop):
continue
if (((found_pvalue <= adj_pvalue)
and (abs(found_prop) >= delta_proportion))):
genes.add(gene)
isoform_count += 1
out_handle.write(line)
gene_count = len(genes)
print('found {} isoforms from {} genes with'
' {} <= {} and abs({}) >= {}'.format(isoform_count, gene_count,
pvalue_col_name, adj_pvalue,
delta_prop_col_name,
delta_proportion))
def main():
args = parse_args()
count_significant_isoforms(args.diff_transcripts, args.out_tsv,
args.adj_pvalue, args.use_unadjusted_pvalue,
args.delta_proportion)
if __name__ == '__main__':
main()