forked from keleslab/mHiC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmyStats.py
executable file
·64 lines (61 loc) · 2.54 KB
/
myStats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
##############################################################################
### To use the functions in this lib simply import this python module using
### import myStats
### Then you'll able able to call functions with the proper arguments using
### returnVal=myStats.func1(arg1,arg2)
##############################################################################
##############################################################################
import numpy as np
################### FUNC benjamini_hochberg_correction #####################
#### Given an array of p-values (not necessarily sorted) and the number of total
### tests that were performed to gather these p-values, this function performs
### the multiple hypothesis testing correction described by Benjamini-Hochberg.
###
###
### If the number of tests are much more compared to p-value array and
### the omitted p-values are all supposed to be zero you should use it like:
### q_array=benjamini_hochberg_correction([0.03,0.4,0.7,0.01],10)
###
### If the number of tests are equal to the ones in p-values array then:
### p_array=[0.03,0.4,0.7,0.01]
### q_array=benjamini_hochberg_correction(p_array,len(p_array))
##############################################################################
def benjamini_hochberg_correction(p_values, num_total_tests):
# assumes that p_values vector might not be sorted
pvalsArray=np.array(p_values)
order=pvalsArray.argsort()
sorted_pvals=np.take(p_values,order)
q_values=[1.0 for i in range(len(p_values))]
prev_bh_value = 0
for i, p_value in enumerate(sorted_pvals):
if p_value==1.0:
bh_value=1.0
else:
bh_value = p_value * num_total_tests / (i + 1)
# Sometimes this correction can give values greater than 1,
# so we set those values at 1
bh_value = min(bh_value, 1)
#
# To preserve monotonicity in the values, we take the
# maximum of the previous value or this one, so that we
# don't yield a value less than the previous.
bh_value = max(bh_value, prev_bh_value)
prev_bh_value = bh_value
qIndex=order[i]
q_values[qIndex] =bh_value
#END for
return q_values
################### FUNC meanAndVariance ##################################
#### Given an array this function calculates the mean and variance
##############################################################################
def meanAndVariance(a):
#calculates variance using E(x^2) - (Ex)^2
sumxsquared=0
sumx=0
for x in a:
sumxsquared += (x*x)
sumx += x
mean=sumx/float(len(a))
squaremean=sumxsquared/float(len(a))
variance=squaremean - (mean*mean)
return (mean, variance)