Skip to content

Commit

Permalink
Corrected bugs with previous commit. Better output header. corrected …
Browse files Browse the repository at this point in the history
…jackknife error calculation.
  • Loading branch information
TCLamnidis committed May 31, 2019
1 parent ae4e12f commit b2dca9e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 28 deletions.
22 changes: 11 additions & 11 deletions RASCalculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ def getTotalDerivedAC(afDict):
# Bin maxAF + 1: Outgroup F3 stats
RAS = [[[[0 for i in range(NumBins)] for j in range(maxAF+2)] for k in range(len(TestPops))] for x in range(len(LeftPops))]
# The normalization only records a total for all allele frequencies.
mj = [0 for i in range(NumBins)] ## Bin sizes are now stable across all RAS calculations.
#[[ [0 for i in range(NumBins)] for k in range(len(TestPops))] for x in range(len(LeftPops))]
blockSizes = [0 for i in range(NumBins)] ## Bin sizes are now stable across all RAS calculations.
mj = [[ [0 for i in range(NumBins)] for k in range(len(TestPops))] for x in range(len(LeftPops))]


totalRightPopSize = sum(freqSumParser.sizes[p] for p in RightPops)
Expand All @@ -114,7 +114,7 @@ def getTotalDerivedAC(afDict):
if args.NoTransitions and isTransition(Ref, Alt):
continue

mj[Chrom] += 1
blockSizes[Chrom] += 1

missingness = getMissingness(afDict)
if missingness > args.MissingnessCutoff:
Expand All @@ -137,7 +137,7 @@ def getTotalDerivedAC(afDict):
rightSize = freqSumParser.sizes[testPop]

if afDict[leftPop] >= 0 and afDict[testPop] >= 0:
# mj[Lftidx][Tstidx][Chrom] += 1
mj[Lftidx][Tstidx][Chrom] += 1
xLeft = afDict[leftPop] / leftSize
xRight = afDict[testPop] / rightSize
xOutgroup = afDict[args.outgroup] / freqSumParser.sizes[args.outgroup]
Expand All @@ -160,26 +160,26 @@ def getTotalDerivedAC(afDict):
for x in range(len(LeftPops)):
for j in range(len(TestPops)):
for i in range(minAF-1,maxAF+2):
thetaJ,sigma2=ras.getJackknife(RAS[x][j][i],mj)
ThetaJ[x][j][i]=thetaJ
Sigma2[x][j][i]=sigma2
thetaJ,sigma2 = ras.getJackknife(RAS[x][j][i], mj[x][j], blockSizes)
ThetaJ[x][j][i] = thetaJ
Sigma2[x][j][i] = sigma2

# print ("#FREQSUM POPULATIONS & SIZES:",*Pops, file=args.Output, sep=" ", end="\n")
print ("#Left Populations: ", *LeftPops, sep=" ", file=args.Output, end="\n")
print ("#Tested Populations: ", *TestPops, sep=" ", file=args.Output, end="\n")
print ("#Populations considered for allele frequency calculation (Rights):", *RightPops, file=args.Output, sep="\t", end="\n")
print ("#Outgroup: ", args.outgroup, file=args.Output, sep="\t", end="\n")
# RAS, number of sites, RAS /Site, stderr of (RAS/site), Allele Freq
print("TestPop","LeftPop","RAS","Number of sites","RAS/site JK Estimate", "Jackknife Error", "Allele Frequency", sep="\t", file=args.Output)
print("TestPop","LeftPop","RAS","NumSites","RAS/site", "Error", "AlleleCount", sep="\t", file=args.Output)
for leftidx, leftPop in enumerate(LeftPops):
for tstidx, testPop in enumerate(TestPops):
if args.details:
for m in range(minAF,maxAF+1):
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj)), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),m, sep="\t", file=args.Output)
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj[leftidx][tstidx])), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),m, sep="\t", file=args.Output)
m=minAF-1
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj)), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),"Total [{},{}]".format(minAF,maxAF), sep="\t", file=args.Output)
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj[leftidx][tstidx])), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),"Total [{},{}]".format(minAF,maxAF), sep="\t", file=args.Output)
m=maxAF+1
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj)), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),"Outgroup F3", sep="\t", file=args.Output)
print (testPop, leftPop, "{:.5}".format(float(sum(RAS[leftidx][tstidx][m]))), "{:.15e}".format(sum(mj[leftidx][tstidx])), "{:.15e}".format(ThetaJ[leftidx][tstidx][m]), "{:.15e}".format(sqrt(Sigma2[leftidx][tstidx][m])),"Outgroup F3", sep="\t", file=args.Output)
#print ("", file=args.Output)

print ("Program finished running at:", strftime("%D %H:%M:%S"), file=sys.stderr)
Expand Down
35 changes: 18 additions & 17 deletions RASUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,30 +40,31 @@ def __readHeader(self):
self.sizes[popName] = popSize
print("#Available populations in Input File and their respective sizes: ", self.sizes, file=self.output)

def getJackknife(blockValues, blockSizes):
def getJackknife(blockValues, totalObservations, blockSizes):
thetaminus=[0 for x in range(len(blockSizes))]
sum1=0
sum2=0
jackknifeStdErr=0
if sum(blockSizes)==0:
if sum(totalObservations)==0:
thetahat=0
else:
thetahat=sum(blockValues)/sum(blockSizes)
for c in range(len(blockValues)):
if blockSizes[c]==sum(blockSizes):
thetaminus[c]=0
thetahat=sum(blockValues)/sum(totalObservations)

## Normalise blockValues.
normalisedValues = [0.0 for c in range(len(blockSizes))]
for c in range(len(blockSizes)):
if totalObservations[c] == 0:
continue
else:
thetaminus[c]=(sum(blockValues)-blockValues[c])/(sum(blockSizes)-blockSizes[c])
normalisedValues[c] = blockValues[c]/totalObservations[c]

for c in range(len(blockSizes)):
thetaminus[c]=( (sum(blockValues)-blockValues[c]) / (sum(totalObservations)-totalObservations[c]) )
sum1+=thetahat-thetaminus[c]
if sum(blockSizes)!=0:
sum2+=(blockSizes[c]*thetaminus[c])/sum(blockSizes)
sum2+=(blockSizes[c]*thetaminus[c])/sum(blockSizes)
jackknifeEstimator=sum1+sum2
for c in range(len(blockSizes)):
if blockSizes[c]!=0:
hj=sum(blockSizes)/blockSizes[c]
if hj==1:
jackknifeStdErr+=1
else:
pseudoval = (hj*thetahat)-((hj-1)*thetaminus[c])
jackknifeStdErr+=(1/len(blockSizes))*(((pseudoval-jackknifeEstimator)**2)/(hj-1))
return (jackknifeEstimator,jackknifeStdErr)
hj=sum(blockSizes)/blockSizes[c]
pseudoval = (hj*thetahat)-((hj-1)*thetaminus[c])
jackknifeStdErr+=(1/len(blockSizes))*(((pseudoval-jackknifeEstimator)**2)/(hj-1))
return (jackknifeEstimator,jackknifeStdErr)

0 comments on commit b2dca9e

Please sign in to comment.