-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathspeed_downsample_auc.r
77 lines (66 loc) · 2.52 KB
/
speed_downsample_auc.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
downsampled_auc = function(n_sigs, expr, zdata, zdata2model, hpc_args=list(n_jobs=0)) {
library(dplyr)
b = import('base')
index = zdata$index
keep = zdata$index %>%
group_by(pathway) %>%
sample_n(size=n_sigs, replace=TRUE) %>%
pull(id)
# build model
zdata$zscores = zdata$zscores[,keep]
zdata$index = zdata$index[match(keep, zdata$index$id),]
stopifnot(zdata$index$id == colnames(zdata$zscores))
model = zdata2model(zdata, hpc_args=hpc_args)$model
# score experiments
score_one = function(vecs, expr, index) {
narray::intersect(vecs, expr, along=1)
mat = t(expr) %*% vecs
ctl = mat[index$control,,drop=FALSE]
ptb = mat[index$perturbed,,drop=FALSE]
colMeans(ptb) - colMeans(ctl)
}
scores = mapply(score_one, expr=expr$expr, index=expr$records,
MoreArgs = list(vecs=model)) %>%
t() %>%
narray::map(along=1, scale)
# calculate ROC AUC
st = import('stats')
roc = import('./roc_util')
auc = list(scores=scores, index=index[match(rownames(scores), index$id),]) %>%
roc$scores2df() %>%
na.omit() %>%
mutate(method = "speed_matrix",
inferred = signature,
matched = perturbed == inferred) %>%
group_by(inferred, signature) %>%
do(st$roc(., "score", "matched")) %>%
ungroup() %>%
roc$roc2auc() %>%
transmute(pathway=inferred, auc=PROGENy)
}
if (is.null(module_name())) {
library(dplyr)
library(magrittr)
b = import('base')
io = import('io')
MODEL = commandArgs(TRUE)[1] %or% "../../model/model_matrix.r"
EXPR = commandArgs(TRUE)[2] %or% "../../data/expr.RData"
ZSCORES = commandArgs(TRUE)[3] %or% "../../data/zscores.RData"
OUTFILE = commandArgs(TRUE)[4] %or% "speed_downsample_auc.RData"
# load zscores, model building function, and expression for each experiment
zdata = io$load(ZSCORES)
zdata2model = import_(sub("\\.r$", "", MODEL))$zscore2model
expr = io$load(EXPR)
max_sigs = zdata$index %>%
group_by(pathway) %>%
summarize(n=n())
n_sigs = rep(1:max(max_sigs$n), 10)
aucs = clustermq::Q(downsampled_auc, n_sigs=n_sigs, job_size=1, memory=8192,
const = list(expr=expr, zdata=zdata, zdata2model=zdata2model))
aucs = data_frame(n_sigs=n_sigs, auc=aucs) %>%
tidyr::unnest() %>%
left_join(max_sigs, by="pathway") %>%
filter(n_sigs <= n) %>%
select(-n)
save(aucs, file=OUTFILE)
}