From a49aea6e7a0b73fa8669385bfa3dfbc9bb2858fc Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 13 Dec 2022 17:22:21 +0300 Subject: [PATCH 01/14] piping bdgraph --- workflow/rules/module_strings.py | 12 ++ .../bdgraph_pip/Dockerfile | 3 + .../bdgraph_pip/docs.rst | 0 .../bdgraph_pip/info.json | 22 +++ .../bdgraph_pip/rule.smk | 20 +++ .../bdgraph_pip/schema.json | 164 ++++++++++++++++++ .../bdgraph_pip/script.R | 164 ++++++++++++++++++ 7 files changed, 385 insertions(+) create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/Dockerfile create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/docs.rst create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/info.json create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json create mode 100644 workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R diff --git a/workflow/rules/module_strings.py b/workflow/rules/module_strings.py index 69892ea4..f8251f6c 100644 --- a/workflow/rules/module_strings.py +++ b/workflow/rules/module_strings.py @@ -8,6 +8,8 @@ # of the id of some algorithm. Thus the id has to be exptracted into a path string first. def idtopath(mylist, json_string): + if mylist is None: + return "None" if isinstance(mylist, list): return [json_string[startalg][0] for startalg in mylist] else: @@ -35,6 +37,16 @@ def idtopath(mylist, json_string): for val in order_mcmc_list } ) +if "bdgraph_pip" in pattern_strings: + bdgraph_pip_list = config["resources"]["structure_learning_algorithms"]["bdgraph_pip"] + # The path to the startspace algorithm is extended here + for items in bdgraph_pip_list: + items["startalg"] = idtopath(items["startalg"], json_string) + + json_string.update({val["id"]: expand(pattern_strings["bdgraph_pip"], **val,) + for val in bdgraph_pip_list} ) + + if "bidag_partition_mcmc" in pattern_strings: bidag_partition_mcmc_list = config["resources"]["structure_learning_algorithms"]["bidag_partition_mcmc"] diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/Dockerfile b/workflow/rules/structure_learning_algorithms/bdgraph_pip/Dockerfile new file mode 100644 index 00000000..627e29e0 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/Dockerfile @@ -0,0 +1,3 @@ +FROM r-base + +RUN R -e "install.packages(\"BDgraph\", repos=\"https://cran.rstudio.com\")" --no-save diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/docs.rst b/workflow/rules/structure_learning_algorithms/bdgraph_pip/docs.rst new file mode 100644 index 00000000..e69de29b diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/info.json b/workflow/rules/structure_learning_algorithms/bdgraph_pip/info.json new file mode 100644 index 00000000..9b750edb --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/info.json @@ -0,0 +1,22 @@ +{ + "title": "BDgraph", + "version": "2.64", + "package": { + "title": "BDgraph", + "url": "https://cran.r-project.org/web/packages/BDgraph/index.html" + }, + "docs_url": "https://cran.r-project.org/web/packages/BDgraph/BDgraph.pdf", + "papers": [ + { + "title": "BDgraph: An R Package for Bayesian Structure Learning in Graphical Models", + "url": "https://www.jstatsoft.org/article/view/v089i03" + } + ], + "outputs": [ + "graphtraj" + ], + "graph_types": [ + "UG" + ], + "language": "R" +} \ No newline at end of file diff --git 
a/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk new file mode 100644 index 00000000..e60c4427 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk @@ -0,0 +1,20 @@ + +def fix_none_startalg(wildcards): + if wildcards["startalg"] == "None": + return [] + else: + return "{output_dir}/adjmat_estimate/{data}/algorithm=/{startalg}/seed={replicate}/adjmat.csv", + +rule: + name: module_name + input: + data=alg_input_data(), + startgraph_file=fix_none_startalg, + output: + seqgraph=alg_output_seqgraph_path(module_name), + time=alg_output_time_path(module_name), + ntests=touch(alg_output_ntests_path(module_name)) + container: + docker_image("bdgraph") + script: + "script.R" diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json b/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json new file mode 100644 index 00000000..f039885d --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json @@ -0,0 +1,164 @@ +{ + "description": "BDgraph MCMC objects", + "title": "bdgraph", + "type": "array", + "items": { + "title": "bdgraph item", + "description": "bdgraph instance", + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier" + }, + "method": { + "anyOf": [ + { + "type": "string", + "enum": [ + "ggm", + "gcgm" + ] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": [ + "ggm", + "gcgm" + ] + } + } + ] + }, + "algo": { + "anyOf": [ + { + "type": "string", + "enum": [ + "rjmcmc", + "bdmcmc", + "bd-dmh", + "rj-dmh" + ] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": [ + "rjmcmc", + "bdmcmc", + "bd-dmh", + "rj-dmh" + ] + } + } + ] + }, + "iter": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "gprior": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" + }, + "dfprior": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "gstart": { + "type": "string", + "enum": [ + "full", + "empty" + ] + }, + "thresh": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" + }, + "threshold": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" + }, + "mcmc_estimator": { + "anyOf": [ + { + "type": "string", + "enum": [ + "map", + "threshold" + ] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": [ + "map", + "threshold" + ] + } + } + ] + }, + "burnin_frac": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" + }, + "mcmc_seed": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "timeout": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull" + }, + "startalg": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "additionalProperties": true, + "required": [ + "mcmc_estimator", + "burnin_frac", + "threshold", + "method", + "algo", + "startalg", + "iter", + "gprior", + "gstart", + "dfprior", + "thresh", + "mcmc_seed", + "timeout" + ], + "examples": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": "bdmcmc", + "iter": 3000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "startalg": null, + "mcmc_seed": 1, + "thresh": 0.5, + "mcmc_estimator": "threshold", + "threshold": [ + 0.1, + 0.3, + 0.5, + 0.7, + 0.9, + 1.0 + ], + "burnin_frac": 
0.3 + } + ] + } +} diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R new file mode 100644 index 00000000..e543410e --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R @@ -0,0 +1,164 @@ +source("workflow/scripts/utils/add_timeout.R") +library(BDgraph) +library(R.utils) + +adjmatToEdgeString <- function(adjmat, labels) { + edgeinds <- which(adjmat == 1, arr.ind = TRUE) + df <- data.frame(edgeinds) + edges <- "[" + firstrow <- TRUE + for (i in rownames(df)) { + edge <- paste(labels[df[i, "row"]], labels[df[i, "col"]], sep = "-") + if (firstrow) { + sep <- "" + firstrow <- FALSE + } else { + sep <- ";" + } + edges <- paste(edges, edge, sep = sep) + } + edges <- paste(edges, sep = "", "]") + return(edges) +} + +dummyEdges <- function(labels) { + n <- length(labels) + added <- "[" + for (i in 2:n) { + edge <- paste(labels[1], labels[i], sep = "-") + if (i == 2) { + sep <- "" + } else { + sep <- ";" + } + added <- paste(added, edge, sep = sep) + } + added <- paste(added, sep = "", "]") + return(added) +} + +strvec_to_adjmat <- function(str, p) { + vec <- strsplit(str, "")[[1]] + + adjmat <- matrix(0, nrow = p, ncol = p) + k <- 1 + for (i in seq(2, p)) { + for (j in seq(1, i - 1)) { + adjmat[i, j] <- as.integer(vec[k]) + k <- k + 1 + } + } + + return(adjmat) +} + + +filename <- file.path(snakemake@output[["seqgraph"]]) +filename_data <- snakemake@input[["data"]] +seed <- as.integer(snakemake@wildcards[["mcmc_seed"]]) +data <- read.csv(filename_data, check.names = FALSE) + + +wrapper <- function() { + start <- proc.time()[1] + set.seed(seed) # BUG: Seeds doesn't seem to work in BDgraph. + p <- dim(data)[2] + + bdgraph.obj <- bdgraph(data, + n = NULL, + method = snakemake@wildcards[["method"]], + algorithm = snakemake@wildcards[["algo"]], + iter = as.integer(snakemake@wildcards[["iter"]]), + burnin = 0, + not.cont = NULL, + g.prior = as.numeric(snakemake@wildcards[["gprior"]]), + df.prior = as.numeric(snakemake@wildcards[["dfprior"]]), + g.start = snakemake@wildcards[["gstart"]], + jump = NULL, + save = TRUE, + cores = NULL, + threshold = as.numeric(snakemake@wildcards[["thresh"]]) + ) + + totaltime <- proc.time()[1] - start + + adjmat_traj <- list() + # all_graph contain indices from sample_graphs + j <- 1 + + for (i in bdgraph.obj$all_graphs) { + strvec <- bdgraph.obj$sample_graphs[[i]] + + # graphs are stores as upper triangular matrices, stacked as strings + # like 00100110 of length (p choose 2). + adjmat <- strvec_to_adjmat(strvec, p) + adjmat_traj[[j]] <- adjmat + j <- j + 1 + } + + # This returns a string which is a list of flattened adjacency matrices. + labels <- colnames(data) + added <- dummyEdges(labels) + + start_edges <- adjmatToEdgeString( + adjmat_traj[[1]], + labels + ) + minweight = min(bdgraph.obj$all_weights) + + if (snakemake@wildcards[["algo"]] %in% c("bdmcmc", "bd-dmh")) { + # Translate weights into indices by dividing by the smallest weight. This + # is not perfect though, so one could instead divide by the smallest weight diff. + # However, which gives the highest resolution depends on the chain. + # We also save the actual times in the time column. + + indices <- c(0, cumsum(bdgraph.obj$all_weights)) / minweight + indices <- round(indices) + + } else { + indices <- c(0, cumsum(bdgraph.obj$all_weights)) # no difference since the weight are all 1s. 
+ } + + res <- data.frame( + "index" = c(-2, -1, indices[1]), + "time"= c(-2, -1, indices[1]), # adding the raw times as well. Can be used in e.g. heatmaps + "score" = c(0, 0, bdgraph.obj$graph_weights[[bdgraph.obj$all_graphs[[1]]]]), + "added" = c(added, "[]", start_edges), + "removed" = c("[]", added, "[]") + ) + + m <- length(adjmat_traj) + + prevmat <- adjmat_traj[[1]] + + cumsum_weights = c(0,cumsum(bdgraph.obj$all_weights)) + + for (i in seq(2, m)) { + if (all(adjmat_traj[[i]] == prevmat)) { + next + } + + removed_edge_mat <- prevmat - (prevmat & adjmat_traj[[i]]) * 1 + added_edge_mat <- adjmat_traj[[i]] - (prevmat & adjmat_traj[[i]]) * 1 + + added_edges <- adjmatToEdgeString(added_edge_mat, labels) + removed_edges <- adjmatToEdgeString(removed_edge_mat, labels) + + df <- data.frame( + "index" = indices[i], + "time"= cumsum_weights[i], # adding the raw times as well. Can be used in e.g. heamaps + "score" = bdgraph.obj$graph_weights[[bdgraph.obj$all_graphs[[i]]]], + "added" = added_edges, + "removed" = removed_edges + ) + + res <- rbind(res, df) + prevmat <- adjmat_traj[[i]] + } + + write.csv(x = res, file = filename, row.names = FALSE, quote = FALSE) + write(totaltime, file = snakemake@output[["time"]]) + +} + +add_timeout(wrapper) From 57c5fb6a22f308ddaf1b5ce9c86cf11421fd796e Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 13 Dec 2022 17:24:23 +0300 Subject: [PATCH 02/14] config file --- config/bdgraph_pip_random-5.json | 279 +++++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 config/bdgraph_pip_random-5.json diff --git a/config/bdgraph_pip_random-5.json b/config/bdgraph_pip_random-5.json new file mode 100644 index 00000000..f6cd913b --- /dev/null +++ b/config/bdgraph_pip_random-5.json @@ -0,0 +1,279 @@ +{ + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-5", + "parameters_id": "gwi", + "seed_range": [ + 1, + 1 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "bdgraph_pip_random5/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "bdgraph", + "gg99", + "bdgraph_pip" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [ + "bdgraph", + "gg99", + "bdgraph_pip" + ], + "mcmc_traj_plots": [ + { + "id": "gg99", + "burn_in": 100000, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + }, + { + "id": "bdgraph", + "burn_in": 500, + "thinning": 1, + "functional": [ + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [ + { + "id": "gg99", + "burn_in": 0.5, + "active": true + }, + { + "id": "bdgraph", + "burn_in": 0.5, + "active": true + }, + { + "id": "bdgraph_pip", + "burn_in": 0.5, + "active": true + } + ], + "mcmc_autocorr_plots": [ + { + "id": "gg99", + "burn_in": 150000, + "thinning": 500, + "lags": 50, + "functional": [ + "score", + "size" + ], + "active": false + } + ] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "sample_sizes": [ + 100, + 200 + ] + } + ] + }, + "graph": { + "bdgraph_graphsim": [ + { + "id": "lattice", + "p": 50, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "random-1", + "p": 50, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-2", + "p": 50, + "graph": "random", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "random-5", + "p": 50, + "graph": "random", + 
"class": null, + "size": null, + "prob": 0.5 + }, + { + "id": "cluster", + "p": 50, + "graph": "cluster", + "class": null, + "size": 20, + "prob": 0.2 + }, + { + "id": "circle", + "p": 50, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "hub", + "p": 50, + "graph": "hub", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "scale-free", + "p": 50, + "graph": "scale-free", + "class": null, + "size": null, + "prob": 0.2 + } + ] + }, + "parameters": { + "bdgraph_rgwish": [ + { + "id": "gwi", + "b": 3, + "threshold_conv": 0.0000001 + } + ] + }, + "structure_learning_algorithms": { + "bdgraph": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": [ + "bdmcmc" + ], + "iter": 30000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "mcmc_seed": [ + 1 + ], + "thresh": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.1, + 0.3, + 0.5, + 0.7, + 0.9, + 1.0 + ], + "burnin_frac": 0.3 + } + ], + "gg99_singlepair": [ + { + "id": "gg99", + "n_samples": [ + 2000000 + ], + "datatype": "continuous", + "randomits": 100, + "prior": "ep", + "ascore": null, + "bscore": null, + "clq": 2, + "sep": 4, + "penalty": [ + 0.0 + ], + "mcmc_seed": [ + 1 + ], + "timeout": null, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.1, + 0.3, + 0.5, + 0.7, + 0.9, + 1.0 + ], + "burnin_frac": 0.3 + } + ], + + "bdgraph_pip": [ + { + "id": "bdgraph_pip", + "method": "ggm", + "algo": [ + "bdmcmc" + ], + "iter": 3000, + "gprior": 0.5, + "dfprior": 3, + "startalg": "gg99", + "gstart": "empty", + "timeout": null, + "mcmc_seed": [ + 1 + ], + "thresh": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.1, + 0.3, + 0.5, + 0.7, + 0.9, + 1.0 + ], + "burnin_frac": 0.3 + } + ] + } + } +} From 267fc385da55701095deea591f1d3bc330d8365c Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 13 Dec 2022 21:32:56 +0300 Subject: [PATCH 03/14] fixing typo --- .../rules/structure_learning_algorithms/bdgraph_pip/schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json b/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json index f039885d..1ab2ecdb 100644 --- a/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/schema.json @@ -118,6 +118,7 @@ "type": "null" } ] + } }, "additionalProperties": true, "required": [ From e1d33ff7d16518252194c2dd93d41ce83808191a Mon Sep 17 00:00:00 2001 From: Felix Rios Date: Fri, 16 Dec 2022 11:17:10 +0100 Subject: [PATCH 04/14] Comments of problem with mcmc input alg. Not working yet. 
--- config/bdgraph_pip_random-5.json | 58 ++++++++++++------- workflow/rules/module_patterns.py | 6 +- workflow/rules/module_strings.py | 27 ++++++--- .../bdgraph_pip/rule.smk | 2 +- .../bdgraph_pip/script.R | 2 +- 5 files changed, 63 insertions(+), 32 deletions(-) diff --git a/config/bdgraph_pip_random-5.json b/config/bdgraph_pip_random-5.json index f6cd913b..90453a68 100644 --- a/config/bdgraph_pip_random-5.json +++ b/config/bdgraph_pip_random-5.json @@ -1,7 +1,7 @@ { "benchmark_setup": { "data": [ - { + { "data_id": "example1", "graph_id": "random-5", "parameters_id": "gwi", @@ -21,18 +21,16 @@ "path": true, "text": false, "ids": [ - "bdgraph", - "gg99", - "bdgraph_pip" + "gg99", + "bdgraph_pip" ] }, "graph_true_plots": true, "graph_true_stats": false, "ggally_ggpairs": false, "graph_plots": [ - "bdgraph", - "gg99", - "bdgraph_pip" + "gg99", + "bdgraph_pip" ], "mcmc_traj_plots": [ { @@ -52,28 +50,28 @@ "functional": [ "size" ], - "active": false + "active": false } ], "mcmc_heatmaps": [ { "id": "gg99", "burn_in": 0.5, - "active": true + "active": false }, { "id": "bdgraph", "burn_in": 0.5, - "active": true + "active": false }, - { - "id": "bdgraph_pip", - "burn_in": 0.5, - "active": true + { + "id": "bdgraph_pip", + "burn_in": 0.5, + "active": false } ], "mcmc_autocorr_plots": [ - { + { "id": "gg99", "burn_in": 150000, "thinning": 500, @@ -178,6 +176,25 @@ ] }, "structure_learning_algorithms": { + "bnlearn_tabu": [ + { + "id": "tabu-bge", + "score": "bge", + "iss": 1, + "issmu": [ + 0.0001, + 0.001, + 0.01, + 0.05 + ], + "issw": null, + "l": 5, + "k": 1, + "prior": "uniform", + "beta": 1, + "timeout": null + } + ], "bdgraph": [ { "id": "bdgraph", @@ -212,7 +229,7 @@ { "id": "gg99", "n_samples": [ - 2000000 + 20000 ], "datatype": "continuous", "randomits": 100, @@ -242,8 +259,7 @@ "burnin_frac": 0.3 } ], - - "bdgraph_pip": [ + "bdgraph_pip": [ { "id": "bdgraph_pip", "method": "ggm", @@ -252,8 +268,8 @@ ], "iter": 3000, "gprior": 0.5, - "dfprior": 3, - "startalg": "gg99", + "dfprior": 3, + "startalg": "tabu-bge", "gstart": "empty", "timeout": null, "mcmc_seed": [ @@ -276,4 +292,4 @@ ] } } -} +} \ No newline at end of file diff --git a/workflow/rules/module_patterns.py b/workflow/rules/module_patterns.py index 39934cc0..6ec5827b 100644 --- a/workflow/rules/module_patterns.py +++ b/workflow/rules/module_patterns.py @@ -13,10 +13,12 @@ def dict_to_path(d): c = d[0].copy() # take the first element in the list. BUG c.pop("id") # remove id from the string as only the parameters should identify the computation. - if "burnin" in c: - c.pop("burnin") + if "burnin_frac" in c: + c.pop("burnin_frac") if "threshold" in c: c.pop("threshold") + if "mcmc_estimator" in c: + c.pop("mcmc_estimator") if "active" in c: c.pop("active") sep = "/" diff --git a/workflow/rules/module_strings.py b/workflow/rules/module_strings.py index f8251f6c..f838df79 100644 --- a/workflow/rules/module_strings.py +++ b/workflow/rules/module_strings.py @@ -7,11 +7,18 @@ # Order MCMC is special in the sense that it can define a startspace by means # of the id of some algorithm. Thus the id has to be exptracted into a path string first. 
-def idtopath(mylist, json_string): +def idtopath(mylist, json_string, mcmc_est=False): + print("mylist") + print(mylist) + + print("json_string") + print(json_string) if mylist is None: return "None" if isinstance(mylist, list): + #if mcmc_est: return [json_string[startalg][0] for startalg in mylist] + else: return json_string[str(mylist)] @@ -20,14 +27,17 @@ def idtopath(mylist, json_string): # Generate strings from the config file. for alg in config["resources"]["structure_learning_algorithms"]: # Some algorihtm takes input graphs. These are treated separately. - has_input_algs = ["bidag_order_mcmc", "bidag_partition_mcmc"] - if alg not in has_input_algs: + has_input_algs = ["bidag_order_mcmc", "bidag_partition_mcmc" , "bdgraph_pip"] + if (alg not in has_input_algs) and (alg not in mcmc_modules): # not the mcmc_modules yet json_string.update({val["id"]: expand(pattern_strings[alg], **val) for val in config["resources"]["structure_learning_algorithms"][alg]}) # These are special and have to be the last one since they take input strings as start space. # The start space path has to be gnerated first. + +# Maybe this can be done in the end. +# Need to check if the startspace_algoruthm is an mcmc algorithm. If so idtopath should give also append pattern_strings["mcmc_est"] if "bidag_order_mcmc" in pattern_strings: order_mcmc_list = config["resources"]["structure_learning_algorithms"]["bidag_order_mcmc"] for items in order_mcmc_list: @@ -40,12 +50,15 @@ def idtopath(mylist, json_string): if "bdgraph_pip" in pattern_strings: bdgraph_pip_list = config["resources"]["structure_learning_algorithms"]["bdgraph_pip"] # The path to the startspace algorithm is extended here - for items in bdgraph_pip_list: - items["startalg"] = idtopath(items["startalg"], json_string) + for items in bdgraph_pip_list: + #print("hej") + #print(items) + items["startalg"] = idtopath(items["startalg"], json_string, mcmc_est=True) # Need to att the estmimator to the startalg as well - json_string.update({val["id"]: expand(pattern_strings["bdgraph_pip"], **val,) + json_string.update({val["id"]: expand(pattern_strings["bdgraph_pip"] +"/"+pattern_strings["mcmc_est"] , **val,) for val in bdgraph_pip_list} ) +#print(json_string["bdgraph_pip"]) if "bidag_partition_mcmc" in pattern_strings: @@ -54,7 +67,7 @@ def idtopath(mylist, json_string): for items in bidag_partition_mcmc_list: items["startspace_algorithm"] = idtopath(items["startspace_algorithm"], json_string) - json_string.update({val["id"]: expand(pattern_strings["bidag_partition_mcmc"], **val,) + json_string.update({val["id"]: expand(pattern_strings["bidag_partition_mcmc"]+"/"+pattern_strings["mcmc_est"], **val,) for val in bidag_partition_mcmc_list } ) diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk index e60c4427..8a944b7d 100644 --- a/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk @@ -15,6 +15,6 @@ rule: time=alg_output_time_path(module_name), ntests=touch(alg_output_ntests_path(module_name)) container: - docker_image("bdgraph") + "docker://onceltuca/bdgraph:2.64" script: "script.R" diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R index e543410e..d3689701 100644 --- a/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R +++ 
b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R @@ -1,6 +1,6 @@ source("workflow/scripts/utils/add_timeout.R") library(BDgraph) -library(R.utils) + adjmatToEdgeString <- function(adjmat, labels) { edgeinds <- which(adjmat == 1, arr.ind = TRUE) From a5c9e4828e76fe9b18aac67e57a9dcf9998fa8d2 Mon Sep 17 00:00:00 2001 From: Felix Rios Date: Wed, 21 Dec 2022 17:05:11 +0100 Subject: [PATCH 05/14] Supporting single MCMC startalg in bdgraph_pip. Not supporting list of startalg yet.. --- config/bdgraph_pip_random-5.json | 554 +++++++++--------- workflow/rules/module_strings.py | 77 ++- .../bdgraph_pip/rule.smk | 1 + .../bdgraph_pip/script.R | 1 + 4 files changed, 322 insertions(+), 311 deletions(-) diff --git a/config/bdgraph_pip_random-5.json b/config/bdgraph_pip_random-5.json index 90453a68..54d998e3 100644 --- a/config/bdgraph_pip_random-5.json +++ b/config/bdgraph_pip_random-5.json @@ -1,295 +1,279 @@ { - "benchmark_setup": { - "data": [ - { - "data_id": "example1", - "graph_id": "random-5", - "parameters_id": "gwi", - "seed_range": [ - 1, - 1 - ] - } - ], - "evaluation": { - "benchmarks": { - "filename_prefix": "bdgraph_pip_random5/", - "show_seed": false, - "errorbar": true, - "errorbarh": false, - "scatter": true, - "path": true, - "text": false, - "ids": [ - "gg99", - "bdgraph_pip" - ] - }, - "graph_true_plots": true, - "graph_true_stats": false, - "ggally_ggpairs": false, - "graph_plots": [ - "gg99", - "bdgraph_pip" - ], - "mcmc_traj_plots": [ - { - "id": "gg99", - "burn_in": 100000, - "thinning": 100, - "functional": [ - "score", - "size" - ], - "active": false - }, - { - "id": "bdgraph", - "burn_in": 500, - "thinning": 1, - "functional": [ - "size" - ], - "active": false - } - ], - "mcmc_heatmaps": [ - { - "id": "gg99", - "burn_in": 0.5, - "active": false - }, - { - "id": "bdgraph", - "burn_in": 0.5, - "active": false - }, - { - "id": "bdgraph_pip", - "burn_in": 0.5, - "active": false - } - ], - "mcmc_autocorr_plots": [ - { - "id": "gg99", - "burn_in": 150000, - "thinning": 500, - "lags": 50, - "functional": [ - "score", - "size" - ], - "active": false - } - ] - } - }, - "resources": { - "data": { - "iid": [ - { - "id": "example1", - "standardized": false, - "sample_sizes": [ - 100, - 200 - ] + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-5", + "parameters_id": "gwi", + "seed_range": [ + 1, + 1 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "bdgraph_pip_random5/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "bdgraph_pip" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [ + "bdgraph_pip" + ], + "mcmc_traj_plots": [ + { + "id": "gg99", + "burn_in": 100000, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + }, + { + "id": "bdgraph", + "burn_in": 500, + "thinning": 1, + "functional": [ + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [ + { + "id": "gg99", + "burn_in": 0.5, + "active": false + }, + { + "id": "bdgraph", + "burn_in": 0.5, + "active": false + }, + { + "id": "bdgraph_pip", + "burn_in": 0.5, + "active": false + } + ], + "mcmc_autocorr_plots": [ + { + "id": "gg99", + "burn_in": 150000, + "thinning": 500, + "lags": 50, + "functional": [ + "score", + "size" + ], + "active": false + } + ] } - ] }, - "graph": { - "bdgraph_graphsim": [ - { - "id": "lattice", - "p": 50, - "graph": "lattice", - "class": null, - "size": null, 
- "prob": 0.2 - }, - { - "id": "random-1", - "p": 50, - "graph": "random", - "class": null, - "size": null, - "prob": 0.1 - }, - { - "id": "random-2", - "p": 50, - "graph": "random", - "class": null, - "size": null, - "prob": 0.2 + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "sample_sizes": [ + 200 + ] + } + ] }, - { - "id": "random-5", - "p": 50, - "graph": "random", - "class": null, - "size": null, - "prob": 0.5 + "graph": { + "bdgraph_graphsim": [ + { + "id": "lattice", + "p": 50, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "random-1", + "p": 50, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-2", + "p": 10, + "graph": "random", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "random-5", + "p": 50, + "graph": "random", + "class": null, + "size": null, + "prob": 0.5 + }, + { + "id": "cluster", + "p": 50, + "graph": "cluster", + "class": null, + "size": 20, + "prob": 0.2 + }, + { + "id": "circle", + "p": 50, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "hub", + "p": 50, + "graph": "hub", + "class": null, + "size": null, + "prob": 0.2 + }, + { + "id": "scale-free", + "p": 50, + "graph": "scale-free", + "class": null, + "size": null, + "prob": 0.2 + } + ] }, - { - "id": "cluster", - "p": 50, - "graph": "cluster", - "class": null, - "size": 20, - "prob": 0.2 + "parameters": { + "bdgraph_rgwish": [ + { + "id": "gwi", + "b": 3, + "threshold_conv": 0.0000001 + } + ] }, - { - "id": "circle", - "p": 50, - "graph": "circle", - "class": null, - "size": null, - "prob": 0.2 - }, - { - "id": "hub", - "p": 50, - "graph": "hub", - "class": null, - "size": null, - "prob": 0.2 - }, - { - "id": "scale-free", - "p": 50, - "graph": "scale-free", - "class": null, - "size": null, - "prob": 0.2 - } - ] - }, - "parameters": { - "bdgraph_rgwish": [ - { - "id": "gwi", - "b": 3, - "threshold_conv": 0.0000001 - } - ] - }, - "structure_learning_algorithms": { - "bnlearn_tabu": [ - { - "id": "tabu-bge", - "score": "bge", - "iss": 1, - "issmu": [ - 0.0001, - 0.001, - 0.01, - 0.05 + "structure_learning_algorithms": { + "bnlearn_tabu": [ + { + "id": "tabu-bge", + "score": "bge", + "iss": 1, + "issmu": [ + 0.001 + ], + "issw": null, + "l": 5, + "k": 1, + "prior": "uniform", + "beta": 1, + "timeout": null + } ], - "issw": null, - "l": 5, - "k": 1, - "prior": "uniform", - "beta": 1, - "timeout": null - } - ], - "bdgraph": [ - { - "id": "bdgraph", - "method": "ggm", - "algo": [ - "bdmcmc" - ], - "iter": 30000, - "gprior": 0.5, - "dfprior": 3, - "gstart": "empty", - "timeout": null, - "mcmc_seed": [ - 1 - ], - "thresh": 0.5, - "mcmc_estimator": [ - "threshold" - ], - "threshold": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 1.0 - ], - "burnin_frac": 0.3 - } - ], - "gg99_singlepair": [ - { - "id": "gg99", - "n_samples": [ - 20000 - ], - "datatype": "continuous", - "randomits": 100, - "prior": "ep", - "ascore": null, - "bscore": null, - "clq": 2, - "sep": 4, - "penalty": [ - 0.0 - ], - "mcmc_seed": [ - 1 - ], - "timeout": null, - "mcmc_estimator": [ - "threshold" - ], - "threshold": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 1.0 - ], - "burnin_frac": 0.3 - } - ], - "bdgraph_pip": [ - { - "id": "bdgraph_pip", - "method": "ggm", - "algo": [ - "bdmcmc" - ], - "iter": 3000, - "gprior": 0.5, - "dfprior": 3, - "startalg": "tabu-bge", - "gstart": "empty", - "timeout": null, - "mcmc_seed": [ - 1 - ], - "thresh": 0.5, - "mcmc_estimator": [ - "threshold" - ], - 
"threshold": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 1.0 - ], - "burnin_frac": 0.3 + "bdgraph": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": [ + "bdmcmc" + ], + "iter": 30000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "mcmc_seed": [ + 1 + ], + "thresh": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.1, + 0.3, + 0.5, + 0.7, + 0.9, + 1.0 + ], + "burnin_frac": 0.3 + } + ], + "gg99_singlepair": [ + { + "id": "gg99", + "n_samples": [ + 20000 + ], + "datatype": "continuous", + "randomits": 100, + "prior": "ep", + "ascore": null, + "bscore": null, + "clq": 2, + "sep": 4, + "penalty": [ + 0.0 + ], + "mcmc_seed": [ + 1 + ], + "timeout": null, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.5 + ], + "burnin_frac": 0.3 + } + ], + "bdgraph_pip": [ + { + "id": "bdgraph_pip", + "method": "ggm", + "algo": [ + "bdmcmc" + ], + "iter": 3000, + "gprior": 0.5, + "dfprior": 3, + "startalg": "gg99", + "gstart": "empty", + "timeout": null, + "mcmc_seed": [ + 1 + ], + "thresh": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "threshold": [ + 0.5 + ], + "burnin_frac": 0.3 + } + ] } - ] } - } } \ No newline at end of file diff --git a/workflow/rules/module_strings.py b/workflow/rules/module_strings.py index f838df79..eefd5d08 100644 --- a/workflow/rules/module_strings.py +++ b/workflow/rules/module_strings.py @@ -1,27 +1,47 @@ -# This file pricudes a dict where the paths for all algorithms are generated based on the -# algorithm pattern strings and the values in the config file. -# -# MCMC methods are special since the the estimation parameters should not mix with the -# parameters that define the run. -# -# Order MCMC is special in the sense that it can define a startspace by means -# of the id of some algorithm. Thus the id has to be exptracted into a path string first. - -def idtopath(mylist, json_string, mcmc_est=False): - print("mylist") - print(mylist) + + + +def id_to_alg(id): + for key, alg in config["resources"]["structure_learning_algorithms"].items(): + for obj in alg: + if obj["id"] == id: + return key + + return None + + + +""" + This file procudes a dict where the paths for all algorithms are generated based on the + algorithm pattern strings and the values in the config file. + MCMC methods are special since the the estimation parameters should not mix with the + parameters that define the run. + Order MCMC is special in the sense that it can define a startspace by means + of the id of some algorithm. Thus the id has to be exptracted into a path string first. +""" +def idtopath(idlist): + + + # mylist can either be None, an id, or a list of ids. + # The id may correspond to an MCMC alg, then the estimator parameters should be added too. + + alg = id_to_alg(idlist) + vals = config["resources"]["structure_learning_algorithms"][alg][0] - print("json_string") - print(json_string) - if mylist is None: + if idlist is None: return "None" - if isinstance(mylist, list): - #if mcmc_est: - return [json_string[startalg][0] for startalg in mylist] - + if isinstance(idlist, list): # TODO: not spported yet + if id_to_alg(idlist[0]) in mcmc_modules: + return + else: + return [json_string[startalg][0] for startalg in idlist] else: - return json_string[str(mylist)] + if alg in mcmc_modules: + return expand(pattern_strings[alg]+"/"+pattern_strings["mcmc_est"], **vals) + else: + return expand(pattern_strings[alg], **vals) + json_string = {} # Generate strings from the config file. 
@@ -41,38 +61,39 @@ def idtopath(mylist, json_string, mcmc_est=False): if "bidag_order_mcmc" in pattern_strings: order_mcmc_list = config["resources"]["structure_learning_algorithms"]["bidag_order_mcmc"] for items in order_mcmc_list: - items["startspace_algorithm"] = idtopath(items["startspace_algorithm"], json_string) + items["startspace_algorithm"] = idtopath(items["startspace_algorithm"]) json_string.update({val["id"]: expand(pattern_strings["bidag_order_mcmc"]+"/"+pattern_strings["mcmc_est"], **val,) for val in order_mcmc_list } ) +# BUG: The the algs used as input need to bedefied before. PErhaps we can determined the pathstring here as well.. if "bdgraph_pip" in pattern_strings: bdgraph_pip_list = config["resources"]["structure_learning_algorithms"]["bdgraph_pip"] # The path to the startspace algorithm is extended here for items in bdgraph_pip_list: #print("hej") #print(items) - items["startalg"] = idtopath(items["startalg"], json_string, mcmc_est=True) # Need to att the estmimator to the startalg as well + + items["startalg"] = idtopath(items["startalg"]) # Need to att the estmimator to the startalg as well json_string.update({val["id"]: expand(pattern_strings["bdgraph_pip"] +"/"+pattern_strings["mcmc_est"] , **val,) for val in bdgraph_pip_list} ) -#print(json_string["bdgraph_pip"]) - + if "bidag_partition_mcmc" in pattern_strings: bidag_partition_mcmc_list = config["resources"]["structure_learning_algorithms"]["bidag_partition_mcmc"] # The path to the startspace algorithm is extended here for items in bidag_partition_mcmc_list: - items["startspace_algorithm"] = idtopath(items["startspace_algorithm"], json_string) + items["startspace_algorithm"] = idtopath(items["startspace_algorithm"]) json_string.update({val["id"]: expand(pattern_strings["bidag_partition_mcmc"]+"/"+pattern_strings["mcmc_est"], **val,) for val in bidag_partition_mcmc_list } ) -# parallelDG has MCMC estimator +# All MCMC algs for alg in mcmc_modules: if alg in pattern_strings: json_string.update({val["id"]: expand(pattern_strings[alg]+"/"+pattern_strings["mcmc_est"], **val) @@ -86,6 +107,10 @@ def idtopath(mylist, json_string, mcmc_est=False): json_string_mcmc_noest.update({val["id"]: expand(pattern_strings[alg], **val,) for val in config["resources"]["structure_learning_algorithms"][alg]}) + + + + # Evaluation strings def gen_evaluation_string_from_conf(method, alg_id): # This essentially converts a dict in (from an evaluation method conf) to a path string following a pattern diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk index 8a944b7d..80c4f73b 100644 --- a/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/rule.smk @@ -3,6 +3,7 @@ def fix_none_startalg(wildcards): if wildcards["startalg"] == "None": return [] else: + print("hoho") return "{output_dir}/adjmat_estimate/{data}/algorithm=/{startalg}/seed={replicate}/adjmat.csv", rule: diff --git a/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R index d3689701..2ef32af5 100644 --- a/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R +++ b/workflow/rules/structure_learning_algorithms/bdgraph_pip/script.R @@ -63,6 +63,7 @@ wrapper <- function() { start <- proc.time()[1] set.seed(seed) # BUG: Seeds doesn't seem to work in BDgraph. 
p <- dim(data)[2] + bdgraph.obj <- bdgraph(data, n = NULL, From a1028db8161d3fb6dae2f17cea49d81de5e693e3 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 27 Dec 2022 14:28:39 +0300 Subject: [PATCH 06/14] adding ROC curves --- .../rules/evaluation/benchmarks/plot_ROC.R | 115 ++++++++++++++++-- .../rules/evaluation/benchmarks/rules.smk | 1 + .../evaluation/benchmarks/run_summarise.R | 15 ++- 3 files changed, 119 insertions(+), 12 deletions(-) diff --git a/workflow/rules/evaluation/benchmarks/plot_ROC.R b/workflow/rules/evaluation/benchmarks/plot_ROC.R index 86e13562..4368ac6f 100644 --- a/workflow/rules/evaluation/benchmarks/plot_ROC.R +++ b/workflow/rules/evaluation/benchmarks/plot_ROC.R @@ -21,6 +21,7 @@ dir.create(snakemake@output[["graph_type"]]) dir.create(snakemake@output[["SHD_cpdag_joint"]]) dir.create(snakemake@output[["f1_skel_joint"]]) dir.create(snakemake@output[["ntests_joint"]]) +dir.create(snakemake@output[["roc_FPR_TPR"]]) if (file.info(snakemake@input[["csv"]])$size == 0) { # Do nothing. Everything was timed out so the directories will be empty. @@ -222,6 +223,8 @@ unique_data <- unique(joint_bench$data) theme(plot.title = element_text(hjust = 0.5)) ggsave(file = paste(snakemake@output[["fpr_tpr_pattern"]],"/", plt_counter, ".png", sep=""), plot = gg) + + gg <- ggplot() + { if (errorbar) { geom_errorbar( @@ -277,8 +280,8 @@ unique_data <- unique(joint_bench$data) geom_point( data = joint_bench, alpha = 0.15, show.legend = FALSE, aes( - x = FPR_skel, - y = TP_skel / true_n_edges_skel, + x = FPR_pattern, + y = TPR_pattern, col = id_numlev ), shape = 20, @@ -290,8 +293,8 @@ unique_data <- unique(joint_bench$data) geom_text( data = joint_bench, alpha = 0.25, show.legend = FALSE, aes( - x = FPR_skel, - y = TP_skel / true_n_edges_skel, + x = FPR_pattern, + y = TPR_pattern, label = replicate, col = id_numlev, shape = id_numlev ), check_overlap = FALSE @@ -335,15 +338,113 @@ unique_data <- unique(joint_bench$data) ylim(ylim[1], ylim[2]) } } + - xlab("FP/P") + - ylab("TP/P") + - ggtitle("Median FP/P vs. TP/P (undirected skeleton)") + + xlab("FPR") + + ylab("TPR") + + ggtitle("Median FPR vs. 
TPR (undirected skeleton)") + labs(col = "id") + theme_bw() + theme(plot.title = element_text(hjust = 0.5)) ggsave(file = paste(snakemake@output[["roc_FPRp_TPR_skel"]],"/", plt_counter, ".png", sep=""), plot = gg) + + gg <- ggplot() + { + if (path) { + geom_path( + data = toplot, alpha = 0.8, + aes( + x = FPR_skel_median, + y = TPR_skel_median, + col = id_numlev + ) + ) + } + } + { + if (!param_annot) { + geom_point( + data = toplot, alpha = 0.5, + aes( + x = FPR_skel_median, + y = TPR_skel_median, + col = id_numlev, + shape = id_numlev + ), + size = 1 + ) + } + } + { + if (scatter && !show_seed) { + geom_point( + data = joint_bench, alpha = 0.15, show.legend = FALSE, + aes( + x = FPR_pattern, + y = TPR_pattern, + col = id_numlev + ), + shape = 20, + size = 1 + ) + } + } + { + if (scatter && show_seed) { + geom_text( + data = joint_bench, alpha = 0.25, show.legend = FALSE, + aes( + x = FPR_pattern, + y = TPR_pattern, + label = replicate, col = id_numlev, shape = id_numlev + ), + check_overlap = FALSE + ) + } + } + { + if (path && !param_annot) { + geom_label( + data = toplot %>% + group_by(id_numlev) %>% + replace_na(list("curve_vals" = 0)) %>% + filter(curve_vals == max(curve_vals)), + alpha = 0.8, position = "dodge", alpha = 1, show.legend = FALSE, + aes( + x = FPR_skel_median, y = TPR_skel_median, + col = id_numlev, label = id_num + ) + ) + } + } + { + if (param_annot) { + geom_label( + data = toplot, alpha = 0.5, + aes( + x = FPR_skel_median, + y = TPR_skel_median, + label = curve_vals, col = id_numlev, shape = id_numlev + ), + check_overlap = FALSE + ) + } + } + + guides(shape = FALSE) + + facet_wrap(. ~ adjmat + parameters + data, nrow = 2) + + { + if (!is.null(xlim)) { + xlim(xlim[1], xlim[2]) + } + } + { + if (!is.null(ylim)) { + ylim(ylim[1], ylim[2]) + } + } + + xlab("FPR") + + ylab("TPR") + + ggtitle("FPR vs. 
TPR (undirected skeleton)") + + labs(col = "id") + + theme_bw() + + theme(plot.title = element_text(hjust = 0.5)) + ggsave(file = paste(snakemake@output[["roc_FPR_TPR"]],"/", plt_counter, ".png", sep=""), plot = gg) + + + gg <- ggplot() + { if (errorbar) { diff --git a/workflow/rules/evaluation/benchmarks/rules.smk b/workflow/rules/evaluation/benchmarks/rules.smk index 447ec9f4..023c41c4 100644 --- a/workflow/rules/evaluation/benchmarks/rules.smk +++ b/workflow/rules/evaluation/benchmarks/rules.smk @@ -37,6 +37,7 @@ rule benchmarks: FPRp_FNR_skel=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "FPRp_FNR_skel"), fnr_fprp_skel=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "FNR_FPR_skel"), roc_FPRp_TPR_skel=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "FPR_TPR_skel"), + roc_FPR_TPR=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "roc_FPR_TPR"), elapsed_time_joint=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "elapsed_time_joint"), graph_type=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "graph_type"), SHD_cpdag_joint=directory("results/output/benchmarks/"+config["benchmark_setup"]["evaluation"]["benchmarks"]["filename_prefix"] + "SHD_cpdag_joint"), diff --git a/workflow/rules/evaluation/benchmarks/run_summarise.R b/workflow/rules/evaluation/benchmarks/run_summarise.R index c8292b6a..7ff2a33b 100644 --- a/workflow/rules/evaluation/benchmarks/run_summarise.R +++ b/workflow/rules/evaluation/benchmarks/run_summarise.R @@ -18,6 +18,7 @@ compareEGs <- function(estEG, trueEG) { estSkel <- EGskel(estEG) # estimated skeleton trueSkel <- EGskel(trueEG) # true skeleton P <- sum(trueSkel) / 2 # number of positives + N <- sum(1-trueSkel) /2 # number of negative diffSkel <- estSkel - trueSkel extra_edges <- which(diffSkel > 0) # edges in estimated but not true EG FP <- length(extra_edges) / 2 # count to FPs @@ -42,17 +43,20 @@ compareEGs <- function(estEG, trueEG) { if (FP >= 0) { TPR <- 0 FPR_P <- 1 + FPR <- FP / N } else { TPR <- 1 FPR_P <- 0 + FPR <- 0 } } else { # true graph is non-empty - TPR <- TP / P - FPR_P <- FP / P + TPR <- TP / P + FPR_P <- FP / P + FPR <- FP / N } - compEGs <- c(TP, FP, SHD, TPR, FPR_P) - names(compEGs) <- c("TP", "FP", "SHD", "TPR", "FPR_P") + compEGs <- c(TP, FP, SHD, TPR, FPR_P, FPR) + names(compEGs) <- c("TP", "FP", "SHD", "TPR", "FPR_P", "FPR") return(compEGs) } @@ -142,7 +146,8 @@ benchmarks <- function(true_adjmat, estimated_adjmat) { graph_type <- "dag" } df <- data.frame( - TPR_pattern = compres["TPR"], # should be for all times + TPR_pattern = compres["TPR"], # should be for all times + FPR_pattern = compres["FPR"], FPRn_pattern = compres["FPR_P"], SHD_pattern = compres["SHD"], SHD_cpdag = SHD_cpdag, From 2808db9bd70258a451a45d12595db5f42ab098bb Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 27 Dec 2022 16:25:32 +0300 Subject: [PATCH 07/14] chaning burnin to prob, for traj plots and autocorrelation --- .../plot_autocorr_from_traj.py | 15 +++++++++------ .../evaluation/mcmc_autocorr_plots/schema.json | 8 ++++---- .../rules/evaluation/mcmc_traj_plots/schema.json | 8 ++++---- workflow/scripts/evaluation/write_graph_traj.py | 15 ++++++++------- 4 files changed, 25 
insertions(+), 21 deletions(-) diff --git a/workflow/rules/evaluation/mcmc_autocorr_plots/plot_autocorr_from_traj.py b/workflow/rules/evaluation/mcmc_autocorr_plots/plot_autocorr_from_traj.py index 48e86ebb..9962f0a5 100644 --- a/workflow/rules/evaluation/mcmc_autocorr_plots/plot_autocorr_from_traj.py +++ b/workflow/rules/evaluation/mcmc_autocorr_plots/plot_autocorr_from_traj.py @@ -113,11 +113,13 @@ def edges_str_to_list(str, edgesymb="-"): df2 = df2.fillna(method="ffill") + burnin = int(float(snakemake.wildcards["burn_in"]) * df2.shape[0]) + if snakemake.wildcards["thinning"] != "None": - dfplot = df2["size"][int(snakemake.wildcards["burn_in"]):].iloc[::int( - snakemake.wildcards["thinning"])] + thinning = int(snakemake.wildcards["thinning"]) + dfplot = df2["size"][burnin:].iloc[::thinning] else: - dfplot = df2["size"][int(snakemake.wildcards["burn_in"]):] + dfplot = df2["size"][burnin:] # pd.plotting.autocorrelation_plot(dfplot.iloc[::int(snakemake.wildcards["thinning"])]) elif snakemake.wildcards["functional"] == "score": @@ -134,11 +136,12 @@ def edges_str_to_list(str, edgesymb="-"): df2.at[0,'score'] = int(0) df2 = df2.fillna(method="ffill") - + burnin = int(float(snakemake.wildcards["burn_in"]) * df2.shape[0]) if snakemake.wildcards["thinning"] != "None": - dfplot = df2[int(snakemake.wildcards["burn_in"]):][::int(snakemake.wildcards["thinning"])] + thinning = int(snakemake.wildcards["thinning"]) + dfplot = df2[burnin:].iloc[::thinning] else: - dfplot = df2[int(snakemake.wildcards["burn_in"]):] + dfplot = df2[burnin:] if snakemake.wildcards["lags"] != "None": sm.graphics.tsa.plot_acf(dfplot, lags=int(snakemake.wildcards["lags"])) diff --git a/workflow/rules/evaluation/mcmc_autocorr_plots/schema.json b/workflow/rules/evaluation/mcmc_autocorr_plots/schema.json index 23fa7d4e..c91fb150 100644 --- a/workflow/rules/evaluation/mcmc_autocorr_plots/schema.json +++ b/workflow/rules/evaluation/mcmc_autocorr_plots/schema.json @@ -9,7 +9,7 @@ "examples": [ { "id": "omcmc_itsample-bge", - "burn_in": 0, + "burn_in": 0.5, "thinning": 1, "lags": 50, "functional": [ @@ -26,7 +26,7 @@ "examples": [ { "id": "omcmc_itsample-bge", - "burn_in": 0, + "burn_in": 0.5, "thinning": 1, "lags": 50, "functional": [ @@ -41,7 +41,7 @@ "type": "string" }, "burn_in": { - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" }, "thinning": { "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegintnull" @@ -84,4 +84,4 @@ ] } } -} \ No newline at end of file +} diff --git a/workflow/rules/evaluation/mcmc_traj_plots/schema.json b/workflow/rules/evaluation/mcmc_traj_plots/schema.json index bf5e52e6..0efbf96a 100644 --- a/workflow/rules/evaluation/mcmc_traj_plots/schema.json +++ b/workflow/rules/evaluation/mcmc_traj_plots/schema.json @@ -9,7 +9,7 @@ "examples": [ { "id": "omcmc_itsample-bge", - "burn_in": 0, + "burn_in": 0.5, "thinning": 1, "functional": [ "score", @@ -23,7 +23,7 @@ "examples": [ { "id": "omcmc_itsample-bge", - "burn_in": 0, + "burn_in": 0.5, "thinning": 1, "functional": [ "score", @@ -40,7 +40,7 @@ "type": "string" }, "burn_in": { - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" }, "thinning": { "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegintnull" @@ -79,4 +79,4 @@ "additionalItems": false } } -} \ No newline at end of file +} diff --git 
a/workflow/scripts/evaluation/write_graph_traj.py b/workflow/scripts/evaluation/write_graph_traj.py index 2944b431..cc672335 100644 --- a/workflow/scripts/evaluation/write_graph_traj.py +++ b/workflow/scripts/evaluation/write_graph_traj.py @@ -53,12 +53,13 @@ def edges_str_to_list(str, edgesymb="-"): df2 = df2.reindex(newindex).reset_index().reindex( columns=df2.columns).fillna(method="ffill") - + + burnin = int(float(snakemake.wildcards["burn_in"]) * df2.shape[0]) if snakemake.wildcards["thinning"] != "None": - df_noburnin = df2[int(snakemake.wildcards["burn_in"]):][::int( + df_noburnin = df2[burnin:][::int( snakemake.wildcards["thinning"])] else: - df_noburnin = df2[int(snakemake.wildcards["burn_in"]):] + df_noburnin = df2[burnin:] # Score trajectory elif snakemake.wildcards["functional"] == "score": @@ -70,12 +71,12 @@ def edges_str_to_list(str, edgesymb="-"): df2 = df[["index", "score"]][2:].set_index("index") df2 = df2.reindex(newindex).reset_index().reindex( columns=df2.columns).fillna(method="ffill") - + burnin = int(float(snakemake.wildcards["burn_in"]) * df2.shape[0]) if snakemake.wildcards["thinning"] != "None": - df_noburnin = df2[int(snakemake.wildcards["burn_in"]):][::int( + df_noburnin = df2[burnin:][::int( snakemake.wildcards["thinning"])] else: - df_noburnin = df2[int(snakemake.wildcards["burn_in"]):] + df_noburnin = df2[burnin:] df_noburnin.rename(columns={snakemake.wildcards["functional"]:"plotvalue"}, inplace=True) @@ -115,4 +116,4 @@ def edges_str_to_list(str, edgesymb="-"): df_noburnin["alg_string"] = snakemake.params["alg_string"] df_noburnin["eval_string"] = snakemake.params["eval_string"] - df_noburnin.to_csv(snakemake.output["traj"]) \ No newline at end of file + df_noburnin.to_csv(snakemake.output["traj"]) From 1600fa93deadbd414bc1e4ed3d97ec0376e19d23 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Wed, 28 Dec 2022 15:07:56 +0300 Subject: [PATCH 08/14] issue with thinning parameter when null --- .../evaluation/mcmc_autocorr_plots/write_autocorr_traj.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/workflow/rules/evaluation/mcmc_autocorr_plots/write_autocorr_traj.py b/workflow/rules/evaluation/mcmc_autocorr_plots/write_autocorr_traj.py index 16628a74..aae19505 100644 --- a/workflow/rules/evaluation/mcmc_autocorr_plots/write_autocorr_traj.py +++ b/workflow/rules/evaluation/mcmc_autocorr_plots/write_autocorr_traj.py @@ -12,7 +12,11 @@ varying_param = traj["param"].unique()[0] varying_param_val = traj["param_val"].unique()[0] -df = pd.DataFrame({"plotvalue":acs}, index=np.arange(1, int(snakemake.wildcards["lags"])+1) * int(snakemake.wildcards["thinning"])) +thinning = int(1) +if snakemake.wildcards["thinning"] != "None": + thinning = int(snakemake.wildcards["thinning"]) + +df = pd.DataFrame({"plotvalue":acs}, index=np.arange(1, int(snakemake.wildcards["lags"])+1) * thinning) df["param"] = varying_param From aaad5a4ccf6773d8b9f3deeb8e1871f00de19710 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sun, 1 Jan 2023 14:06:47 +0300 Subject: [PATCH 09/14] fixing roc curves --- .../evaluation/benchmarks/combine_ROC_data.R | 12 ++-- .../rules/evaluation/benchmarks/plot_ROC.R | 56 ++++++++++++++----- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/workflow/rules/evaluation/benchmarks/combine_ROC_data.R b/workflow/rules/evaluation/benchmarks/combine_ROC_data.R index 77fe67c2..27f2bc3c 100644 --- a/workflow/rules/evaluation/benchmarks/combine_ROC_data.R +++ b/workflow/rules/evaluation/benchmarks/combine_ROC_data.R @@ -92,10 +92,14 @@ for 
(algorithm in active_algorithms) { TPR_pattern_median = median(TPR_pattern), TPR_pattern_q1 = quantile(TPR_pattern, probs = c(0.05)), TPR_pattern_q3 = quantile(TPR_pattern, probs = c(0.95)), - FPR_pattern_mean = mean(FPRn_pattern), - FPR_pattern_median = median(FPRn_pattern), - FPR_pattern_q1 = quantile(FPRn_pattern, probs = c(0.05)), - FPR_pattern_q3 = quantile(FPRn_pattern, probs = c(0.95)), + FPR_pattern_mean = mean(FPR_pattern), + FPR_pattern_median = median(FPR_pattern), + FPR_pattern_q1 = quantile(FPR_pattern, probs = c(0.05)), + FPR_pattern_q3 = quantile(FPR_pattern, probs = c(0.95)), + FPRn_pattern_mean = mean(FPRn_pattern), + FPRn_pattern_median = median(FPRn_pattern), + FPRn_pattern_q1 = quantile(FPRn_pattern, probs = c(0.05)), + FPRn_pattern_q3 = quantile(FPRn_pattern, probs = c(0.95)), TPR_skel_mean = mean(TP_skel / true_n_edges_skel), TPR_skel_median = median(TP_skel / true_n_edges_skel), TPR_skel_q1 = quantile(TP_skel / true_n_edges_skel, probs = c(0.05)), diff --git a/workflow/rules/evaluation/benchmarks/plot_ROC.R b/workflow/rules/evaluation/benchmarks/plot_ROC.R index 4368ac6f..6e226013 100644 --- a/workflow/rules/evaluation/benchmarks/plot_ROC.R +++ b/workflow/rules/evaluation/benchmarks/plot_ROC.R @@ -105,7 +105,7 @@ unique_data <- unique(joint_bench$data) geom_errorbar( data = toplot, alpha = 0.5, aes( - x = FPR_pattern_median, + x = FPRn_pattern_median, ymin = TPR_pattern_q1, ymax = TPR_pattern_q3, col = id_numlev @@ -120,8 +120,8 @@ unique_data <- unique(joint_bench$data) data = toplot, alpha = 0.5, aes( y = TPR_pattern_median, - xmin = FPR_pattern_q1, - xmax = FPR_pattern_q3, + xmin = FPRn_pattern_q1, + xmax = FPRn_pattern_q3, col = id_numlev ), height = 0.01 @@ -132,7 +132,7 @@ unique_data <- unique(joint_bench$data) geom_path( data = toplot, alpha = 0.8, aes( - x = FPR_pattern_median, + x = FPRn_pattern_median, y = TPR_pattern_median, col = id_numlev ) @@ -144,7 +144,7 @@ unique_data <- unique(joint_bench$data) geom_point( data = toplot, alpha = 0.5, aes( - x = FPR_pattern_median, + x = FPRn_pattern_median, y = TPR_pattern_median, col = id_numlev, shape = id_numlev @@ -182,11 +182,11 @@ unique_data <- unique(joint_bench$data) data = toplot %>% group_by(id, adjmat, parameters, data) %>% replace_na(list("curve_vals" = 0)) %>% - mutate(SHDP_pattern_median = 1 - TPR_pattern_median + FPR_pattern_median) %>% + mutate(SHDP_pattern_median = 1 - TPR_pattern_median + FPRn_pattern_median) %>% filter(SHDP_pattern_median == min(SHDP_pattern_median)), alpha = 0.8, position = "dodge", alpha = 1, show.legend = FALSE, aes( - x = FPR_pattern_median, y = TPR_pattern_median, + x = FPRn_pattern_median, y = TPR_pattern_median, col = id_numlev, label = id_num ) ) @@ -196,7 +196,7 @@ unique_data <- unique(joint_bench$data) geom_label( data = toplot, alpha = 0.5, aes( - x = FPR_pattern_median, + x = FPRn_pattern_median, y = TPR_pattern_median, label = curve_vals, col = id_numlev, shape = id_numlev ), @@ -348,13 +348,41 @@ unique_data <- unique(joint_bench$data) - gg <- ggplot() + { + gg <- ggplot() + { + { + if (errorbar) { + geom_errorbar( + data = toplot, alpha = 0.5, + aes( + x = FPR_pattern_mean, + ymin = TPR_pattern_q1, + ymax = TPR_pattern_q3, + col = id_numlev + ), + width = 0.01 + ) + } + } + { + + if (errorbarh) { + geom_errorbarh( + data = toplot, alpha = 0.5, + aes( + y = TPR_pattern_median, + xmin = FPR_pattern_q1, + xmax = FPR_pattern_q3, + col = id_numlev + ), + height = 0.01 + ) + } + if (path) { geom_path( data = toplot, alpha = 0.8, aes( - x = FPR_skel_median, - y = 
TPR_skel_median, + x = FPR_pattern_mean, + y = TPR_pattern_mean, col = id_numlev ) ) @@ -364,8 +392,8 @@ unique_data <- unique(joint_bench$data) geom_point( data = toplot, alpha = 0.5, aes( - x = FPR_skel_median, - y = TPR_skel_median, + x = FPR_pattern_median, + y = TPR_pattern_median, col = id_numlev, shape = id_numlev ), @@ -637,7 +665,7 @@ unique_data <- unique(joint_bench$data) # alpha=0.8, position = "dodge", alpha=1, # aes(segment.linetype="solid", segment.alpha=0.3, ## segment.color="black", - # x =FPR_pattern_median, y = TPR_pattern_median,col = id, label=id_num)) + # x =FPRn_pattern_median, y = TPR_pattern_median,col = id, label=id_num)) } } + { if (param_annot) { From 35502f46820976422738f7e01ddcf2a74c8c5cc0 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sun, 1 Jan 2023 14:55:10 +0300 Subject: [PATCH 10/14] small typo --- workflow/rules/evaluation/benchmarks/run_summarise.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflow/rules/evaluation/benchmarks/run_summarise.R b/workflow/rules/evaluation/benchmarks/run_summarise.R index 7ff2a33b..44149138 100644 --- a/workflow/rules/evaluation/benchmarks/run_summarise.R +++ b/workflow/rules/evaluation/benchmarks/run_summarise.R @@ -171,7 +171,8 @@ if (file.info(argv$adjmat_est)$size > 0) { df <- benchmarks(true_adjmat, estimated_adjmat) } else { df <- data.frame( - TPR_pattern = "None", # should be for all times + TPR_pattern = "None", # should be for all times + FPR_pattern = "None", FPRn_pattern = "None", SHD_pattern = "None", SHD_cpdag = "None", From 73b6b28f77cf461aee8e6fb04a11e53d1cda5460 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sun, 1 Jan 2023 15:49:59 +0300 Subject: [PATCH 11/14] fixing ROC plot to 0,1 --- .../rules/evaluation/benchmarks/plot_ROC.R | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/workflow/rules/evaluation/benchmarks/plot_ROC.R b/workflow/rules/evaluation/benchmarks/plot_ROC.R index 6e226013..4a46cd8e 100644 --- a/workflow/rules/evaluation/benchmarks/plot_ROC.R +++ b/workflow/rules/evaluation/benchmarks/plot_ROC.R @@ -42,7 +42,7 @@ if (file.info(snakemake@input[["csv"]])$size == 0) { revlevlist <- c() revlevnumlist <- c() - lev <- levels(toplot$id) + lev <- levels(factor(toplot$id)) numlev <- c() for (i in seq_along(lev)) { @@ -63,7 +63,7 @@ if (file.info(snakemake@input[["csv"]])$size == 0) { # Add extra columns with id number like: 3 and id and level like: 3. pcalg_pc. 
toplot <- toplot %>% mutate(id_num = lev_to_num(id)) %>% - mutate(id_numlev = lev_to_levnum(id)) + mutate(id_numlev = lev_to_levnum(id)) toplot$id_numlev <- factor(toplot$id_numlev, levels = numlev) joint_bench <- joint_bench %>% mutate(id_num = lev_to_num(id)) %>% @@ -221,8 +221,8 @@ unique_data <- unique(joint_bench$data) labs(col = "id") + theme_bw() + theme(plot.title = element_text(hjust = 0.5)) - ggsave(file = paste(snakemake@output[["fpr_tpr_pattern"]],"/", plt_counter, ".png", sep=""), plot = gg) + ggsave(file = paste(snakemake@output[["fpr_tpr_pattern"]],"/", plt_counter, ".png", sep=""), plot = gg) gg <- ggplot() + { @@ -344,14 +344,12 @@ unique_data <- unique(joint_bench$data) labs(col = "id") + theme_bw() + theme(plot.title = element_text(hjust = 0.5)) - ggsave(file = paste(snakemake@output[["roc_FPRp_TPR_skel"]],"/", plt_counter, ".png", sep=""), plot = gg) - + ggsave(file = paste(snakemake@output[["roc_FPRp_TPR_skel"]],"/", plt_counter, ".png", sep=""), plot = gg) gg <- ggplot() + { - { - if (errorbar) { - geom_errorbar( + if (errorbar) { + geom_errorbar( data = toplot, alpha = 0.5, aes( x = FPR_pattern_mean, @@ -363,8 +361,7 @@ unique_data <- unique(joint_bench$data) ) } } + { - - if (errorbarh) { + if (errorbarh) { geom_errorbarh( data = toplot, alpha = 0.5, aes( @@ -376,8 +373,8 @@ unique_data <- unique(joint_bench$data) height = 0.01 ) } - - if (path) { + } + { + if (path) { geom_path( data = toplot, alpha = 0.8, aes( @@ -414,7 +411,7 @@ unique_data <- unique(joint_bench$data) ) } } + { - if (scatter && show_seed) { + if (scatter && show_seed) { geom_text( data = joint_bench, alpha = 0.25, show.legend = FALSE, aes( @@ -434,7 +431,8 @@ unique_data <- unique(joint_bench$data) filter(curve_vals == max(curve_vals)), alpha = 0.8, position = "dodge", alpha = 1, show.legend = FALSE, aes( - x = FPR_skel_median, y = TPR_skel_median, + x = FPR_pattern_mean, + y = TPR_pattern_mean, col = id_numlev, label = id_num ) ) @@ -444,9 +442,9 @@ unique_data <- unique(joint_bench$data) geom_label( data = toplot, alpha = 0.5, aes( - x = FPR_skel_median, - y = TPR_skel_median, - label = curve_vals, col = id_numlev, shape = id_numlev + x = FPR_pattern_mean, + y = TPR_pattern_mean, + label = curve_vals, col = id_numlev, shape = id_numlev ), check_overlap = FALSE ) @@ -469,8 +467,7 @@ unique_data <- unique(joint_bench$data) labs(col = "id") + theme_bw() + theme(plot.title = element_text(hjust = 0.5)) - ggsave(file = paste(snakemake@output[["roc_FPR_TPR"]],"/", plt_counter, ".png", sep=""), plot = gg) - + ggsave(file = paste(snakemake@output[["roc_FPR_TPR"]],"/", plt_counter, ".png", sep=""), plot = gg) gg <- ggplot() + From d39243cd0dce0fb006fbfc150f2179b266f2b14c Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sun, 1 Jan 2023 16:16:13 +0300 Subject: [PATCH 12/14] fixing ROC ylim and xlim --- workflow/rules/evaluation/benchmarks/plot_ROC.R | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/workflow/rules/evaluation/benchmarks/plot_ROC.R b/workflow/rules/evaluation/benchmarks/plot_ROC.R index 4a46cd8e..13a3036e 100644 --- a/workflow/rules/evaluation/benchmarks/plot_ROC.R +++ b/workflow/rules/evaluation/benchmarks/plot_ROC.R @@ -224,7 +224,6 @@ unique_data <- unique(joint_bench$data) ggsave(file = paste(snakemake@output[["fpr_tpr_pattern"]],"/", plt_counter, ".png", sep=""), plot = gg) - gg <- ggplot() + { if (errorbar) { geom_errorbar( @@ -451,16 +450,9 @@ unique_data <- unique(joint_bench$data) } } + guides(shape = FALSE) + - facet_wrap(. 
~ adjmat + parameters + data, nrow = 2) + - { - if (!is.null(xlim)) { - xlim(xlim[1], xlim[2]) - } - } + { - if (!is.null(ylim)) { - ylim(ylim[1], ylim[2]) - } - } + + facet_wrap(. ~ adjmat + parameters + data, nrow = 2) + + xlim(0, 1) + + ylim(0, 1) + xlab("FPR") + ylab("TPR") + ggtitle("FPR vs. TPR (undirected skeleton)") + From c301fa61ae25b66b6096ec0a6e15a09ab700f59e Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 3 Jan 2023 12:31:18 +0300 Subject: [PATCH 13/14] changing docs for burnin in autocorre, trajplot, heatmap --- workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst | 2 +- workflow/rules/evaluation/mcmc_heatmaps/docs.rst | 2 +- workflow/rules/evaluation/mcmc_heatmaps/schema.json | 4 ++-- workflow/rules/evaluation/mcmc_traj_plots/docs.rst | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst b/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst index 892201ef..5f351e82 100644 --- a/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst +++ b/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst @@ -6,7 +6,7 @@ This module plots the auto-correlation of a functional of the graphs in a MCMC t +----------------+----------------------------------------------------------------------------------------------------------------------------+ | ``id`` | algorithm module object id. | +----------------+----------------------------------------------------------------------------------------------------------------------------+ -| ``burn_in`` | use samples starting from this value. Use 0 if no burn-in. | +| ``burn_in`` | percent to burn of the number of samples. | +----------------+----------------------------------------------------------------------------------------------------------------------------+ | ``thinning`` | use only each ``thinning`` sample of the chain. (It is usually recommended to use this if the number of samples if large). | +----------------+----------------------------------------------------------------------------------------------------------------------------+ diff --git a/workflow/rules/evaluation/mcmc_heatmaps/docs.rst b/workflow/rules/evaluation/mcmc_heatmaps/docs.rst index 0b01966a..f27bca30 100644 --- a/workflow/rules/evaluation/mcmc_heatmaps/docs.rst +++ b/workflow/rules/evaluation/mcmc_heatmaps/docs.rst @@ -9,7 +9,7 @@ This module has a list of objects, where each object has +-------------+-------------------------+ | ``id`` | the algorithm object id | +-------------+-------------------------+ -| ``burn_in`` | the burn-in period. | +| ``burn_in`` | percent to burn of the number of samples. | +-------------+-------------------------+ The estimated probabilities are plotted in heatmaps using seaborn which are saved in *results/mcmc_heatmaps/* and copied to *results/output/mcmc_heatmaps/* for easy reference. 
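
For orientation, here is a minimal R sketch of what a fractional ``burn_in`` means for a sampled graph trajectory, independent of the plotting backend (the actual heatmaps are drawn with seaborn). This is an illustration only, not code from the repository: the helper names are hypothetical, and it assumes ``burn_in`` is a value in [0, 1] (as the ``flexprob`` reference in the schema suggests) and that the trajectory is a list of adjacency matrices.

.. code-block:: r

    # Hypothetical helpers (not repository code): drop the first burn_in
    # fraction of a graph trajectory, then average the remaining adjacency
    # matrices into edge probabilities for a heatmap.
    apply_burnin <- function(traj, burn_in) {
      stopifnot(burn_in >= 0, burn_in < 1)
      start <- floor(burn_in * length(traj)) + 1   # first sample kept after burn-in
      traj[start:length(traj)]
    }

    edge_probs <- function(traj, burn_in = 0.2) {
      kept <- apply_burnin(traj, burn_in)
      Reduce(`+`, kept) / length(kept)             # element-wise mean adjacency matrix
    }
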
diff --git a/workflow/rules/evaluation/mcmc_heatmaps/schema.json b/workflow/rules/evaluation/mcmc_heatmaps/schema.json index 8b318826..92bd4593 100644 --- a/workflow/rules/evaluation/mcmc_heatmaps/schema.json +++ b/workflow/rules/evaluation/mcmc_heatmaps/schema.json @@ -21,7 +21,7 @@ "type": "boolean" }, "burn_in": { - "title": "Burn-in start index.", + "title": "Burn-in percentage.", "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" } }, @@ -31,4 +31,4 @@ ] }, "additionalItems": false -} \ No newline at end of file +} diff --git a/workflow/rules/evaluation/mcmc_traj_plots/docs.rst b/workflow/rules/evaluation/mcmc_traj_plots/docs.rst index 061a30fd..bbf8fe7d 100644 --- a/workflow/rules/evaluation/mcmc_traj_plots/docs.rst +++ b/workflow/rules/evaluation/mcmc_traj_plots/docs.rst @@ -7,7 +7,7 @@ The ``mcmc_traj_plots`` module has a list of objects, where each object has +----------------+-------------------------------------------------------------------+ | ``id`` | algorithm module object id. | +----------------+-------------------------------------------------------------------+ -| ``burn_in`` | use samples starting from this value. Use 0 if no burn-in. | +| ``burn_in`` | percent to burn of the number of samples. | +----------------+-------------------------------------------------------------------+ | ``functional`` | the currently supported functionals are *size* and graph *score*. | +----------------+-------------------------------------------------------------------+ From 71371d8436bcb65a1e8f86b0d70a3d86ce2e27c9 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Tue, 3 Jan 2023 12:59:15 +0300 Subject: [PATCH 14/14] fixing docs --- .../evaluation/mcmc_autocorr_plots/docs.rst | 10 ++++++--- .../rules/evaluation/mcmc_traj_plots/docs.rst | 22 +++++++++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst b/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst index 5f351e82..e66459fe 100644 --- a/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst +++ b/workflow/rules/evaluation/mcmc_autocorr_plots/docs.rst @@ -1,12 +1,10 @@ -This module plots the auto-correlation of a functional of the graphs in a MCMC trajectory. - +----------------+----------------------------------------------------------------------------------------------------------------------------+ | Field | Description | +----------------+----------------------------------------------------------------------------------------------------------------------------+ | ``id`` | algorithm module object id. | +----------------+----------------------------------------------------------------------------------------------------------------------------+ -| ``burn_in`` | percent to burn of the number of samples. | +| ``burn_in`` | percent to burn of the number of samples. | +----------------+----------------------------------------------------------------------------------------------------------------------------+ | ``thinning`` | use only each ``thinning`` sample of the chain. (It is usually recommended to use this if the number of samples if large). 
| +----------------+----------------------------------------------------------------------------------------------------------------------------+ @@ -14,6 +12,12 @@ This module plots the auto-correlation of a functional of the graphs in a MCMC t +----------------+----------------------------------------------------------------------------------------------------------------------------+ | ``lags`` | The maximum number of lags after ``thinning``. | +----------------+----------------------------------------------------------------------------------------------------------------------------+ +| ``active`` | should be set to *false* to deactivate. | ++----------------+----------------------------------------------------------------------------------------------------------------------------+ + + +This module plots the auto-correlation of a functional of the graphs in a MCMC trajectory. + .. figure:: _static/omcmcscoreautocorr.png :alt: Score trajectory of order MCMC diff --git a/workflow/rules/evaluation/mcmc_traj_plots/docs.rst b/workflow/rules/evaluation/mcmc_traj_plots/docs.rst index bbf8fe7d..4c689050 100644 --- a/workflow/rules/evaluation/mcmc_traj_plots/docs.rst +++ b/workflow/rules/evaluation/mcmc_traj_plots/docs.rst @@ -2,15 +2,19 @@ This module plots the values in the trajectory of a given functional. The ``mcmc_traj_plots`` module has a list of objects, where each object has -+----------------+-------------------------------------------------------------------+ -| Field | Description | -+----------------+-------------------------------------------------------------------+ -| ``id`` | algorithm module object id. | -+----------------+-------------------------------------------------------------------+ -| ``burn_in`` | percent to burn of the number of samples. | -+----------------+-------------------------------------------------------------------+ -| ``functional`` | the currently supported functionals are *size* and graph *score*. | -+----------------+-------------------------------------------------------------------+ ++----------------+-------------------------------------------------------------------------------+ +| Field | Description | ++----------------+-------------------------------------------------------------------------------+ +| ``id`` | algorithm module object id. | ++----------------+-------------------------------------------------------------------------------+ +| ``burn_in`` | percent to burn of the number of samples. | ++----------------+-------------------------------------------------------------------------------+ +| ``functional`` | the currently supported functionals are *size* and graph *score*. | ++----------------+-------------------------------------------------------------------------------+ +|``thinning`` | tells the module to only considering every graph at a given interval length. | ++----------------+-------------------------------------------------------------------------------+ +|``active`` | should be set to *false* to deactivate. | ++----------------+-------------------------------------------------------------------------------+ Since the trajectories tend to be very long, the user may choose to thin out the trajectory by only considering every graph at a given interval length specified by the ``thinning`` field.
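
As a concrete illustration of how ``burn_in`` and ``thinning`` interact, here is a minimal R sketch (not code from the repository) that discards the burn-in fraction of a functional trajectory and then keeps every ``thinning``-th sample. The function name and the simulated scores are hypothetical; it assumes ``burn_in`` is a fraction in [0, 1] and ``thinning`` is a positive integer interval.

.. code-block:: r

    # Hypothetical illustration (not repository code): burn-in as a fraction
    # of the trajectory length, followed by thinning at a fixed interval.
    thin_trajectory <- function(values, burn_in = 0.2, thinning = 10) {
      n <- length(values)
      kept <- values[seq(floor(burn_in * n) + 1, n)]  # drop the burn-in fraction
      kept[seq(1, length(kept), by = thinning)]       # keep every thinning-th sample
    }

    # Example with a simulated score trajectory:
    scores <- cumsum(rnorm(10000))
    plot(thin_trajectory(scores, burn_in = 0.2, thinning = 100),
         type = "l", xlab = "thinned sample", ylab = "score")
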